add missing test in LexerExec, fix LexerErrors

This commit is contained in:
ericvergnaud 2014-10-24 08:17:28 +08:00
parent 809bac2591
commit a683f05f6a
9 changed files with 44 additions and 914 deletions

View File

@ -716,14 +716,15 @@ public class Generator {
"abx",
"[@0,3:2='<EOF>',<-1>,1:3]\n",
"line 1:0 token recognition error at: 'abx'\n");
file.addLexerTest(input, "LexerExecDFA", "L",
LexerTestMethod tm = file.addLexerTest(input, "LexerExecDFA", "L",
"x : x",
"[@0,0:0='x',<3>,1:0]\n" +
"[@1,2:2=':',<2>,1:2]\n" +
"[@1,2:2=':',<1>,1:2]\n" +
"[@2,4:4='x',<3>,1:4]\n" +
"[@3,5:4='<EOF>',<-1>,1:5]\n",
"line 1:1 token recognition error at: ' '\n" +
"line 1:3 token recognition error at: ' '\n");
tm.lexerOnly = false;
return file;
}
@ -1173,6 +1174,13 @@ public class Generator {
file.addLexerTest(input, "LargeLexer", "L", "KW400",
"[@0,0:4='KW400',<402>,1:0]\n" +
"[@1,5:4='<EOF>',<-1>,1:5]\n", null);
/**
* This is a regression test for antlr/antlr4#687 "Empty zero-length tokens
* cannot have lexer commands" and antlr/antlr4#688 "Lexer cannot match
* zero-length tokens" */
file.addLexerTest(input, "ZeroLengthToken", "L", "'xxx'",
"[@0,0:4=''xxx'',<1>,1:0]\n" +
"[@1,5:4='<EOF>',<-1>,1:5]\n", null);
return file;
}

View File

@ -3,7 +3,8 @@ package org.antlr.v4.test.rt.gen;
public class LexerTestMethod extends TestMethod {
public String[] outputLines;
public boolean lexerOnly = true;
public LexerTestMethod(String name, String grammarName, String input,
String expectedOutput, String expectedErrors, Integer index) {
super(name, grammarName, input, expectedOutput, expectedErrors, index);

View File

@ -1,6 +1,5 @@
lexer grammar <grammarName>;
grammar <grammarName>;
start : ID ':' expr;
expr : primary expr? {} | expr '->' ID;
primary : ID;
ID : [a-z]+;
;

View File

@ -0,0 +1,9 @@
lexer grammar <grammarName>;
BeginString
: '\'' -> more, pushMode(StringMode)
;
mode StringMode;
StringMode_X : 'x' -> more;
StringMode_Done : -> more, mode(EndStringMode);
mode EndStringMode;
EndString : '\'' -> popMode;

View File

@ -28,7 +28,7 @@ public void test<test.name>() throws Exception {
};separator="\n", wrap, anchor>
String grammar = <test.grammar.lines:{ line | "<line>};separator="\\n\" +\n", wrap, anchor>";
<test.afterGrammar>
String found = execLexer("<test.grammar.grammarName>.g4", grammar, "<test.grammar.grammarName>", "<test.input>");
String found = execLexer("<test.grammar.grammarName>.g4", grammar, "<test.grammar.grammarName><if(!test.lexerOnly)>Lexer<endif>", "<test.input>");
assertEquals(<test.outputLines:{ line | "<line>\\n"};separator=" + \n", wrap, anchor>, found);
<if(test.expectedErrors)>
assertEquals("<test.expectedErrors>", this.stderrDuringParse);

View File

@ -123,15 +123,14 @@ public class TestLexerErrors extends BaseTest {
@Test
public void testLexerExecDFA() throws Exception {
String grammar = "lexer grammar L;\n" +
String grammar = "grammar L;\n" +
"start : ID ':' expr;\n" +
"expr : primary expr? {} | expr '->' ID;\n" +
"primary : ID;\n" +
"ID : [a-z]+;\n" +
";";
String found = execLexer("L.g4", grammar, "L", "x : x");
"ID : [a-z]+;";
String found = execLexer("L.g4", grammar, "LLexer", "x : x");
assertEquals("[@0,0:0='x',<3>,1:0]\n" +
"[@1,2:2=':',<2>,1:2]\n" +
"[@1,2:2=':',<1>,1:2]\n" +
"[@2,4:4='x',<3>,1:4]\n" +
"[@3,5:4='<EOF>',<-1>,1:5]\n", found);
assertEquals("line 1:1 token recognition error at: ' '\nline 1:3 token recognition error at: ' '\n", this.stderrDuringParse);

View File

@ -4623,5 +4623,22 @@ public class TestLexerExec extends BaseTest {
assertNull(this.stderrDuringParse);
}
/**
 * Lexes {@code 'xxx'} with a grammar that chains {@code more}, {@code pushMode},
 * {@code mode} and {@code popMode} commands (including a rule with an empty
 * alternative) and expects one token covering the whole input, then EOF,
 * with nothing written to stderr.
 */
@Test
public void testZeroLengthToken() throws Exception {
    String expectedTokens =
        "[@0,0:4=''xxx'',<1>,1:0]\n" +
        "[@1,5:4='<EOF>',<-1>,1:5]\n";
    String lexerGrammar = "lexer grammar L;\n" +
        "BeginString\n" +
        " : '\\'' -> more, pushMode(StringMode)\n" +
        " ;\n" +
        "mode StringMode;\n" +
        " StringMode_X : 'x' -> more;\n" +
        " StringMode_Done : -> more, mode(EndStringMode);\n" +
        "mode EndStringMode; \n" +
        " EndString : '\\'' -> popMode;";
    String actualTokens = execLexer("L.g4", lexerGrammar, "L", "'xxx'");
    assertEquals(expectedTokens, actualTokens);
    assertNull(this.stderrDuringParse);
}
}

View File

@ -1,213 +0,0 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.test.tool;
import org.junit.Test;
import static org.junit.Assert.*;
public class TestLexerErrors extends BaseTest {
// TEST DETECTION
/**
 * Input "x" cannot start rule {@code A : 'a' 'b'}: only EOF is expected in the
 * token dump, plus a recognition error reported at 1:0.
 */
@Test public void testInvalidCharAtStart() throws Exception {
    String expectedTokens = "[@0,1:0='<EOF>',<-1>,1:1]\n";
    String expectedError = "line 1:0 token recognition error at: 'x'\n";
    String lexerGrammar =
        "lexer grammar L;\n" +
        "A : 'a' 'b' ;\n";
    String actualTokens = execLexer("L.g4", lexerGrammar, "L", "x");
    assertEquals(expectedTokens, actualTokens);
    assertEquals(expectedError, stderrDuringParse);
}
/**
 * Runs the same ACTION2/STRING grammar twice: once on a bracketed string with
 * balanced quotes (one token, no errors) and once with the closing quote
 * missing (only EOF, and the whole input reported as unrecognized).
 */
@Test
public void testStringsEmbeddedInActions() {
    String lexerGrammar =
        "lexer grammar Actions;\n"
        + "ACTION2 : '[' (STRING | ~'\"')*? ']';\n"
        + "STRING : '\"' ('\\\"' | .)*? '\"';\n"
        + "WS : [ \\t\\r\\n]+ -> skip;\n";

    // Well-formed input: the string inside the brackets is closed.
    String balanced = execLexer("Actions.g4", lexerGrammar, "Actions", "[\"foo\"]");
    assertEquals(
        "[@0,0:6='[\"foo\"]',<1>,1:0]\n" +
        "[@1,7:6='<EOF>',<-1>,1:7]\n",
        balanced);
    assertNull(stderrDuringParse);

    // Malformed input: the closing quote is missing.
    String unbalanced = execLexer("Actions.g4", lexerGrammar, "Actions", "[\"foo]");
    assertEquals("[@0,6:5='<EOF>',<-1>,1:6]\n", unbalanced);
    assertEquals("line 1:0 token recognition error at: '[\"foo]'\n", stderrDuringParse);
}
/**
 * Exercises the recursive, greedy {@code ACTION} rule on nested braces.
 * Balanced input "{ { } }" must come back as a single ACTION token with no
 * error output; unbalanced input "{ { }" must yield only EOF and report the
 * whole input as unrecognized.
 */
@Test public void testEnforcedGreedyNestedBrances() {
    String grammar =
        "lexer grammar R;\n"
        + "ACTION : '{' (ACTION | ~[{}])* '}';\n"
        + "WS : [ \\r\\n\\t]+ -> skip;\n";

    String tokens = execLexer("R.g4", grammar, "R", "{ { } }");
    String expectingTokens =
        "[@0,0:6='{ { } }',<1>,1:0]\n" +
        "[@1,7:6='<EOF>',<-1>,1:7]\n";
    assertEquals(expectingTokens, tokens);
    // assertNull states the intent directly and matches the sibling tests in this class.
    assertNull(stderrDuringParse);

    tokens = execLexer("R.g4", grammar, "R", "{ { }");
    expectingTokens =
        "[@0,5:4='<EOF>',<-1>,1:5]\n";
    assertEquals(expectingTokens, tokens);
    assertEquals("line 1:0 token recognition error at: '{ { }'\n", stderrDuringParse);
}
/**
 * Input "abx": a valid {@code A} token ("ab") is matched first, then the
 * trailing 'x' must be reported as an error at 1:2.
 */
@Test public void testInvalidCharAtStartAfterDFACache() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n" +
        "A : 'a' 'b' ;\n";
    String actualTokens = execLexer("L.g4", lexerGrammar, "L", "abx");
    assertEquals(
        "[@0,0:1='ab',<1>,1:0]\n" +
        "[@1,3:2='<EOF>',<-1>,1:3]\n",
        actualTokens);
    assertEquals("line 1:2 token recognition error at: 'x'\n", stderrDuringParse);
}
/**
 * Input "ax" starts like rule {@code A : 'a' 'b'} but then diverges: only EOF
 * is expected and the error must cover both characters ("ax") at 1:0.
 */
@Test public void testInvalidCharInToken() throws Exception {
    String expectedTokens = "[@0,2:1='<EOF>',<-1>,1:2]\n";
    String expectedError = "line 1:0 token recognition error at: 'ax'\n";
    String lexerGrammar =
        "lexer grammar L;\n" +
        "A : 'a' 'b' ;\n";
    String actualTokens = execLexer("L.g4", lexerGrammar, "L", "ax");
    assertEquals(expectedTokens, actualTokens);
    assertEquals(expectedError, stderrDuringParse);
}
/**
 * Input "abax": the leading "ab" is a valid {@code A} token; the following
 * "ax" must be reported as one recognition error at 1:2.
 */
@Test public void testInvalidCharInTokenAfterDFACache() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n" +
        "A : 'a' 'b' ;\n";
    String actualTokens = execLexer("L.g4", lexerGrammar, "L", "abax");
    assertEquals(
        "[@0,0:1='ab',<1>,1:0]\n" +
        "[@1,4:3='<EOF>',<-1>,1:4]\n",
        actualTokens);
    assertEquals("line 1:2 token recognition error at: 'ax'\n", stderrDuringParse);
}
/**
 * Input "ababx" with rules {@code A : 'ab'} and {@code B : 'abc'}: two
 * {@code A} tokens are expected, followed by an error for the 'x' at 1:4.
 */
@Test public void testDFAToATNThatFailsBackToDFA() throws Exception {
    String expectedTokens =
        "[@0,0:1='ab',<1>,1:0]\n" +
        "[@1,2:3='ab',<1>,1:2]\n" +
        "[@2,5:4='<EOF>',<-1>,1:5]\n";
    String expectedError = "line 1:4 token recognition error at: 'x'\n";
    String lexerGrammar =
        "lexer grammar L;\n" +
        "A : 'ab' ;\n"+
        "B : 'abc' ;\n";
    // Scenario, per the original author's note: the first "ab" fills the DFA
    // cache; the second "ab" followed by 'x' runs through the DFA and then
    // into the ATN for the 'x', which fails. The lexer has to return to the
    // DFA and use its previous accept state.
    String actualTokens = execLexer("L.g4", lexerGrammar, "L", "ababx");
    assertEquals(expectedTokens, actualTokens);
    assertEquals(expectedError, stderrDuringParse);
}
/**
 * Input "ababcx" with rules {@code A : 'ab'}, {@code B : 'abc'} and
 * {@code C : 'abcd'}: expects an {@code A} token, then a {@code B} token,
 * then an error for the 'x' at 1:5.
 */
@Test public void testDFAToATNThatMatchesThenFailsInATN() throws Exception {
    String expectedTokens =
        "[@0,0:1='ab',<1>,1:0]\n" +
        "[@1,2:4='abc',<2>,1:2]\n" +
        "[@2,6:5='<EOF>',<-1>,1:6]\n";
    String expectedError = "line 1:5 token recognition error at: 'x'\n";
    String lexerGrammar =
        "lexer grammar L;\n" +
        "A : 'ab' ;\n"+
        "B : 'abc' ;\n"+
        "C : 'abcd' ;\n";
    // Scenario, per the original author's note: the first "ab" fills the DFA
    // cache; the next match starts in the DFA and moves into the ATN for the
    // 'c', where an accept state is recorded. Matching continues in the ATN
    // and fails on the 'x', so the accept recorded in the ATN (not one from
    // the DFA) has to be used.
    String actualTokens = execLexer("L.g4", lexerGrammar, "L", "ababcx");
    assertEquals(expectedTokens, actualTokens);
    assertEquals(expectedError, stderrDuringParse);
}
/**
 * Input "abx" against {@code A : 'abc'}: the mismatch happens on the third
 * character, and the whole "abx" must be reported as one error at 1:0.
 */
@Test public void testErrorInMiddle() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n" +
        "A : 'abc' ;\n";
    String actualTokens = execLexer("L.g4", lexerGrammar, "L", "abx");
    assertEquals("[@0,3:2='<EOF>',<-1>,1:3]\n", actualTokens);
    assertEquals("line 1:0 token recognition error at: 'abx'\n", stderrDuringParse);
}
// TEST RECOVERY
/**
 * Regression test for "NullPointerException in LexerATNSimulator.execDFA".
 * https://github.com/antlr/antlr4/issues/46
 *
 * NOTE(review): the original comment named the issue "#45" while linking to
 * issue 46 -- confirm which number is correct.
 *
 * Lexes "x : x" with the lexer derived from combined grammar {@code T}; the
 * two spaces are expected to be reported as recognition errors while the
 * remaining tokens are still produced.
 */
@Test
public void testLexerExecDFA() throws Exception {
    String expectedTokens =
        "[@0,0:0='x',<3>,1:0]\n" +
        "[@1,2:2=':',<1>,1:2]\n" +
        "[@2,4:4='x',<3>,1:4]\n" +
        "[@3,5:4='<EOF>',<-1>,1:5]\n";
    String expectedErrors =
        "line 1:1 token recognition error at: ' '\n" +
        "line 1:3 token recognition error at: ' '\n";
    String combinedGrammar =
        "grammar T;\n" +
        "start : ID ':' expr;\n" +
        "expr : primary expr? {} | expr '->' ID;\n" +
        "primary : ID;\n" +
        "ID : [a-z]+;\n" +
        "\n";
    String actualTokens = execLexer("T.g4", combinedGrammar, "TLexer", "x : x", false);
    assertEquals(expectedTokens, actualTokens);
    assertEquals(expectedErrors, this.stderrDuringParse);
}
}

View File

@ -1,690 +0,0 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.test.tool;
import org.antlr.v4.runtime.misc.Nullable;
import org.junit.Test;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
public class TestLexerExec extends BaseTest {
/** A rule whose literal is a lone double quote must tokenize a one-character quote input. */
@Test public void testQuoteTranslation() throws Exception {
    String expected =
        "[@0,0:0='\"',<1>,1:0]\n" +
        "[@1,1:0='<EOF>',<-1>,1:1]\n";
    String lexerGrammar =
        "lexer grammar L;\n"+
        "QUOTE : '\"' ;\n"; // the point of the test: this has to compile
    String actual = execLexer("L.g4", lexerGrammar, "L", "\"");
    assertEquals(expected, actual);
}
/**
 * Rule {@code A} references rule {@code I}; "-21" must come out as a single
 * token of type A rather than as separate tokens.
 */
@Test public void testRefToRuleDoesNotSetTokenNorEmitAnother() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "A : '-' I ;\n" +
        "I : '0'..'9'+ ;\n"+
        "WS : (' '|'\\n') -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "34 -21 3");
    assertEquals(
        "[@0,0:1='34',<2>,1:0]\n" +
        "[@1,3:5='-21',<1>,1:3]\n" +
        "[@2,7:7='3',<2>,1:7]\n" +
        "[@3,8:7='<EOF>',<-1>,1:8]\n", // EOF is zero-length, hence range 8:7 rather than 8:8
        actual);
}
/** Backslash, slash and their two-character combinations must each map to their own token type. */
@Test public void testSlashes() throws Exception {
    String expected =
        "[@0,0:0='\\',<1>,1:0]\n" +
        "[@1,2:2='/',<2>,1:2]\n" +
        "[@2,4:5='\\/',<3>,1:4]\n" +
        "[@3,7:8='/\\',<4>,1:7]\n" +
        "[@4,9:8='<EOF>',<-1>,1:9]\n";
    String lexerGrammar =
        "lexer grammar L;\n"+
        "Backslash : '\\\\';\n" +
        "Slash : '/';\n" +
        "Vee : '\\\\/';\n" +
        "Wedge : '/\\\\';\n"+
        "WS : [ \\t] -> skip;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "\\ / \\/ /\\");
    assertEquals(expected, actual);
}
/**
 * Regression test for antlr/antlr4#224: "Parentheses without
 * quantifier in lexer rules have unclear effect".
 * https://github.com/antlr/antlr4/issues/224
 *
 * Expects "-.-.-!" to lex as START_BLOCK followed by SEPARATOR.
 */
@Test public void testParentheses() {
    String expected =
        "[@0,0:4='-.-.-',<1>,1:0]\n" +
        "[@1,5:5='!',<3>,1:5]\n" +
        "[@2,6:5='<EOF>',<-1>,1:6]\n";
    String lexerGrammar =
        "lexer grammar Demo;\n" +
        "\n" +
        "START_BLOCK: '-.-.-';\n" +
        "\n" +
        "ID : (LETTER SEPARATOR) (LETTER SEPARATOR)+;\n" +
        "fragment LETTER: L_A|L_K;\n" +
        "fragment L_A: '.-';\n" +
        "fragment L_K: '-.-';\n" +
        "\n" +
        "SEPARATOR: '!';\n";
    String actual = execLexer("Demo.g4", lexerGrammar, "Demo", "-.-.-!");
    assertEquals(expected, actual);
}
/** With a non-greedy {@code *?} loop, two adjacent quoted strings must lex as two STRING tokens. */
@Test
public void testNonGreedyTermination() throws Exception {
    String expected =
        "[@0,0:3='\"hi\"',<1>,1:0]\n" +
        "[@1,4:8='\"mom\"',<1>,1:4]\n" +
        "[@2,9:8='<EOF>',<-1>,1:9]\n";
    String lexerGrammar =
        "lexer grammar L;\n"
        + "STRING : '\"' ('\"\"' | .)*? '\"';";
    String actual = execLexer("L.g4", lexerGrammar, "L", "\"hi\"\"mom\"");
    assertEquals(expected, actual);
    assertNull(stderrDuringParse);
}
/** With a non-greedy {@code +?} loop, the input starting with three quotes must lex as one STRING token. */
@Test
public void testNonGreedyTermination2() throws Exception {
    String expected =
        "[@0,0:6='\"\"\"mom\"',<1>,1:0]\n" +
        "[@1,7:6='<EOF>',<-1>,1:7]\n";
    String lexerGrammar =
        "lexer grammar L;\n"
        + "STRING : '\"' ('\"\"' | .)+? '\"';";
    String actual = execLexer("L.g4", lexerGrammar, "L", "\"\"\"mom\"");
    assertEquals(expected, actual);
    assertNull(stderrDuringParse);
}
/** Greedy optional ({@code CMT?}): both comment lines are expected in a single CMT token. */
@Test
public void testGreedyOptional() throws Exception {
    String expected =
        "[@0,0:13='//blah\\n//blah\\n',<1>,1:0]\n" +
        "[@1,14:13='<EOF>',<-1>,3:14]\n";
    String lexerGrammar =
        "lexer grammar L;\n"
        + "CMT : '//' .*? '\\n' CMT?;\n"
        + "WS : (' '|'\\t')+;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "//blah\n//blah\n");
    assertEquals(expected, actual);
    assertNull(stderrDuringParse);
}
/** Non-greedy optional ({@code CMT??}): each comment line is expected as its own CMT token. */
@Test
public void testNonGreedyOptional() throws Exception {
    String expected =
        "[@0,0:6='//blah\\n',<1>,1:0]\n" +
        "[@1,7:13='//blah\\n',<1>,2:0]\n" +
        "[@2,14:13='<EOF>',<-1>,3:7]\n";
    String lexerGrammar =
        "lexer grammar L;\n"
        + "CMT : '//' .*? '\\n' CMT??;\n"
        + "WS : (' '|'\\t')+;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "//blah\n//blah\n");
    assertEquals(expected, actual);
    assertNull(stderrDuringParse);
}
/** Greedy closure (trailing {@code CMT*}): both comment lines are expected in a single CMT token. */
@Test
public void testGreedyClosure() throws Exception {
    String expected =
        "[@0,0:13='//blah\\n//blah\\n',<1>,1:0]\n" +
        "[@1,14:13='<EOF>',<-1>,3:14]\n";
    String lexerGrammar =
        "lexer grammar L;\n"
        + "CMT : '//' .*? '\\n' CMT*;\n"
        + "WS : (' '|'\\t')+;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "//blah\n//blah\n");
    assertEquals(expected, actual);
    assertNull(stderrDuringParse);
}
/** Non-greedy closure (trailing {@code CMT*?}): each comment line is expected as its own CMT token. */
@Test
public void testNonGreedyClosure() throws Exception {
    String expected =
        "[@0,0:6='//blah\\n',<1>,1:0]\n" +
        "[@1,7:13='//blah\\n',<1>,2:0]\n" +
        "[@2,14:13='<EOF>',<-1>,3:7]\n";
    String lexerGrammar =
        "lexer grammar L;\n"
        + "CMT : '//' .*? '\\n' CMT*?;\n"
        + "WS : (' '|'\\t')+;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "//blah\n//blah\n");
    assertEquals(expected, actual);
    assertNull(stderrDuringParse);
}
/** Greedy positive closure ({@code (...)+}): both comment lines are expected in a single CMT token. */
@Test
public void testGreedyPositiveClosure() throws Exception {
    String expected =
        "[@0,0:13='//blah\\n//blah\\n',<1>,1:0]\n" +
        "[@1,14:13='<EOF>',<-1>,3:14]\n";
    String lexerGrammar =
        "lexer grammar L;\n"
        + "CMT : ('//' .*? '\\n')+;\n"
        + "WS : (' '|'\\t')+;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "//blah\n//blah\n");
    assertEquals(expected, actual);
    assertNull(stderrDuringParse);
}
/** Non-greedy positive closure ({@code (...)+?}): each comment line is expected as its own CMT token. */
@Test
public void testNonGreedyPositiveClosure() throws Exception {
    String expected =
        "[@0,0:6='//blah\\n',<1>,1:0]\n" +
        "[@1,7:13='//blah\\n',<1>,2:0]\n" +
        "[@2,14:13='<EOF>',<-1>,3:7]\n";
    String lexerGrammar =
        "lexer grammar L;\n"
        + "CMT : ('//' .*? '\\n')+?;\n"
        + "WS : (' '|'\\t')+;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "//blah\n//blah\n");
    assertEquals(expected, actual);
    assertNull(stderrDuringParse);
}
// Recursive CMT rule with a non-greedy star loop; whatever follows each
// comment (a newline) is matched by the WS rule, so no errors are expected.
@Test
public void testRecursiveLexerRuleRefWithWildcardStar1() throws Exception {
    // Note: this grammar has no catch-all "ANY : .;" rule.
    String lexerGrammar =
        "lexer grammar L;\n"+
        "CMT : '/*' (CMT | .)*? '*/' ;\n" +
        "WS : (' '|'\\n')+ ;\n";
    String input =
        "/* ick */\n" +
        "/* /* */\n" +
        "/* /*nested*/ */\n";
    String actual = execLexer("L.g4", lexerGrammar, "L", input);
    assertEquals(
        "[@0,0:8='/* ick */',<1>,1:0]\n" +
        "[@1,9:9='\\n',<2>,1:9]\n" +
        "[@2,10:34='/* /* */\\n/* /*nested*/ */',<1>,2:0]\n" +
        "[@3,35:35='\\n',<2>,3:16]\n" +
        "[@4,36:35='<EOF>',<-1>,4:17]\n",
        actual);
    assertNull(stderrDuringParse);
}
// Recursive CMT rule with a non-greedy star loop; the 'x' after a comment
// matches no rule, so recognition errors are expected for it.
@Test
public void testRecursiveLexerRuleRefWithWildcardStar2() throws Exception {
    // Note: this grammar has no catch-all "ANY : .;" rule.
    String lexerGrammar =
        "lexer grammar L;\n"+
        "CMT : '/*' (CMT | .)*? '*/' ;\n" +
        "WS : (' '|'\\n')+ ;\n";
    String input =
        "/* ick */x\n" +
        "/* /* */x\n" +
        "/* /*nested*/ */x\n";
    String actual = execLexer("L.g4", lexerGrammar, "L", input);
    assertEquals(
        "[@0,0:8='/* ick */',<1>,1:0]\n" +
        "[@1,10:10='\\n',<2>,1:10]\n" +
        "[@2,11:36='/* /* */x\\n/* /*nested*/ */',<1>,2:0]\n" +
        "[@3,38:38='\\n',<2>,3:17]\n" +
        "[@4,39:38='<EOF>',<-1>,4:18]\n",
        actual);
    assertEquals(
        "line 1:9 token recognition error at: 'x'\n" +
        "line 3:16 token recognition error at: 'x'\n",
        stderrDuringParse);
}
// Recursive CMT rule with a non-greedy plus loop; whatever follows each
// comment (a newline) is matched by the WS rule, so no errors are expected.
@Test
public void testRecursiveLexerRuleRefWithWildcardPlus1() throws Exception {
    // Note: this grammar has no catch-all "ANY : .;" rule.
    String lexerGrammar =
        "lexer grammar L;\n"+
        "CMT : '/*' (CMT | .)+? '*/' ;\n" +
        "WS : (' '|'\\n')+ ;\n";
    String input =
        "/* ick */\n" +
        "/* /* */\n" +
        "/* /*nested*/ */\n";
    String actual = execLexer("L.g4", lexerGrammar, "L", input);
    assertEquals(
        "[@0,0:8='/* ick */',<1>,1:0]\n" +
        "[@1,9:9='\\n',<2>,1:9]\n" +
        "[@2,10:34='/* /* */\\n/* /*nested*/ */',<1>,2:0]\n" +
        "[@3,35:35='\\n',<2>,3:16]\n" +
        "[@4,36:35='<EOF>',<-1>,4:17]\n",
        actual);
    assertNull(stderrDuringParse);
}
// Recursive CMT rule with a non-greedy plus loop; the 'x' after a comment
// matches no rule, so recognition errors are expected for it.
@Test
public void testRecursiveLexerRuleRefWithWildcardPlus2() throws Exception {
    // Note: this grammar has no catch-all "ANY : .;" rule.
    String lexerGrammar =
        "lexer grammar L;\n"+
        "CMT : '/*' (CMT | .)+? '*/' ;\n" +
        "WS : (' '|'\\n')+ ;\n";
    String input =
        "/* ick */x\n" +
        "/* /* */x\n" +
        "/* /*nested*/ */x\n";
    String actual = execLexer("L.g4", lexerGrammar, "L", input);
    assertEquals(
        "[@0,0:8='/* ick */',<1>,1:0]\n" +
        "[@1,10:10='\\n',<2>,1:10]\n" +
        "[@2,11:36='/* /* */x\\n/* /*nested*/ */',<1>,2:0]\n" +
        "[@3,38:38='\\n',<2>,3:17]\n" +
        "[@4,39:38='<EOF>',<-1>,4:18]\n",
        actual);
    assertEquals(
        "line 1:9 token recognition error at: 'x'\n" +
        "line 3:16 token recognition error at: 'x'\n",
        stderrDuringParse);
}
/**
 * Rule {@code I} embeds printing actions at several positions in two
 * alternatives. For input "ab" only the actions of the second alternative
 * (and the trailing one) are expected in the output, each showing the text
 * matched so far.
 */
@Test public void testActionPlacement() throws Exception {
    String expected =
        "stuff0: \n" +
        "stuff1: a\n" +
        "stuff2: ab\n" +
        "ab\n" +
        "[@0,0:1='ab',<1>,1:0]\n" +
        "[@1,2:1='<EOF>',<-1>,1:2]\n";
    String lexerGrammar =
        "lexer grammar L;\n"+
        "I : ({System.out.println(\"stuff fail: \" + getText());} 'a' | {System.out.println(\"stuff0: \" + getText());} 'a' {System.out.println(\"stuff1: \" + getText());} 'b' {System.out.println(\"stuff2: \" + getText());}) {System.out.println(getText());} ;\n"+
        "WS : (' '|'\\n') -> skip ;\n" +
        "J : .;\n";
    String actual = execLexer("L.g4", lexerGrammar, "L", "ab");
    assertEquals(expected, actual);
}
/** With alternatives {@code 'a' | 'ab'}, input "ab" is expected to match the longer alternative as one token. */
@Test public void testGreedyConfigs() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "I : ('a' | 'ab') {System.out.println(getText());} ;\n"+
        "WS : (' '|'\\n') -> skip ;\n" +
        "J : .;\n";
    String actual = execLexer("L.g4", lexerGrammar, "L", "ab");
    assertEquals(
        "ab\n" +
        "[@0,0:1='ab',<1>,1:0]\n" +
        "[@1,2:1='<EOF>',<-1>,1:2]\n",
        actual);
}
/**
 * With a leading non-greedy {@code .*?} in rule {@code I}, input "ab" is
 * expected to split into an I token ("a") and a J token ("b").
 */
@Test public void testNonGreedyConfigs() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "I : .*? ('a' | 'ab') {System.out.println(getText());} ;\n"+
        "WS : (' '|'\\n') -> skip ;\n" +
        "J : . {System.out.println(getText());};\n";
    String actual = execLexer("L.g4", lexerGrammar, "L", "ab");
    assertEquals(
        "a\n" +
        "b\n" +
        "[@0,0:0='a',<1>,1:0]\n" +
        "[@1,1:1='b',<3>,1:1]\n" +
        "[@2,2:1='<EOF>',<-1>,1:2]\n",
        actual);
}
/**
 * The keyword rule KEND is listed before ID: exactly "end" must be typed as
 * KEND, while longer identifiers containing "end" must be typed as ID.
 */
@Test public void testKeywordID() throws Exception {
    String expected =
        "[@0,0:2='end',<1>,1:0]\n" +
        "[@1,3:3=' ',<3>,1:3]\n" +
        "[@2,4:7='eend',<2>,1:4]\n" +
        "[@3,8:8=' ',<3>,1:8]\n" +
        "[@4,9:14='ending',<2>,1:9]\n" +
        "[@5,15:15=' ',<3>,1:15]\n" +
        "[@6,16:16='a',<2>,1:16]\n" +
        "[@7,17:16='<EOF>',<-1>,1:17]\n";
    String lexerGrammar =
        "lexer grammar L;\n"+
        "KEND : 'end' ;\n" + // listed first, so it has priority
        "ID : 'a'..'z'+ ;\n" +
        "WS : (' '|'\\n')+ ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "end eend ending a");
    assertEquals(expected, actual);
}
/**
 * Checks the token types assigned to "x 0 1 a.b a.l" by a grammar that has
 * hex-literal, decimal-literal, DOT and ID rules.
 */
@Test public void testHexVsID() throws Exception {
    String expected =
        "[@0,0:0='x',<5>,1:0]\n" +
        "[@1,1:1=' ',<6>,1:1]\n" +
        "[@2,2:2='0',<2>,1:2]\n" +
        "[@3,3:3=' ',<6>,1:3]\n" +
        "[@4,4:4='1',<2>,1:4]\n" +
        "[@5,5:5=' ',<6>,1:5]\n" +
        "[@6,6:6='a',<5>,1:6]\n" +
        "[@7,7:7='.',<4>,1:7]\n" +
        "[@8,8:8='b',<5>,1:8]\n" +
        "[@9,9:9=' ',<6>,1:9]\n" +
        "[@10,10:10='a',<5>,1:10]\n" +
        "[@11,11:11='.',<4>,1:11]\n" +
        "[@12,12:12='l',<5>,1:12]\n" +
        "[@13,13:12='<EOF>',<-1>,1:13]\n";
    String lexerGrammar =
        "lexer grammar L;\n"+
        "HexLiteral : '0' ('x'|'X') HexDigit+ ;\n"+
        "DecimalLiteral : ('0' | '1'..'9' '0'..'9'*) ;\n" +
        "FloatingPointLiteral : ('0x' | '0X') HexDigit* ('.' HexDigit*)? ;\n" +
        "DOT : '.' ;\n" +
        "ID : 'a'..'z'+ ;\n" +
        "fragment HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;\n" +
        "WS : (' '|'\\n')+ ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "x 0 1 a.b a.l");
    assertEquals(expected, actual);
}
/**
 * Empty input with a rule {@code DONE : EOF}: a DONE token (type 1) is
 * expected first, followed by the regular EOF token.
 */
@Test public void testEOFByItself() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n" +
        "DONE : EOF ;\n" +
        "A : 'a';\n";
    String actual = execLexer("L.g4", lexerGrammar, "L", "");
    assertEquals(
        "[@0,0:-1='<EOF>',<1>,1:0]\n" +
        "[@1,0:-1='<EOF>',<-1>,1:0]\n",
        actual);
}
/**
 * Rule {@code A : 'a' EOF} precedes {@code B : 'a'}. Empty input must give
 * only EOF; input "a" must be typed as A (type 1), followed by EOF.
 */
@Test public void testEOFSuffixInFirstRule() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "A : 'a' EOF ;\n"+
        "B : 'a';\n"+
        "C : 'c';\n";

    String forEmptyInput = execLexer("L.g4", lexerGrammar, "L", "");
    assertEquals("[@0,0:-1='<EOF>',<-1>,1:0]\n", forEmptyInput);

    String forSingleA = execLexer("L.g4", lexerGrammar, "L", "a");
    assertEquals(
        "[@0,0:0='a',<1>,1:0]\n" +
        "[@1,1:0='<EOF>',<-1>,1:1]\n",
        forSingleA);
}
/** A single-character WS set containing space, newline and carriage return must be skipped between digit runs. */
@Test public void testCharSet() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "I : '0'..'9'+ {System.out.println(\"I\");} ;\n"+
        "WS : [ \\n\\u000D] -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "34\r\n 34");
    assertEquals(
        "I\n" +
        "I\n" +
        "[@0,0:1='34',<1>,1:0]\n" +
        "[@1,5:6='34',<1>,2:1]\n" +
        "[@2,7:6='<EOF>',<-1>,2:3]\n",
        actual);
}
/** Same as the plain char-set case, but WS uses a {@code +} loop over the set. */
@Test public void testCharSetPlus() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "I : '0'..'9'+ {System.out.println(\"I\");} ;\n"+
        "WS : [ \\n\\u000D]+ -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "34\r\n 34");
    assertEquals(
        "I\n" +
        "I\n" +
        "[@0,0:1='34',<1>,1:0]\n" +
        "[@1,5:6='34',<1>,2:1]\n" +
        "[@2,7:6='<EOF>',<-1>,2:3]\n",
        actual);
}
/** Negated character sets: "xaf" is expected to match rule {@code I} as a single token. */
@Test public void testCharSetNot() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "I : ~[ab \\n] ~[ \\ncd]* {System.out.println(\"I\");} ;\n"+
        "WS : [ \\n\\u000D]+ -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "xaf");
    assertEquals(
        "I\n" +
        "[@0,0:2='xaf',<1>,1:0]\n" +
        "[@1,3:2='<EOF>',<-1>,1:3]\n",
        actual);
}
/** A negated set combined with a literal in one alternative block: both 'a' and 'x' must be typed as I. */
@Test public void testCharSetInSet() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "I : (~[ab \\n]|'a') {System.out.println(\"I\");} ;\n"+
        "WS : [ \\n\\u000D]+ -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "a x");
    assertEquals(
        "I\n" +
        "I\n" +
        "[@0,0:0='a',<1>,1:0]\n" +
        "[@1,2:2='x',<1>,1:2]\n" +
        "[@2,3:2='<EOF>',<-1>,1:3]\n",
        actual);
}
/** Range-based character sets for integers and identifiers, with mixed whitespace skipped in between. */
@Test public void testCharSetRange() throws Exception {
    String expected =
        "I\n" +
        "I\n" +
        "ID\n" +
        "ID\n" +
        "[@0,0:1='34',<1>,1:0]\n" +
        "[@1,4:5='34',<1>,1:4]\n" +
        "[@2,7:8='a2',<2>,1:7]\n" +
        "[@3,10:12='abc',<2>,1:10]\n" +
        "[@4,18:17='<EOF>',<-1>,2:3]\n";
    String lexerGrammar =
        "lexer grammar L;\n"+
        "I : [0-9]+ {System.out.println(\"I\");} ;\n"+
        "ID : [a-zA-Z] [a-zA-Z0-9]* {System.out.println(\"ID\");} ;\n"+
        "WS : [ \\n\\u0009\\r]+ -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "34\r 34 a2 abc \n ");
    assertEquals(expected, actual);
}
/** A set written as {@code [0-]} (range without an end) must still match the input "00". */
@Test public void testCharSetWithMissingEndRange() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "I : [0-]+ {System.out.println(\"I\");} ;\n"+
        "WS : [ \\n\\u000D]+ -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "00\r\n");
    assertEquals(
        "I\n" +
        "[@0,0:1='00',<1>,1:0]\n" +
        "[@1,4:3='<EOF>',<-1>,2:0]\n",
        actual);
}
/** The WS set contains an incomplete unicode escape; digits must still lex and the trailing space must be skipped. */
@Test public void testCharSetWithMissingEscapeChar() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "I : [0-9]+ {System.out.println(\"I\");} ;\n"+
        "WS : [ \\u]+ -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "34 ");
    assertEquals(
        "I\n" +
        "[@0,0:1='34',<1>,1:0]\n" +
        "[@1,3:2='<EOF>',<-1>,1:3]\n",
        actual);
}
/** Escaped dash and closing bracket inside a character set must be matched literally. */
@Test public void testCharSetWithEscapedChar() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "DASHBRACK : [\\-\\]]+ {System.out.println(\"DASHBRACK\");} ;\n"+
        "WS : [ \\u]+ -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "- ] ");
    assertEquals(
        "DASHBRACK\n" +
        "DASHBRACK\n" +
        "[@0,0:0='-',<1>,1:0]\n" +
        "[@1,2:2=']',<1>,1:2]\n" +
        "[@2,4:3='<EOF>',<-1>,1:4]\n",
        actual);
}
/** A set with a reversed range ({@code z-a}) plus '9' must still match the input "9". */
@Test public void testCharSetWithReversedRange() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "A : [z-a9]+ {System.out.println(\"A\");} ;\n"+
        "WS : [ \\u]+ -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "9");
    assertEquals(
        "A\n" +
        "[@0,0:0='9',<1>,1:0]\n" +
        "[@1,1:0='<EOF>',<-1>,1:1]\n",
        actual);
}
/** A double quote inside a character set must be matched like any other member of the set. */
@Test public void testCharSetWithQuote() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "A : [\"a-z]+ {System.out.println(\"A\");} ;\n"+
        "WS : [ \\n\\t]+ -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "b\"a");
    assertEquals(
        "A\n" +
        "[@0,0:2='b\"a',<1>,1:0]\n" +
        "[@1,3:2='<EOF>',<-1>,1:3]\n",
        actual);
}
/** A double quote and an escaped backslash inside a character set must both be matched. */
@Test public void testCharSetWithQuote2() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"+
        "A : [\"\\\\ab]+ {System.out.println(\"A\");} ;\n"+
        "WS : [ \\n\\t]+ -> skip ;";
    String actual = execLexer("L.g4", lexerGrammar, "L", "b\"\\a");
    assertEquals(
        "A\n" +
        "[@0,0:3='b\"\\a',<1>,1:0]\n" +
        "[@1,4:3='<EOF>',<-1>,1:4]\n",
        actual);
}
/**
 * Runs the grammar loaded from PositionAdjustingLexer.g4 over a small input
 * and checks token text, type and position.
 * NOTE(review): the grammar file is not visible here; the numeric token types
 * below are taken from the original test's constants -- confirm against the grammar.
 */
@Test
public void testPositionAdjustingLexer() throws Exception {
    final int tokensType = 4;
    final int labelType = 5;
    final int identifierType = 6;

    String lexerGrammar = load("PositionAdjustingLexer.g4", null);
    String source =
        "tokens\n" +
        "tokens {\n" +
        "notLabel\n" +
        "label1 =\n" +
        "label2 +=\n" +
        "notLabel\n";
    String actual = execLexer("PositionAdjustingLexer.g4", lexerGrammar, "PositionAdjustingLexer", source);

    String expected =
        "[@0,0:5='tokens',<" + identifierType + ">,1:0]\n" +
        "[@1,7:12='tokens',<" + tokensType + ">,2:0]\n" +
        "[@2,14:14='{',<3>,2:7]\n" +
        "[@3,16:23='notLabel',<" + identifierType + ">,3:0]\n" +
        "[@4,25:30='label1',<" + labelType + ">,4:0]\n" +
        "[@5,32:32='=',<1>,4:7]\n" +
        "[@6,34:39='label2',<" + labelType + ">,5:0]\n" +
        "[@7,41:42='+=',<2>,5:7]\n" +
        "[@8,44:51='notLabel',<" + identifierType + ">,6:0]\n" +
        "[@9,53:52='<EOF>',<-1>,7:0]\n";
    assertEquals(expected, actual);
}
/**
 * Regression test for antlr/antlr4#76 "Serialized ATN strings
 * should be split when longer than 2^16 bytes (class file limitation)"
 * https://github.com/antlr/antlr4/issues/76
 *
 * Builds a lexer grammar with 4000 keyword rules (KW0 .. KW3999) and checks
 * that "KW400" is typed as 402 (WS is rule 1, so KW<i> is rule i + 2).
 */
@Test
public void testLargeLexer() throws Exception {
    StringBuilder lexerGrammar = new StringBuilder("lexer grammar L;\n");
    lexerGrammar.append("WS : [ \\t\\r\\n]+ -> skip;\n");
    for (int kw = 0; kw < 4000; kw++) {
        lexerGrammar.append("KW" + kw + " : 'KW' '" + kw + "';\n");
    }
    String actual = execLexer("L.g4", lexerGrammar.toString(), "L", "KW400");
    assertEquals(
        "[@0,0:4='KW400',<402>,1:0]\n" +
        "[@1,5:4='<EOF>',<-1>,1:5]\n",
        actual);
}
/**
 * This is a regression test for antlr/antlr4#687 "Empty zero-length tokens
 * cannot have lexer commands" and antlr/antlr4#688 "Lexer cannot match
 * zero-length tokens"
 * https://github.com/antlr/antlr4/issues/687
 * https://github.com/antlr/antlr4/issues/688
 *
 * Lexes {@code 'xxx'} through three modes chained with {@code more},
 * {@code pushMode}, {@code mode} and {@code popMode}; the rule
 * {@code StringMode_Done} has an empty alternative. One token covering the
 * whole input is expected, followed by EOF, with no error output.
 */
@Test public void testZeroLengthToken() throws Exception {
    String grammar =
        "lexer grammar L;\n"+
        "\n" +
        "BeginString\n" +
        " : '\\'' -> more, pushMode(StringMode)\n" +
        " ;\n" +
        "\n" +
        "mode StringMode;\n" +
        "\n" +
        " StringMode_X : 'x' -> more;\n" +
        " StringMode_Done : -> more, mode(EndStringMode);\n" +
        "\n" +
        "mode EndStringMode; \n" +
        "\n" +
        " EndString : '\\'' -> popMode;\n";
    String found = execLexer("L.g4", grammar, "L", "'xxx'");
    String expecting =
        "[@0,0:4=''xxx'',<1>,1:0]\n" +
        "[@1,5:4='<EOF>',<-1>,1:5]\n";
    assertEquals(expecting, found);
    // The regenerated version of this test also requires a clean stderr;
    // without this check a recognition error could go unnoticed.
    assertNull(stderrDuringParse);
}
}