Merge branch 'lexer-recover-eof' of github.com:sharwell/antlr4

2012-11-04 09:35:28 -08:00 · 2012-11-04 09:35:28 -08:00 · c073835186
parent f8ec86edd7 b917c01bba
commit c073835186
2 changed files with 49 additions and 6 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
@ -134,17 +134,17 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 			throw new IllegalStateException("nextToken requires a non-null input stream.");
 		}

-		if (_hitEOF) {
-			emitEOF();
-			return _token;
-		}
-
 		// Mark start location in char stream so unbuffered streams are
 		// guaranteed at least have text of current token
 		int tokenStartMarker = _input.mark();
 		try{
 			outer:
 			while (true) {
+				if (_hitEOF) {
+					emitEOF();
+					return _token;
+				}
+
 				_token = null;
 				_channel = Token.DEFAULT_CHANNEL;
 				_tokenStartCharIndex = _input.index();
@ -369,7 +369,10 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 	}

 	public void recover(LexerNoViableAltException e) { // recovery step for a lexer no-viable-alternative error
-		getInterpreter().consume(_input); // skip a char and try again
+		if (_input.LA(1) != IntStream.EOF) { // guard: consume only when the next symbol is not EOF
+			// skip a char and try again
+			getInterpreter().consume(_input);
+		}
 	}

 	public void notifyListeners(LexerNoViableAltException e) {
--- a/tool/test/org/antlr/v4/test/TestLexerErrors.java
+++ b/tool/test/org/antlr/v4/test/TestLexerErrors.java
@ -46,6 +46,46 @@ public class TestLexerErrors extends BaseTest {
 		assertEquals(expectingError, error);
 	}

+	@Test
+	public void tesStringsEmbeddedInActions() { // lexes a bracketed action containing a quoted string, closed and unclosed
+		String grammar =
+			"lexer grammar Actions;\n"
+			+ "ACTION2 : '[' (STRING | ~'\"')*? ']';\n"
+			+ "STRING : '\"' ('\\\"' | .)*? '\"';\n"
+			+ "WS : [ \\t\\r\\n]+ -> skip;\n";
+		String tokens = execLexer("Actions.g4", grammar, "Actions", "[\"foo\"]"); // case 1: the embedded string is closed
+		String expectingTokens =
+			"[@0,0:6='[\"foo\"]',<1>,1:0]\n" +
+			"[@1,7:6='<EOF>',<-1>,1:7]\n";
+		assertEquals(expectingTokens, tokens); // whole input is one token of type 1, followed by EOF
+		assertNull(stderrDuringParse); // nothing expected on stderr for case 1
+
+		tokens = execLexer("Actions.g4", grammar, "Actions", "[\"foo]"); // case 2: closing quote of the string is missing
+		expectingTokens =
+			"[@0,6:5='<EOF>',<-1>,1:6]\n";
+		assertEquals(expectingTokens, tokens); // only the EOF token is expected
+		assertEquals("line 1:0 token recognition error at: '[\"foo]'\n", stderrDuringParse); // one error whose text is the entire input
+	}
+
+	@Test public void testEnforcedGreedyNestedBrances() { // lexes nested braces via a self-referencing ACTION rule, balanced and unbalanced
+		String grammar =
+			"lexer grammar R;\n"
+			+ "ACTION : '{' (ACTION | ~[{}])* '}';\n"
+			+ "WS : [ \\r\\n\\t]+ -> skip;\n";
+		String tokens = execLexer("R.g4", grammar, "R", "{ { } }"); // case 1: braces are balanced
+		String expectingTokens =
+			"[@0,0:6='{ { } }',<1>,1:0]\n" +
+			"[@1,7:6='<EOF>',<-1>,1:7]\n";
+		assertEquals(expectingTokens, tokens); // whole input is one token of type 1, followed by EOF
+		assertEquals(null, stderrDuringParse); // nothing expected on stderr for case 1
+
+		tokens = execLexer("R.g4", grammar, "R", "{ { }"); // case 2: the outer brace is never closed
+		expectingTokens =
+			"[@0,5:4='<EOF>',<-1>,1:5]\n";
+		assertEquals(expectingTokens, tokens); // only the EOF token is expected
+		assertEquals("line 1:0 token recognition error at: '{ { }'\n", stderrDuringParse); // one error whose text is the entire input
+	}
+
 	@Test public void testInvalidCharAtStartAfterDFACache() throws Exception {
 		String grammar =
 			"lexer grammar L;\n" +