From 18508e2209889b910de259fb220a5848901d9227 Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Fri, 22 Feb 2013 14:06:25 -0600 Subject: [PATCH] Do not allow raw newline characters in lexer string literals --- tool/src/org/antlr/v4/parse/ANTLRLexer.g | 2 +- tool/test/org/antlr/v4/test/BaseTest.java | 2 ++ .../org/antlr/v4/test/TestATNDeserialization.java | 2 +- .../org/antlr/v4/test/TestATNLexerInterpreter.java | 14 +++++++------- .../org/antlr/v4/test/TestATNSerialization.java | 2 +- tool/test/org/antlr/v4/test/TestLexerExec.java | 14 +++++++------- 6 files changed, 19 insertions(+), 17 deletions(-) diff --git a/tool/src/org/antlr/v4/parse/ANTLRLexer.g b/tool/src/org/antlr/v4/parse/ANTLRLexer.g index 72f579c86..ab4977cb5 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRLexer.g +++ b/tool/src/org/antlr/v4/parse/ANTLRLexer.g @@ -621,7 +621,7 @@ STRING_LITERAL @init { int len = 0; } - : '\'' ( ( ESC_SEQ | ~('\\'|'\'') ) {len++;} )* '\'' + : '\'' ( ( ESC_SEQ | ~('\\'|'\''|'\r'|'\n') ) {len++;} )* '\'' ; // A valid hex digit specification diff --git a/tool/test/org/antlr/v4/test/BaseTest.java b/tool/test/org/antlr/v4/test/BaseTest.java index 9f78a32c3..f87db29dd 100644 --- a/tool/test/org/antlr/v4/test/BaseTest.java +++ b/tool/test/org/antlr/v4/test/BaseTest.java @@ -159,6 +159,7 @@ public abstract class BaseTest { protected ATN createATN(Grammar g, boolean useSerializer) { if ( g.atn==null ) { semanticProcess(g); + assertEquals(0, g.tool.getNumErrors()); ParserATNFactory f; if ( g.isLexer() ) { @@ -169,6 +170,7 @@ public abstract class BaseTest { } g.atn = f.createATN(); + assertEquals(0, g.tool.getNumErrors()); } ATN atn = g.atn; diff --git a/tool/test/org/antlr/v4/test/TestATNDeserialization.java b/tool/test/org/antlr/v4/test/TestATNDeserialization.java index c76c3a93d..d62b7ec23 100644 --- a/tool/test/org/antlr/v4/test/TestATNDeserialization.java +++ b/tool/test/org/antlr/v4/test/TestATNDeserialization.java @@ -125,7 +125,7 @@ public class TestATNDeserialization extends BaseTest { @Test public void testLexerEOFInSet() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ - "A : 'a' (EOF|'\n') ;\n"); + "A : 'a' (EOF|'\\n') ;\n"); checkDeserializationIsStable(lg); } diff --git a/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java b/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java index c3037a9ea..a88449c99 100644 --- a/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java +++ b/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java @@ -143,7 +143,7 @@ public class TestATNLexerInterpreter extends BaseTest { "lexer grammar L;\n"+ "KEND : 'end' ;\n" + "ID : 'a'..'z'+ ;\n" + - "WS : (' '|'\n')+ ;"); + "WS : (' '|'\\n')+ ;"); String expecting = "ID, EOF"; //checkLexerMatches(lg, "e", expecting); expecting = "KEND, EOF"; @@ -159,7 +159,7 @@ public class TestATNLexerInterpreter extends BaseTest { "lexer grammar L;\n"+ "INT : DIGIT+ ;\n" + "fragment DIGIT : '0'..'9' ;\n" + - "WS : (' '|'\n')+ ;"); + "WS : (' '|'\\n')+ ;"); String expecting = "INT, WS, INT, EOF"; checkLexerMatches(lg, "32 99", expecting); } @@ -168,7 +168,7 @@ public class TestATNLexerInterpreter extends BaseTest { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ "CMT : '/*' (CMT | ~'*')+ '*/' ;\n" + - "WS : (' '|'\n')+ ;"); + "WS : (' '|'\\n')+ ;"); String expecting = "CMT, WS, CMT, EOF"; checkLexerMatches(lg, "/* ick */\n/* /*nested*/ */", expecting); } @@ -177,7 +177,7 @@ public class TestATNLexerInterpreter extends BaseTest { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ "CMT : '/*' (CMT | .)*? '*/' ;\n" + - "WS : (' '|'\n')+ ;"); + "WS : (' '|'\\n')+ ;"); String expecting = "CMT, WS, CMT, WS, EOF"; checkLexerMatches(lg, @@ -257,7 +257,7 @@ public class TestATNLexerInterpreter extends BaseTest { @Test public void testEOFAtEndOfLineComment() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ - "CMT : '//' ~('\n')* ;\n"); + "CMT : '//' ~('\\n')* ;\n"); String expecting = "CMT, EOF"; checkLexerMatches(lg, "//x", expecting); } @@ -265,7 +265,7 @@ public class TestATNLexerInterpreter extends BaseTest { @Test public void testEOFAtEndOfLineComment2() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ - "CMT : '//' ~('\n'|'\r')* ;\n"); + "CMT : '//' ~('\\n'|'\\r')* ;\n"); String expecting = "CMT, EOF"; checkLexerMatches(lg, "//x", expecting); } @@ -276,7 +276,7 @@ public class TestATNLexerInterpreter extends BaseTest { @Test public void testEOFInSetAtEndOfLineComment() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ - "CMT : '//' .* (EOF|'\n') ;\n"); + "CMT : '//' .* (EOF|'\\n') ;\n"); String expecting = "CMT, EOF"; checkLexerMatches(lg, "//", expecting); } diff --git a/tool/test/org/antlr/v4/test/TestATNSerialization.java b/tool/test/org/antlr/v4/test/TestATNSerialization.java index c6f2da748..2ed3a1f92 100644 --- a/tool/test/org/antlr/v4/test/TestATNSerialization.java +++ b/tool/test/org/antlr/v4/test/TestATNSerialization.java @@ -358,7 +358,7 @@ public class TestATNSerialization extends BaseTest { @Test public void testLexerEOFInSet() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ - "INT : 'a' (EOF|'\n') ;\n"); + "INT : 'a' (EOF|'\\n') ;\n"); String expecting = "max type 1\n" + "0:TOKEN_START -1\n" + diff --git a/tool/test/org/antlr/v4/test/TestLexerExec.java b/tool/test/org/antlr/v4/test/TestLexerExec.java index c920192b9..60360c26e 100644 --- a/tool/test/org/antlr/v4/test/TestLexerExec.java +++ b/tool/test/org/antlr/v4/test/TestLexerExec.java @@ -202,7 +202,7 @@ public class TestLexerExec extends BaseTest { String grammar = "lexer grammar L;\n"+ "CMT : '/*' (CMT | .)*? '*/' ;\n" + - "WS : (' '|'\n')+ ;\n" + "WS : (' '|'\\n')+ ;\n" /*+ "ANY : .;"*/; String expecting = @@ -240,7 +240,7 @@ public class TestLexerExec extends BaseTest { String grammar = "lexer grammar L;\n"+ "CMT : '/*' (CMT | .)+? '*/' ;\n" + - "WS : (' '|'\n')+ ;\n" + "WS : (' '|'\\n')+ ;\n" /*+ "ANY : .;"*/; String expecting = @@ -382,7 +382,7 @@ public class TestLexerExec extends BaseTest { String grammar = "lexer grammar L;\n" + "STRING_START : '\"' {pushMode(STRING_MODE); more();} ;\n" + - "WS : (' '|'\n') {skip();} ;\n"+ + "WS : (' '|'\\n') {skip();} ;\n"+ "mode STRING_MODE;\n"+ "STRING : '\"' {popMode();} ;\n"+ "ANY : . {more();} ;\n"; @@ -398,7 +398,7 @@ public class TestLexerExec extends BaseTest { String grammar = "lexer grammar L;\n" + "STRING_START : '\"' -> pushMode(STRING_MODE), more ;\n" + - "WS : (' '|'\n') -> skip ;\n"+ + "WS : (' '|'\\n') -> skip ;\n"+ "mode STRING_MODE;\n"+ "STRING : '\"' -> popMode ;\n"+ // token type 2 "ANY : . -> more ;\n"; @@ -414,7 +414,7 @@ public class TestLexerExec extends BaseTest { String grammar = "lexer grammar L;\n" + "STRING_START : '\"' -> mode(STRING_MODE), more ;\n" + - "WS : (' '|'\n') -> skip ;\n"+ + "WS : (' '|'\\n') -> skip ;\n"+ "mode STRING_MODE;\n"+ "STRING : '\"' -> mode(DEFAULT_MODE) ;\n"+ // ttype 2 since '"' ambiguity "ANY : . -> more ;\n"; @@ -431,7 +431,7 @@ public class TestLexerExec extends BaseTest { "lexer grammar L;\n"+ "KEND : 'end' ;\n" + // has priority "ID : 'a'..'z'+ ;\n" + - "WS : (' '|'\n')+ ;"; + "WS : (' '|'\\n')+ ;"; String found = execLexer("L.g4", grammar, "L", "end eend ending a"); String expecting = "[@0,0:2='end',<1>,1:0]\n" + @@ -454,7 +454,7 @@ public class TestLexerExec extends BaseTest { "DOT : '.' ;\n" + "ID : 'a'..'z'+ ;\n" + "fragment HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;\n" + - "WS : (' '|'\n')+ ;"; + "WS : (' '|'\\n')+ ;"; String found = execLexer("L.g4", grammar, "L", "x 0 1 a.b a.l"); String expecting = "[@0,0:0='x',<5>,1:0]\n" +