From 156696be189bdf3929343fb4bea9b4fbe4a90063 Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Tue, 10 Jun 2014 17:25:45 -0500 Subject: [PATCH] Fix cases where file encoding was not handled properly in the tool (fixes #563) --- tool/src/org/antlr/v4/Tool.java | 2 +- .../org/antlr/v4/parse/TokenVocabParser.java | 17 ++++++-- .../v4/tool/BuildDependencyGenerator.java | 3 +- tool/test/org/antlr/v4/test/BaseTest.java | 8 +++- .../org/antlr/v4/test/TestParserExec.java | 40 +++++++++++++++++++ 5 files changed, 61 insertions(+), 9 deletions(-) diff --git a/tool/src/org/antlr/v4/Tool.java b/tool/src/org/antlr/v4/Tool.java index 2fd6c6c62..7df218901 100644 --- a/tool/src/org/antlr/v4/Tool.java +++ b/tool/src/org/antlr/v4/Tool.java @@ -604,7 +604,7 @@ public class Tool { return null; } - ANTLRFileStream in = new ANTLRFileStream(importedFile.getAbsolutePath()); + ANTLRFileStream in = new ANTLRFileStream(importedFile.getAbsolutePath(), grammarEncoding); GrammarRootAST root = parse(g.fileName, in); Grammar imported = createGrammar(root); imported.fileName = importedFile.getAbsolutePath(); diff --git a/tool/src/org/antlr/v4/parse/TokenVocabParser.java b/tool/src/org/antlr/v4/parse/TokenVocabParser.java index 2893ae4a6..934a40a68 100644 --- a/tool/src/org/antlr/v4/parse/TokenVocabParser.java +++ b/tool/src/org/antlr/v4/parse/TokenVocabParser.java @@ -37,9 +37,10 @@ import org.antlr.v4.tool.ErrorType; import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.util.LinkedHashMap; import java.util.Map; import java.util.regex.Matcher; @@ -60,12 +61,20 @@ public class TokenVocabParser { Map tokens = new LinkedHashMap(); int maxTokenType = -1; File fullFile = getImportedVocabFile(); - FileReader fr = null; + FileInputStream fis = null; BufferedReader br = null; try { Pattern tokenDefPattern = Pattern.compile("([^\n]+?)[ \\t]*?=[ \\t]*?([0-9]+)"); - fr = new FileReader(fullFile); - br = new BufferedReader(fr); + fis = new FileInputStream(fullFile); + InputStreamReader isr; + if (tool.grammarEncoding != null) { + isr = new InputStreamReader(fis, tool.grammarEncoding); + } + else { + isr = new InputStreamReader(fis); + } + + br = new BufferedReader(isr); String tokenDef = br.readLine(); int lineNum = 1; while ( tokenDef!=null ) { diff --git a/tool/src/org/antlr/v4/tool/BuildDependencyGenerator.java b/tool/src/org/antlr/v4/tool/BuildDependencyGenerator.java index 54e4937a6..69e71b5d0 100644 --- a/tool/src/org/antlr/v4/tool/BuildDependencyGenerator.java +++ b/tool/src/org/antlr/v4/tool/BuildDependencyGenerator.java @@ -31,7 +31,6 @@ package org.antlr.v4.tool; import org.antlr.v4.Tool; import org.antlr.v4.codegen.CodeGenerator; -import org.antlr.v4.misc.Utils; import org.antlr.v4.parse.ANTLRParser; import org.stringtemplate.v4.ST; import org.stringtemplate.v4.STGroup; @@ -252,7 +251,7 @@ public class BuildDependencyGenerator { public void loadDependencyTemplates() { if (templates != null) return; String fileName = "org/antlr/v4/tool/templates/depend.stg"; - templates = new STGroupFile(fileName); + templates = new STGroupFile(fileName, "UTF-8"); } public CodeGenerator getGenerator() { diff --git a/tool/test/org/antlr/v4/test/BaseTest.java b/tool/test/org/antlr/v4/test/BaseTest.java index 850d9a040..e1327461d 100644 --- a/tool/test/org/antlr/v4/test/BaseTest.java +++ b/tool/test/org/antlr/v4/test/BaseTest.java @@ -83,10 +83,11 @@ import javax.tools.ToolProvider; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; -import java.io.FileWriter; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.OutputStreamWriter; import java.io.PipedInputStream; import java.io.PipedOutputStream; import java.io.PrintStream; @@ -452,6 +453,8 @@ public abstract class BaseTest { options.add(tmpdir); options.add("-lib"); options.add(tmpdir); + options.add("-encoding"); + options.add("UTF-8"); options.add(new File(tmpdir,grammarFileName).toString()); final String[] optionsA = new String[options.size()]; @@ -1025,7 +1028,8 @@ public abstract class BaseTest { public static void writeFile(String dir, String fileName, String content) { try { File f = new File(dir, fileName); - FileWriter w = new FileWriter(f); + FileOutputStream outputStream = new FileOutputStream(f); + OutputStreamWriter w = new OutputStreamWriter(outputStream, "UTF-8"); BufferedWriter bw = new BufferedWriter(w); bw.write(content); bw.close(); diff --git a/tool/test/org/antlr/v4/test/TestParserExec.java b/tool/test/org/antlr/v4/test/TestParserExec.java index 0bc718933..21867a811 100644 --- a/tool/test/org/antlr/v4/test/TestParserExec.java +++ b/tool/test/org/antlr/v4/test/TestParserExec.java @@ -35,6 +35,7 @@ import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; /** Test parser execution. * @@ -527,4 +528,43 @@ public class TestParserExec extends BaseTest { assertEquals("", found); assertEquals("line 1:6 rule floating_constant DEC:A floating-point constant cannot have internal white space\n", stderrDuringParse); } + + /** + * This is a regression test for antlr/antlr4#563 "Inconsistent token + * handling in ANTLR4". + * https://github.com/antlr/antlr4/issues/563 + */ + @Test public void testAlternateQuotes() throws Exception { + String lexerGrammar = + "lexer grammar ModeTagsLexer;\n" + + "\n" + + "// Default mode rules (the SEA)\n" + + "OPEN : '«' -> mode(ISLAND) ; // switch to ISLAND mode\n" + + "TEXT : ~'«'+ ; // clump all text together\n" + + "\n" + + "mode ISLAND;\n" + + "CLOSE : '»' -> mode(DEFAULT_MODE) ; // back to SEA mode \n" + + "SLASH : '/' ;\n" + + "ID : [a-zA-Z]+ ; // match/send ID in tag to parser\n"; + String parserGrammar = + "parser grammar ModeTagsParser;\n" + + "\n" + + "options { tokenVocab=ModeTagsLexer; } // use tokens from ModeTagsLexer.g4\n" + + "\n" + + "file: (tag | TEXT)* ;\n" + + "\n" + + "tag : '«' ID '»'\n" + + " | '«' '/' ID '»'\n" + + " ;"; + + boolean success = rawGenerateAndBuildRecognizer("ModeTagsLexer.g4", + lexerGrammar, + null, + "ModeTagsLexer"); + assertTrue(success); + + String found = execParser("ModeTagsParser.g4", parserGrammar, "ModeTagsParser", "ModeTagsLexer", "file", "", false); + assertEquals("", found); + assertNull(stderrDuringParse); + } }