Fix cases where file encoding was not handled properly in the tool (fixes #563)

2014-06-10 17:25:45 -05:00 · 2014-06-10 17:25:45 -05:00 · 156696be18
parent fdc46c53e6
commit 156696be18
5 changed files with 61 additions and 9 deletions
--- a/tool/src/org/antlr/v4/Tool.java
+++ b/tool/src/org/antlr/v4/Tool.java
@ -604,7 +604,7 @@ public class Tool {
 			return null;
 		}

-		ANTLRFileStream in = new ANTLRFileStream(importedFile.getAbsolutePath());
+		ANTLRFileStream in = new ANTLRFileStream(importedFile.getAbsolutePath(), grammarEncoding);
 		GrammarRootAST root = parse(g.fileName, in);
 		Grammar imported = createGrammar(root);
 		imported.fileName = importedFile.getAbsolutePath();
--- a/tool/src/org/antlr/v4/parse/TokenVocabParser.java
+++ b/tool/src/org/antlr/v4/parse/TokenVocabParser.java
@ -37,9 +37,10 @@ import org.antlr.v4.tool.ErrorType;

 import java.io.BufferedReader;
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.FileNotFoundException;
-import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.regex.Matcher;
@ -60,12 +61,20 @@ public class TokenVocabParser {
 		Map<String,Integer> tokens = new LinkedHashMap<String,Integer>();
 		int maxTokenType = -1;
 		File fullFile = getImportedVocabFile();
-		FileReader fr = null;
+		FileInputStream fis = null;
 		BufferedReader br = null;
 		try {
 			Pattern tokenDefPattern = Pattern.compile("([^\n]+?)[ \\t]*?=[ \\t]*?([0-9]+)");
-			fr = new FileReader(fullFile);
-			br = new BufferedReader(fr);
+			fis = new FileInputStream(fullFile);
+			InputStreamReader isr;
+			if (tool.grammarEncoding != null) {
+				isr = new InputStreamReader(fis, tool.grammarEncoding);
+			}
+			else {
+				isr = new InputStreamReader(fis);
+			}
+
+			br = new BufferedReader(isr);
 			String tokenDef = br.readLine();
 			int lineNum = 1;
 			while ( tokenDef!=null ) {
--- a/tool/src/org/antlr/v4/tool/BuildDependencyGenerator.java
+++ b/tool/src/org/antlr/v4/tool/BuildDependencyGenerator.java
@ -31,7 +31,6 @@ package org.antlr.v4.tool;

 import org.antlr.v4.Tool;
 import org.antlr.v4.codegen.CodeGenerator;
-import org.antlr.v4.misc.Utils;
 import org.antlr.v4.parse.ANTLRParser;
 import org.stringtemplate.v4.ST;
 import org.stringtemplate.v4.STGroup;
@ -252,7 +251,7 @@ public class BuildDependencyGenerator {
    public void loadDependencyTemplates() {
        if (templates != null) return;
        String fileName = "org/antlr/v4/tool/templates/depend.stg";
-        templates = new STGroupFile(fileName);
+        templates = new STGroupFile(fileName, "UTF-8");
    }

    public CodeGenerator getGenerator() {
--- a/tool/test/org/antlr/v4/test/BaseTest.java
+++ b/tool/test/org/antlr/v4/test/BaseTest.java
@ -83,10 +83,11 @@ import javax.tools.ToolProvider;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
 import java.io.PipedInputStream;
 import java.io.PipedOutputStream;
 import java.io.PrintStream;
@ -452,6 +453,8 @@ public abstract class BaseTest {
 		options.add(tmpdir);
 		options.add("-lib");
 		options.add(tmpdir);
+		options.add("-encoding");
+		options.add("UTF-8");
 		options.add(new File(tmpdir,grammarFileName).toString());

 		final String[] optionsA = new String[options.size()];
@ -1025,7 +1028,8 @@ public abstract class BaseTest {
 	public static void writeFile(String dir, String fileName, String content) {
 		try {
 			File f = new File(dir, fileName);
-			FileWriter w = new FileWriter(f);
+			FileOutputStream outputStream = new FileOutputStream(f);
+			OutputStreamWriter w = new OutputStreamWriter(outputStream, "UTF-8");
 			BufferedWriter bw = new BufferedWriter(w);
 			bw.write(content);
 			bw.close();
--- a/tool/test/org/antlr/v4/test/TestParserExec.java
+++ b/tool/test/org/antlr/v4/test/TestParserExec.java
@ -35,6 +35,7 @@ import org.junit.Test;

 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;

 /** Test parser execution.
 *
@ -527,4 +528,43 @@ public class TestParserExec extends BaseTest {
 		assertEquals("", found);
 		assertEquals("line 1:6 rule floating_constant DEC:A floating-point constant cannot have internal white space\n", stderrDuringParse);
 	}
+
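+	/**
+	 * This is a regression test for antlr/antlr4#563 "Inconsistent token
+	 * handling in ANTLR4".
+	 * https://github.com/antlr/antlr4/issues/563
+	 *
+	 * <p>The lexer grammar defines its tokens with the non-ASCII literals
+	 * {@code «} and {@code »}, and the parser grammar refers to the same
+	 * literals while importing the lexer's tokens through
+	 * {@code tokenVocab}. The lexer is generated and built first; the parser
+	 * is then executed and must produce neither output nor anything on
+	 * stderr.</p>
+	 */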
+	@Test public void testAlternateQuotes() throws Exception {
+		String lexerGrammar =
+			"lexer grammar ModeTagsLexer;\n" +
+			"\n" +
+			"// Default mode rules (the SEA)\n" +
+			"OPEN  : '«'     -> mode(ISLAND) ;       // switch to ISLAND mode\n" +
+			"TEXT  : ~'«'+ ;                         // clump all text together\n" +
+			"\n" +
+			"mode ISLAND;\n" +
+			"CLOSE : '»'     -> mode(DEFAULT_MODE) ; // back to SEA mode \n" +
+			"SLASH : '/' ;\n" +
+			"ID    : [a-zA-Z]+ ;                     // match/send ID in tag to parser\n";
+		String parserGrammar =
+			"parser grammar ModeTagsParser;\n" +
+			"\n" +
+			"options { tokenVocab=ModeTagsLexer; } // use tokens from ModeTagsLexer.g4\n" +
+			"\n" +
+			"file: (tag | TEXT)* ;\n" +
+			"\n" +
+			"tag : '«' ID '»'\n" +
+			"    | '«' '/' ID '»'\n" +
+			"    ;";
+
+		boolean success = rawGenerateAndBuildRecognizer("ModeTagsLexer.g4",
+														lexerGrammar,
+														null,
+														"ModeTagsLexer");
+		assertTrue(success);
+
+		String found = execParser("ModeTagsParser.g4", parserGrammar, "ModeTagsParser", "ModeTagsLexer", "file", "", false);
+		assertEquals("", found);
+		assertNull(stderrDuringParse);
+	}
 }