Merge pull request #618 from sharwell/fix-563

Fix cases where file encoding was not handled properly in the tool
This commit is contained in:
Terence Parr 2014-06-10 16:43:51 -07:00
commit 63069a571d
5 changed files with 61 additions and 9 deletions

View File

@ -604,7 +604,7 @@ public class Tool {
return null;
}
ANTLRFileStream in = new ANTLRFileStream(importedFile.getAbsolutePath());
ANTLRFileStream in = new ANTLRFileStream(importedFile.getAbsolutePath(), grammarEncoding);
GrammarRootAST root = parse(g.fileName, in);
Grammar imported = createGrammar(root);
imported.fileName = importedFile.getAbsolutePath();

View File

@ -37,9 +37,10 @@ import org.antlr.v4.tool.ErrorType;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
@ -60,12 +61,20 @@ public class TokenVocabParser {
Map<String,Integer> tokens = new LinkedHashMap<String,Integer>();
int maxTokenType = -1;
File fullFile = getImportedVocabFile();
FileReader fr = null;
FileInputStream fis = null;
BufferedReader br = null;
try {
Pattern tokenDefPattern = Pattern.compile("([^\n]+?)[ \\t]*?=[ \\t]*?([0-9]+)");
fr = new FileReader(fullFile);
br = new BufferedReader(fr);
fis = new FileInputStream(fullFile);
InputStreamReader isr;
if (tool.grammarEncoding != null) {
isr = new InputStreamReader(fis, tool.grammarEncoding);
}
else {
isr = new InputStreamReader(fis);
}
br = new BufferedReader(isr);
String tokenDef = br.readLine();
int lineNum = 1;
while ( tokenDef!=null ) {

View File

@ -31,7 +31,6 @@ package org.antlr.v4.tool;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.parse.ANTLRParser;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;
@ -252,7 +251,7 @@ public class BuildDependencyGenerator {
/**
 * Loads the dependency template group from the resource
 * {@code org/antlr/v4/tool/templates/depend.stg} into {@code templates}.
 * Returns immediately when {@code templates} has already been set, so the
 * group is created at most once per instance.
 */
public void loadDependencyTemplates() {
if (templates != null) return;
String fileName = "org/antlr/v4/tool/templates/depend.stg";
// NOTE(review): the two assignments below appear to be the before/after
// lines of a diff hunk shown without +/- markers; only the second form,
// which passes "UTF-8" explicitly, is presumably meant to remain — confirm.
templates = new STGroupFile(fileName);
templates = new STGroupFile(fileName, "UTF-8");
}
public CodeGenerator getGenerator() {

View File

@ -83,10 +83,11 @@ import javax.tools.ToolProvider;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.io.PrintStream;
@ -452,6 +453,8 @@ public abstract class BaseTest {
options.add(tmpdir);
options.add("-lib");
options.add(tmpdir);
options.add("-encoding");
options.add("UTF-8");
options.add(new File(tmpdir,grammarFileName).toString());
final String[] optionsA = new String[options.size()];
@ -1025,7 +1028,8 @@ public abstract class BaseTest {
public static void writeFile(String dir, String fileName, String content) {
try {
File f = new File(dir, fileName);
FileWriter w = new FileWriter(f);
FileOutputStream outputStream = new FileOutputStream(f);
OutputStreamWriter w = new OutputStreamWriter(outputStream, "UTF-8");
BufferedWriter bw = new BufferedWriter(w);
bw.write(content);
bw.close();

View File

@ -35,6 +35,7 @@ import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
/** Test parser execution.
*
@ -527,4 +528,43 @@ public class TestParserExec extends BaseTest {
assertEquals("", found);
assertEquals("line 1:6 rule floating_constant DEC:A floating-point constant cannot have internal white space\n", stderrDuringParse);
}
/**
 * Regression test for antlr/antlr4#563 "Inconsistent token handling in
 * ANTLR4" (https://github.com/antlr/antlr4/issues/563).
 *
 * <p>Both grammars use the non-ASCII guillemets « (U+00AB) and » (U+00BB)
 * as token literals. The lexer grammar is generated and built first; the
 * parser grammar then refers to it through {@code tokenVocab=ModeTagsLexer}
 * and repeats the same literals. The test passes only when the lexer build
 * succeeds, the parser run produces no output, and nothing was captured in
 * {@code stderrDuringParse}.
 *
 * @throws Exception if generating, compiling or running the recognizers fails
 */
@Test public void testAlternateQuotes() throws Exception {
	// Each guillemet must be a single character inside its quotes: the TEXT
	// rule negates it with ~'«', which presumably requires a one-character
	// literal. A mis-decoded two-character form would not test issue #563.
	String lexerGrammar =
		"lexer grammar ModeTagsLexer;\n" +
		"\n" +
		"// Default mode rules (the SEA)\n" +
		"OPEN : '«' -> mode(ISLAND) ; // switch to ISLAND mode\n" +
		"TEXT : ~'«'+ ; // clump all text together\n" +
		"\n" +
		"mode ISLAND;\n" +
		"CLOSE : '»' -> mode(DEFAULT_MODE) ; // back to SEA mode \n" +
		"SLASH : '/' ;\n" +
		"ID : [a-zA-Z]+ ; // match/send ID in tag to parser\n";
	String parserGrammar =
		"parser grammar ModeTagsParser;\n" +
		"\n" +
		"options { tokenVocab=ModeTagsLexer; } // use tokens from ModeTagsLexer.g4\n" +
		"\n" +
		"file: (tag | TEXT)* ;\n" +
		"\n" +
		"tag : '«' ID '»'\n" +
		" | '«' '/' ID '»'\n" +
		" ;";

	// Build the lexer on its own first so the parser grammar's tokenVocab
	// option has something to refer to.
	boolean success = rawGenerateAndBuildRecognizer("ModeTagsLexer.g4",
		lexerGrammar,
		null,
		"ModeTagsLexer");
	assertTrue(success);

	// Run the parser from rule "file"; the empty string is presumably the
	// input text, so no output and no error text are expected.
	String found = execParser("ModeTagsParser.g4", parserGrammar, "ModeTagsParser", "ModeTagsLexer", "file", "", false);
	assertEquals("", found);
	assertNull(stderrDuringParse);
}
}