Merge pull request #1724 from KvanTTT/more-accurate-error-messages

More accurate error messages
This commit is contained in:
Terence Parr 2017-03-03 10:02:31 -08:00 committed by GitHub
commit 924b3d137c
6 changed files with 63 additions and 50 deletions

View File

@ -390,12 +390,14 @@ public class TestSymbolIssues extends BaseJavaToolTest {
"TOKEN_RANGE_2: [A-FD-J];\n" +
"TOKEN_RANGE_3: 'Z' | 'K'..'R' | 'O'..'V';\n" +
"TOKEN_RANGE_4: 'g'..'l' | [g-l];\n" +
"TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];",
"TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];\n" +
"TOKEN_RANGE_WITH_ESCAPED_CHARS: [\\n-\\r] | '\\n'..'\\r';",
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars \"a-f\" used multiple times in set [aa-f]\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars \"D-J\" used multiple times in set [A-FD-J]\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars \"O-V\" used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars \"g\" used multiple times in set {'g'..'l'}\n"
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars 'a'..'f' used multiple times in set [aa-f]\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars 'D'..'J' used multiple times in set [A-FD-J]\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars 'O'..'V' used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'g' used multiple times in set 'g'..'l'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars '\\n' used multiple times in set '\\n'..'\\r'\n"
};
testErrors(test, false);

View File

@ -522,7 +522,7 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: [f-a]\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" +
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:30: invalid charset (range without start or end): [-z]\n" +
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:37: invalid charset (range without start or end): [a-]\n" +

View File

@ -6,6 +6,8 @@
package org.antlr.v4.automata;
import org.antlr.v4.codegen.model.MatchSet;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.AtomTransition;
@ -25,6 +27,7 @@ import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
@ -111,8 +114,8 @@ public class ATNOptimizer {
// TODO: Token is missing (i.e. position in source will not be displayed).
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName,
null,
String.valueOf(Character.toChars(v)),
matchSet.toString(true));
CharSupport.getANTLRCharLiteralForChar(v),
CharSupport.getIntervalSetEscapedString(matchSet));
break;
}
}

View File

@ -517,7 +517,7 @@ public class LexerATNFactory extends ParserATNFactory {
ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED,
g.fileName,
charSetAST.getToken(),
CharSupport.toRange(state.prevCodePoint, codePoint, CharSupport.ToRangeMode.BRACKETED));
CharSupport.getRangeEscapedString(state.prevCodePoint, codePoint));
}
checkSetCollision(charSetAST, set, state.prevCodePoint, codePoint);
set.add(state.prevCodePoint, codePoint);
@ -571,10 +571,7 @@ public class LexerATNFactory extends ParserATNFactory {
}
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int el) {
if (set.contains(el)) {
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
el, ast.getText());
}
checkSetCollision(ast, set, el, el);
}
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int a, int b) {
@ -601,7 +598,7 @@ public class LexerATNFactory extends ParserATNFactory {
setText = sb.toString();
}
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
CharSupport.toRange(a, b, CharSupport.ToRangeMode.NOT_BRACKETED), setText);
CharSupport.getRangeEscapedString(a, b), setText);
break;
}
}

View File

@ -7,6 +7,11 @@
package org.antlr.v4.misc;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet;
import java.util.Iterator;
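/** Static helpers for turning code points into ANTLR literal text (escapes, quoted char literals, ranges). */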
public class CharSupport {
@ -19,11 +24,6 @@ public class CharSupport {
*/
public static String ANTLRLiteralCharValueEscape[] = new String[255];
public enum ToRangeMode {
BRACKETED,
NOT_BRACKETED,
};
static {
ANTLRLiteralEscapedCharValue['n'] = '\n';
ANTLRLiteralEscapedCharValue['r'] = '\r';
@ -45,28 +45,34 @@ public class CharSupport {
* as \\uXXXX or \\u{XXXXXX} escapes.
*/
public static String getANTLRCharLiteralForChar(int c) {
if ( c< Lexer.MIN_CHAR_VALUE ) {
return "'<INVALID>'";
}
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
}
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
!Character.isISOControl((char)c) ) {
if ( c=='\\' ) {
return "'\\\\'";
}
if ( c=='\'') {
return "'\\''";
}
return '\''+Character.toString((char)c)+'\'';
}
if (c <= 0xFFFF) {
return String.format("\\u%04X", c);
String result;
if ( c < Lexer.MIN_CHAR_VALUE ) {
result = "<INVALID>";
}
else {
return String.format("\\u{%06X}", c);
String charValueEscape = c < ANTLRLiteralCharValueEscape.length ? ANTLRLiteralCharValueEscape[c] : null;
if (charValueEscape != null) {
result = charValueEscape;
}
else if (Character.UnicodeBlock.of((char) c) == Character.UnicodeBlock.BASIC_LATIN &&
!Character.isISOControl((char) c)) {
if (c == '\\') {
result = "\\\\";
}
else if (c == '\'') {
result = "\\'";
}
else {
result = Character.toString((char) c);
}
}
else if (c <= 0xFFFF) {
result = String.format("\\u%04X", c);
} else {
result = String.format("\\u{%06X}", c);
}
}
return '\'' + result + '\'';
}
/** Given a literal like (the 3 char sequence with single quotes) 'a',
@ -179,17 +185,22 @@ public class CharSupport {
return Character.toUpperCase(s.charAt(0)) + s.substring(1);
}
public static String toRange(int codePointStart, int codePointEnd, ToRangeMode mode) {
StringBuilder sb = new StringBuilder();
if (mode == ToRangeMode.BRACKETED) {
sb.append("[");
public static String getIntervalSetEscapedString(IntervalSet intervalSet) {
StringBuilder buf = new StringBuilder();
Iterator<Interval> iter = intervalSet.getIntervals().iterator();
while (iter.hasNext()) {
Interval interval = iter.next();
buf.append(getRangeEscapedString(interval.a, interval.b));
if (iter.hasNext()) {
buf.append(" | ");
}
sb.appendCodePoint(codePointStart)
.append("-")
.appendCodePoint(codePointEnd);
if (mode == ToRangeMode.BRACKETED) {
sb.append("]");
}
return sb.toString();
return buf.toString();
}
/**
 * Formats an inclusive code point range as ANTLR literal text.
 *
 * <p>When both ends are the same code point, only that character's quoted
 * literal is returned; otherwise the two quoted literals are joined with
 * {@code ..}.
 *
 * @param codePointStart first code point of the range
 * @param codePointEnd last code point of the range
 * @return the single char literal, or {@code start..end} built from two char literals
 */
public static String getRangeEscapedString(int codePointStart, int codePointEnd) {
	String startLiteral = getANTLRCharLiteralForChar(codePointStart);
	if (codePointStart == codePointEnd) {
		return startLiteral;
	}
	return startLiteral + ".." + getANTLRCharLiteralForChar(codePointEnd);
}
}

View File

@ -1054,7 +1054,7 @@ public enum ErrorType {
*
* TODO: Does not work with fragment rules.
*/
CHARACTERS_COLLISION_IN_SET(180, "chars \"<arg>\" used multiple times in set <arg2>", ErrorSeverity.WARNING),
CHARACTERS_COLLISION_IN_SET(180, "chars <arg> used multiple times in set <arg2>", ErrorSeverity.WARNING),
/**
* Compiler Warning 181