Merge pull request #1724 from KvanTTT/more-accurate-error-messages
More accurate error messages
This commit is contained in:
commit
924b3d137c
|
@ -390,12 +390,14 @@ public class TestSymbolIssues extends BaseJavaToolTest {
|
|||
"TOKEN_RANGE_2: [A-FD-J];\n" +
|
||||
"TOKEN_RANGE_3: 'Z' | 'K'..'R' | 'O'..'V';\n" +
|
||||
"TOKEN_RANGE_4: 'g'..'l' | [g-l];\n" +
|
||||
"TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];",
|
||||
"TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];\n" +
|
||||
"TOKEN_RANGE_WITH_ESCAPED_CHARS: [\\n-\\r] | '\\n'..'\\r';",
|
||||
|
||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars \"a-f\" used multiple times in set [aa-f]\n" +
|
||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars \"D-J\" used multiple times in set [A-FD-J]\n" +
|
||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars \"O-V\" used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
|
||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars \"g\" used multiple times in set {'g'..'l'}\n"
|
||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars 'a'..'f' used multiple times in set [aa-f]\n" +
|
||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars 'D'..'J' used multiple times in set [A-FD-J]\n" +
|
||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars 'O'..'V' used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
|
||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'g' used multiple times in set 'g'..'l'\n" +
|
||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars '\\n' used multiple times in set '\\n'..'\\r'\n"
|
||||
};
|
||||
|
||||
testErrors(test, false);
|
||||
|
|
|
@ -522,7 +522,7 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
|
|||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence\n" +
|
||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence\n" +
|
||||
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
|
||||
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: [f-a]\n" +
|
||||
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
|
||||
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" +
|
||||
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:30: invalid charset (range without start or end): [-z]\n" +
|
||||
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:37: invalid charset (range without start or end): [a-]\n" +
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.v4.codegen.model.MatchSet;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
import org.antlr.v4.runtime.atn.ATN;
|
||||
import org.antlr.v4.runtime.atn.ATNState;
|
||||
import org.antlr.v4.runtime.atn.AtomTransition;
|
||||
|
@ -25,6 +27,7 @@ import org.antlr.v4.tool.Grammar;
|
|||
import org.antlr.v4.tool.Rule;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
@ -111,8 +114,8 @@ public class ATNOptimizer {
|
|||
// TODO: Token is missing (i.e. position in source will not be displayed).
|
||||
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName,
|
||||
null,
|
||||
String.valueOf(Character.toChars(v)),
|
||||
matchSet.toString(true));
|
||||
CharSupport.getANTLRCharLiteralForChar(v),
|
||||
CharSupport.getIntervalSetEscapedString(matchSet));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -517,7 +517,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
|||
ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED,
|
||||
g.fileName,
|
||||
charSetAST.getToken(),
|
||||
CharSupport.toRange(state.prevCodePoint, codePoint, CharSupport.ToRangeMode.BRACKETED));
|
||||
CharSupport.getRangeEscapedString(state.prevCodePoint, codePoint));
|
||||
}
|
||||
checkSetCollision(charSetAST, set, state.prevCodePoint, codePoint);
|
||||
set.add(state.prevCodePoint, codePoint);
|
||||
|
@ -571,10 +571,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
|||
}
|
||||
|
||||
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int el) {
|
||||
if (set.contains(el)) {
|
||||
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
|
||||
el, ast.getText());
|
||||
}
|
||||
checkSetCollision(ast, set, el, el);
|
||||
}
|
||||
|
||||
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int a, int b) {
|
||||
|
@ -601,7 +598,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
|||
setText = sb.toString();
|
||||
}
|
||||
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
|
||||
CharSupport.toRange(a, b, CharSupport.ToRangeMode.NOT_BRACKETED), setText);
|
||||
CharSupport.getRangeEscapedString(a, b), setText);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,6 +7,11 @@
|
|||
package org.antlr.v4.misc;
|
||||
|
||||
import org.antlr.v4.runtime.Lexer;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
import org.antlr.v4.runtime.misc.Interval;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/** */
|
||||
public class CharSupport {
|
||||
|
@ -19,11 +24,6 @@ public class CharSupport {
|
|||
*/
|
||||
public static String ANTLRLiteralCharValueEscape[] = new String[255];
|
||||
|
||||
public enum ToRangeMode {
|
||||
BRACKETED,
|
||||
NOT_BRACKETED,
|
||||
};
|
||||
|
||||
static {
|
||||
ANTLRLiteralEscapedCharValue['n'] = '\n';
|
||||
ANTLRLiteralEscapedCharValue['r'] = '\r';
|
||||
|
@ -45,28 +45,34 @@ public class CharSupport {
|
|||
* as \\uXXXX or \\u{XXXXXX} escapes.
|
||||
*/
|
||||
public static String getANTLRCharLiteralForChar(int c) {
|
||||
if ( c< Lexer.MIN_CHAR_VALUE ) {
|
||||
return "'<INVALID>'";
|
||||
}
|
||||
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
|
||||
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
|
||||
}
|
||||
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
|
||||
!Character.isISOControl((char)c) ) {
|
||||
if ( c=='\\' ) {
|
||||
return "'\\\\'";
|
||||
}
|
||||
if ( c=='\'') {
|
||||
return "'\\''";
|
||||
}
|
||||
return '\''+Character.toString((char)c)+'\'';
|
||||
}
|
||||
if (c <= 0xFFFF) {
|
||||
return String.format("\\u%04X", c);
|
||||
String result;
|
||||
if ( c < Lexer.MIN_CHAR_VALUE ) {
|
||||
result = "<INVALID>";
|
||||
}
|
||||
else {
|
||||
return String.format("\\u{%06X}", c);
|
||||
String charValueEscape = c < ANTLRLiteralCharValueEscape.length ? ANTLRLiteralCharValueEscape[c] : null;
|
||||
if (charValueEscape != null) {
|
||||
result = charValueEscape;
|
||||
}
|
||||
else if (Character.UnicodeBlock.of((char) c) == Character.UnicodeBlock.BASIC_LATIN &&
|
||||
!Character.isISOControl((char) c)) {
|
||||
if (c == '\\') {
|
||||
result = "\\\\";
|
||||
}
|
||||
else if (c == '\'') {
|
||||
result = "\\'";
|
||||
}
|
||||
else {
|
||||
result = Character.toString((char) c);
|
||||
}
|
||||
}
|
||||
else if (c <= 0xFFFF) {
|
||||
result = String.format("\\u%04X", c);
|
||||
} else {
|
||||
result = String.format("\\u{%06X}", c);
|
||||
}
|
||||
}
|
||||
return '\'' + result + '\'';
|
||||
}
|
||||
|
||||
/** Given a literal like (the 3 char sequence with single quotes) 'a',
|
||||
|
@ -179,17 +185,22 @@ public class CharSupport {
|
|||
return Character.toUpperCase(s.charAt(0)) + s.substring(1);
|
||||
}
|
||||
|
||||
public static String toRange(int codePointStart, int codePointEnd, ToRangeMode mode) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if (mode == ToRangeMode.BRACKETED) {
|
||||
sb.append("[");
|
||||
public static String getIntervalSetEscapedString(IntervalSet intervalSet) {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
Iterator<Interval> iter = intervalSet.getIntervals().iterator();
|
||||
while (iter.hasNext()) {
|
||||
Interval interval = iter.next();
|
||||
buf.append(getRangeEscapedString(interval.a, interval.b));
|
||||
if (iter.hasNext()) {
|
||||
buf.append(" | ");
|
||||
}
|
||||
}
|
||||
sb.appendCodePoint(codePointStart)
|
||||
.append("-")
|
||||
.appendCodePoint(codePointEnd);
|
||||
if (mode == ToRangeMode.BRACKETED) {
|
||||
sb.append("]");
|
||||
}
|
||||
return sb.toString();
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
public static String getRangeEscapedString(int codePointStart, int codePointEnd) {
|
||||
return codePointStart != codePointEnd
|
||||
? getANTLRCharLiteralForChar(codePointStart) + ".." + getANTLRCharLiteralForChar(codePointEnd)
|
||||
: getANTLRCharLiteralForChar(codePointStart);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1054,7 +1054,7 @@ public enum ErrorType {
|
|||
*
|
||||
* TODO: Does not work with fragment rules.
|
||||
*/
|
||||
CHARACTERS_COLLISION_IN_SET(180, "chars \"<arg>\" used multiple times in set <arg2>", ErrorSeverity.WARNING),
|
||||
CHARACTERS_COLLISION_IN_SET(180, "chars <arg> used multiple times in set <arg2>", ErrorSeverity.WARNING),
|
||||
|
||||
/**
|
||||
* Compiler Warning 181
|
||||
|
|
Loading…
Reference in New Issue