forked from jasder/antlr
Merge pull request #1724 from KvanTTT/more-accurate-error-messages
More accurate error messages
This commit is contained in:
commit
924b3d137c
|
@ -390,12 +390,14 @@ public class TestSymbolIssues extends BaseJavaToolTest {
|
||||||
"TOKEN_RANGE_2: [A-FD-J];\n" +
|
"TOKEN_RANGE_2: [A-FD-J];\n" +
|
||||||
"TOKEN_RANGE_3: 'Z' | 'K'..'R' | 'O'..'V';\n" +
|
"TOKEN_RANGE_3: 'Z' | 'K'..'R' | 'O'..'V';\n" +
|
||||||
"TOKEN_RANGE_4: 'g'..'l' | [g-l];\n" +
|
"TOKEN_RANGE_4: 'g'..'l' | [g-l];\n" +
|
||||||
"TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];",
|
"TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];\n" +
|
||||||
|
"TOKEN_RANGE_WITH_ESCAPED_CHARS: [\\n-\\r] | '\\n'..'\\r';",
|
||||||
|
|
||||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars \"a-f\" used multiple times in set [aa-f]\n" +
|
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars 'a'..'f' used multiple times in set [aa-f]\n" +
|
||||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars \"D-J\" used multiple times in set [A-FD-J]\n" +
|
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars 'D'..'J' used multiple times in set [A-FD-J]\n" +
|
||||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars \"O-V\" used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
|
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars 'O'..'V' used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
|
||||||
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars \"g\" used multiple times in set {'g'..'l'}\n"
|
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'g' used multiple times in set 'g'..'l'\n" +
|
||||||
|
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars '\\n' used multiple times in set '\\n'..'\\r'\n"
|
||||||
};
|
};
|
||||||
|
|
||||||
testErrors(test, false);
|
testErrors(test, false);
|
||||||
|
|
|
@ -522,7 +522,7 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence\n" +
|
||||||
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence\n" +
|
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence\n" +
|
||||||
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
|
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
|
||||||
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: [f-a]\n" +
|
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
|
||||||
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" +
|
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" +
|
||||||
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:30: invalid charset (range without start or end): [-z]\n" +
|
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:30: invalid charset (range without start or end): [-z]\n" +
|
||||||
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:37: invalid charset (range without start or end): [a-]\n" +
|
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:37: invalid charset (range without start or end): [a-]\n" +
|
||||||
|
|
|
@ -6,6 +6,8 @@
|
||||||
|
|
||||||
package org.antlr.v4.automata;
|
package org.antlr.v4.automata;
|
||||||
|
|
||||||
|
import org.antlr.v4.codegen.model.MatchSet;
|
||||||
|
import org.antlr.v4.runtime.Token;
|
||||||
import org.antlr.v4.runtime.atn.ATN;
|
import org.antlr.v4.runtime.atn.ATN;
|
||||||
import org.antlr.v4.runtime.atn.ATNState;
|
import org.antlr.v4.runtime.atn.ATNState;
|
||||||
import org.antlr.v4.runtime.atn.AtomTransition;
|
import org.antlr.v4.runtime.atn.AtomTransition;
|
||||||
|
@ -25,6 +27,7 @@ import org.antlr.v4.tool.Grammar;
|
||||||
import org.antlr.v4.tool.Rule;
|
import org.antlr.v4.tool.Rule;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -111,8 +114,8 @@ public class ATNOptimizer {
|
||||||
// TODO: Token is missing (i.e. position in source will not be displayed).
|
// TODO: Token is missing (i.e. position in source will not be displayed).
|
||||||
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName,
|
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName,
|
||||||
null,
|
null,
|
||||||
String.valueOf(Character.toChars(v)),
|
CharSupport.getANTLRCharLiteralForChar(v),
|
||||||
matchSet.toString(true));
|
CharSupport.getIntervalSetEscapedString(matchSet));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -517,7 +517,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
||||||
ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED,
|
ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED,
|
||||||
g.fileName,
|
g.fileName,
|
||||||
charSetAST.getToken(),
|
charSetAST.getToken(),
|
||||||
CharSupport.toRange(state.prevCodePoint, codePoint, CharSupport.ToRangeMode.BRACKETED));
|
CharSupport.getRangeEscapedString(state.prevCodePoint, codePoint));
|
||||||
}
|
}
|
||||||
checkSetCollision(charSetAST, set, state.prevCodePoint, codePoint);
|
checkSetCollision(charSetAST, set, state.prevCodePoint, codePoint);
|
||||||
set.add(state.prevCodePoint, codePoint);
|
set.add(state.prevCodePoint, codePoint);
|
||||||
|
@ -571,10 +571,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int el) {
|
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int el) {
|
||||||
if (set.contains(el)) {
|
checkSetCollision(ast, set, el, el);
|
||||||
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
|
|
||||||
el, ast.getText());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int a, int b) {
|
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int a, int b) {
|
||||||
|
@ -601,7 +598,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
||||||
setText = sb.toString();
|
setText = sb.toString();
|
||||||
}
|
}
|
||||||
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
|
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
|
||||||
CharSupport.toRange(a, b, CharSupport.ToRangeMode.NOT_BRACKETED), setText);
|
CharSupport.getRangeEscapedString(a, b), setText);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,11 @@
|
||||||
package org.antlr.v4.misc;
|
package org.antlr.v4.misc;
|
||||||
|
|
||||||
import org.antlr.v4.runtime.Lexer;
|
import org.antlr.v4.runtime.Lexer;
|
||||||
|
import org.antlr.v4.runtime.Token;
|
||||||
|
import org.antlr.v4.runtime.misc.Interval;
|
||||||
|
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
public class CharSupport {
|
public class CharSupport {
|
||||||
|
@ -19,11 +24,6 @@ public class CharSupport {
|
||||||
*/
|
*/
|
||||||
public static String ANTLRLiteralCharValueEscape[] = new String[255];
|
public static String ANTLRLiteralCharValueEscape[] = new String[255];
|
||||||
|
|
||||||
public enum ToRangeMode {
|
|
||||||
BRACKETED,
|
|
||||||
NOT_BRACKETED,
|
|
||||||
};
|
|
||||||
|
|
||||||
static {
|
static {
|
||||||
ANTLRLiteralEscapedCharValue['n'] = '\n';
|
ANTLRLiteralEscapedCharValue['n'] = '\n';
|
||||||
ANTLRLiteralEscapedCharValue['r'] = '\r';
|
ANTLRLiteralEscapedCharValue['r'] = '\r';
|
||||||
|
@ -45,28 +45,34 @@ public class CharSupport {
|
||||||
* as \\uXXXX or \\u{XXXXXX} escapes.
|
* as \\uXXXX or \\u{XXXXXX} escapes.
|
||||||
*/
|
*/
|
||||||
public static String getANTLRCharLiteralForChar(int c) {
|
public static String getANTLRCharLiteralForChar(int c) {
|
||||||
|
String result;
|
||||||
if ( c < Lexer.MIN_CHAR_VALUE ) {
|
if ( c < Lexer.MIN_CHAR_VALUE ) {
|
||||||
return "'<INVALID>'";
|
result = "<INVALID>";
|
||||||
}
|
|
||||||
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
|
|
||||||
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
|
|
||||||
}
|
|
||||||
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
|
|
||||||
!Character.isISOControl((char)c) ) {
|
|
||||||
if ( c=='\\' ) {
|
|
||||||
return "'\\\\'";
|
|
||||||
}
|
|
||||||
if ( c=='\'') {
|
|
||||||
return "'\\''";
|
|
||||||
}
|
|
||||||
return '\''+Character.toString((char)c)+'\'';
|
|
||||||
}
|
|
||||||
if (c <= 0xFFFF) {
|
|
||||||
return String.format("\\u%04X", c);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return String.format("\\u{%06X}", c);
|
String charValueEscape = c < ANTLRLiteralCharValueEscape.length ? ANTLRLiteralCharValueEscape[c] : null;
|
||||||
|
if (charValueEscape != null) {
|
||||||
|
result = charValueEscape;
|
||||||
}
|
}
|
||||||
|
else if (Character.UnicodeBlock.of((char) c) == Character.UnicodeBlock.BASIC_LATIN &&
|
||||||
|
!Character.isISOControl((char) c)) {
|
||||||
|
if (c == '\\') {
|
||||||
|
result = "\\\\";
|
||||||
|
}
|
||||||
|
else if (c == '\'') {
|
||||||
|
result = "\\'";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result = Character.toString((char) c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (c <= 0xFFFF) {
|
||||||
|
result = String.format("\\u%04X", c);
|
||||||
|
} else {
|
||||||
|
result = String.format("\\u{%06X}", c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return '\'' + result + '\'';
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Given a literal like (the 3 char sequence with single quotes) 'a',
|
/** Given a literal like (the 3 char sequence with single quotes) 'a',
|
||||||
|
@ -179,17 +185,22 @@ public class CharSupport {
|
||||||
return Character.toUpperCase(s.charAt(0)) + s.substring(1);
|
return Character.toUpperCase(s.charAt(0)) + s.substring(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String toRange(int codePointStart, int codePointEnd, ToRangeMode mode) {
|
public static String getIntervalSetEscapedString(IntervalSet intervalSet) {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder buf = new StringBuilder();
|
||||||
if (mode == ToRangeMode.BRACKETED) {
|
Iterator<Interval> iter = intervalSet.getIntervals().iterator();
|
||||||
sb.append("[");
|
while (iter.hasNext()) {
|
||||||
}
|
Interval interval = iter.next();
|
||||||
sb.appendCodePoint(codePointStart)
|
buf.append(getRangeEscapedString(interval.a, interval.b));
|
||||||
.append("-")
|
if (iter.hasNext()) {
|
||||||
.appendCodePoint(codePointEnd);
|
buf.append(" | ");
|
||||||
if (mode == ToRangeMode.BRACKETED) {
|
}
|
||||||
sb.append("]");
|
}
|
||||||
}
|
return buf.toString();
|
||||||
return sb.toString();
|
}
|
||||||
|
|
||||||
|
public static String getRangeEscapedString(int codePointStart, int codePointEnd) {
|
||||||
|
return codePointStart != codePointEnd
|
||||||
|
? getANTLRCharLiteralForChar(codePointStart) + ".." + getANTLRCharLiteralForChar(codePointEnd)
|
||||||
|
: getANTLRCharLiteralForChar(codePointStart);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1054,7 +1054,7 @@ public enum ErrorType {
|
||||||
*
|
*
|
||||||
* TODO: Does not work with fragment rules.
|
* TODO: Does not work with fragment rules.
|
||||||
*/
|
*/
|
||||||
CHARACTERS_COLLISION_IN_SET(180, "chars \"<arg>\" used multiple times in set <arg2>", ErrorSeverity.WARNING),
|
CHARACTERS_COLLISION_IN_SET(180, "chars <arg> used multiple times in set <arg2>", ErrorSeverity.WARNING),
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compiler Warning 181
|
* Compiler Warning 181
|
||||||
|
|
Loading…
Reference in New Issue