Merge pull request #1724 from KvanTTT/more-accurate-error-messages

More accurate error messages
This commit is contained in:
Terence Parr 2017-03-03 10:02:31 -08:00 committed by GitHub
commit 924b3d137c
6 changed files with 63 additions and 50 deletions

View File

@@ -390,12 +390,14 @@ public class TestSymbolIssues extends BaseJavaToolTest {
"TOKEN_RANGE_2: [A-FD-J];\n" + "TOKEN_RANGE_2: [A-FD-J];\n" +
"TOKEN_RANGE_3: 'Z' | 'K'..'R' | 'O'..'V';\n" + "TOKEN_RANGE_3: 'Z' | 'K'..'R' | 'O'..'V';\n" +
"TOKEN_RANGE_4: 'g'..'l' | [g-l];\n" + "TOKEN_RANGE_4: 'g'..'l' | [g-l];\n" +
"TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];", "TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];\n" +
"TOKEN_RANGE_WITH_ESCAPED_CHARS: [\\n-\\r] | '\\n'..'\\r';",
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars \"a-f\" used multiple times in set [aa-f]\n" + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars 'a'..'f' used multiple times in set [aa-f]\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars \"D-J\" used multiple times in set [A-FD-J]\n" + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars 'D'..'J' used multiple times in set [A-FD-J]\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars \"O-V\" used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars 'O'..'V' used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars \"g\" used multiple times in set {'g'..'l'}\n" "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'g' used multiple times in set 'g'..'l'\n" +
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars '\\n' used multiple times in set '\\n'..'\\r'\n"
}; };
testErrors(test, false); testErrors(test, false);

View File

@@ -522,7 +522,7 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest {
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence\n" + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: [f-a]\n" + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" +
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:30: invalid charset (range without start or end): [-z]\n" + "error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:30: invalid charset (range without start or end): [-z]\n" +
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:37: invalid charset (range without start or end): [a-]\n" + "error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:37: invalid charset (range without start or end): [a-]\n" +

View File

@@ -6,6 +6,8 @@
package org.antlr.v4.automata; package org.antlr.v4.automata;
import org.antlr.v4.codegen.model.MatchSet;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.AtomTransition; import org.antlr.v4.runtime.atn.AtomTransition;
@@ -25,6 +27,7 @@ import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule; import org.antlr.v4.tool.Rule;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator;
import java.util.List; import java.util.List;
/** /**
@@ -111,8 +114,8 @@ public class ATNOptimizer {
// TODO: Token is missing (i.e. position in source will not be displayed). // TODO: Token is missing (i.e. position in source will not be displayed).
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName,
null, null,
String.valueOf(Character.toChars(v)), CharSupport.getANTLRCharLiteralForChar(v),
matchSet.toString(true)); CharSupport.getIntervalSetEscapedString(matchSet));
break; break;
} }
} }

View File

@@ -517,7 +517,7 @@ public class LexerATNFactory extends ParserATNFactory {
ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED, ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED,
g.fileName, g.fileName,
charSetAST.getToken(), charSetAST.getToken(),
CharSupport.toRange(state.prevCodePoint, codePoint, CharSupport.ToRangeMode.BRACKETED)); CharSupport.getRangeEscapedString(state.prevCodePoint, codePoint));
} }
checkSetCollision(charSetAST, set, state.prevCodePoint, codePoint); checkSetCollision(charSetAST, set, state.prevCodePoint, codePoint);
set.add(state.prevCodePoint, codePoint); set.add(state.prevCodePoint, codePoint);
@@ -571,10 +571,7 @@ public class LexerATNFactory extends ParserATNFactory {
} }
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int el) { protected void checkSetCollision(GrammarAST ast, IntervalSet set, int el) {
if (set.contains(el)) { checkSetCollision(ast, set, el, el);
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
el, ast.getText());
}
} }
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int a, int b) { protected void checkSetCollision(GrammarAST ast, IntervalSet set, int a, int b) {
@@ -601,7 +598,7 @@ public class LexerATNFactory extends ParserATNFactory {
setText = sb.toString(); setText = sb.toString();
} }
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(), g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
CharSupport.toRange(a, b, CharSupport.ToRangeMode.NOT_BRACKETED), setText); CharSupport.getRangeEscapedString(a, b), setText);
break; break;
} }
} }

View File

@@ -7,6 +7,11 @@
package org.antlr.v4.misc; package org.antlr.v4.misc;
import org.antlr.v4.runtime.Lexer; import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet;
import java.util.Iterator;
/** */ /** */
public class CharSupport { public class CharSupport {
@@ -19,11 +24,6 @@ public class CharSupport {
*/ */
public static String ANTLRLiteralCharValueEscape[] = new String[255]; public static String ANTLRLiteralCharValueEscape[] = new String[255];
public enum ToRangeMode {
BRACKETED,
NOT_BRACKETED,
};
static { static {
ANTLRLiteralEscapedCharValue['n'] = '\n'; ANTLRLiteralEscapedCharValue['n'] = '\n';
ANTLRLiteralEscapedCharValue['r'] = '\r'; ANTLRLiteralEscapedCharValue['r'] = '\r';
@@ -45,28 +45,34 @@ public class CharSupport {
* as \\uXXXX or \\u{XXXXXX} escapes. * as \\uXXXX or \\u{XXXXXX} escapes.
*/ */
public static String getANTLRCharLiteralForChar(int c) { public static String getANTLRCharLiteralForChar(int c) {
String result;
if ( c < Lexer.MIN_CHAR_VALUE ) { if ( c < Lexer.MIN_CHAR_VALUE ) {
return "'<INVALID>'"; result = "<INVALID>";
}
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
}
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
!Character.isISOControl((char)c) ) {
if ( c=='\\' ) {
return "'\\\\'";
}
if ( c=='\'') {
return "'\\''";
}
return '\''+Character.toString((char)c)+'\'';
}
if (c <= 0xFFFF) {
return String.format("\\u%04X", c);
} }
else { else {
return String.format("\\u{%06X}", c); String charValueEscape = c < ANTLRLiteralCharValueEscape.length ? ANTLRLiteralCharValueEscape[c] : null;
if (charValueEscape != null) {
result = charValueEscape;
} }
else if (Character.UnicodeBlock.of((char) c) == Character.UnicodeBlock.BASIC_LATIN &&
!Character.isISOControl((char) c)) {
if (c == '\\') {
result = "\\\\";
}
else if (c == '\'') {
result = "\\'";
}
else {
result = Character.toString((char) c);
}
}
else if (c <= 0xFFFF) {
result = String.format("\\u%04X", c);
} else {
result = String.format("\\u{%06X}", c);
}
}
return '\'' + result + '\'';
} }
/** Given a literal like (the 3 char sequence with single quotes) 'a', /** Given a literal like (the 3 char sequence with single quotes) 'a',
@@ -179,17 +185,22 @@ public class CharSupport {
return Character.toUpperCase(s.charAt(0)) + s.substring(1); return Character.toUpperCase(s.charAt(0)) + s.substring(1);
} }
public static String toRange(int codePointStart, int codePointEnd, ToRangeMode mode) { public static String getIntervalSetEscapedString(IntervalSet intervalSet) {
StringBuilder sb = new StringBuilder(); StringBuilder buf = new StringBuilder();
if (mode == ToRangeMode.BRACKETED) { Iterator<Interval> iter = intervalSet.getIntervals().iterator();
sb.append("["); while (iter.hasNext()) {
} Interval interval = iter.next();
sb.appendCodePoint(codePointStart) buf.append(getRangeEscapedString(interval.a, interval.b));
.append("-") if (iter.hasNext()) {
.appendCodePoint(codePointEnd); buf.append(" | ");
if (mode == ToRangeMode.BRACKETED) { }
sb.append("]"); }
} return buf.toString();
return sb.toString(); }
public static String getRangeEscapedString(int codePointStart, int codePointEnd) {
return codePointStart != codePointEnd
? getANTLRCharLiteralForChar(codePointStart) + ".." + getANTLRCharLiteralForChar(codePointEnd)
: getANTLRCharLiteralForChar(codePointStart);
} }
} }

View File

@@ -1054,7 +1054,7 @@ public enum ErrorType {
* *
* TODO: Does not work with fragment rules. * TODO: Does not work with fragment rules.
*/ */
CHARACTERS_COLLISION_IN_SET(180, "chars \"<arg>\" used multiple times in set <arg2>", ErrorSeverity.WARNING), CHARACTERS_COLLISION_IN_SET(180, "chars <arg> used multiple times in set <arg2>", ErrorSeverity.WARNING),
/** /**
* Compiler Warning 181 * Compiler Warning 181