forked from jasder/antlr
almost got new error mech in
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9087]
This commit is contained in:
parent
9e69640bc1
commit
60c9fe76c0
|
@ -0,0 +1,61 @@
|
|||
package org.antlr.v4.runtime;
|
||||
|
||||
/** The interface for defining strategies to deal with syntax errors
|
||||
* encountered during a parse by ANTLR-generated parsers and tree parsers.
|
||||
* We distinguish between three different kinds of errors:
|
||||
*
|
||||
* o The parser could not figure out which path to take in the ATN
|
||||
* (none of the available alternatives could possibly match)
|
||||
* o The current input does not match what we were looking for.
|
||||
* o A predicate evaluated to false.
|
||||
*
|
||||
* The default implementation of this interface reports errors to any
|
||||
* error listeners of the parser. It also handles single token insertion
|
||||
* and deletion for mismatched elements.
|
||||
*
|
||||
* We pass in the parser to each function so that the same strategy
|
||||
* can be shared between multiple parsers running at the same time.
|
||||
* This is just for flexibility, not that we need it for the default system.
|
||||
*
|
||||
* TODO: what to do about lexers
|
||||
*/
|
||||
public interface ANTLRErrorStrategy {
|
||||
/** Report any kind of RecognitionException. */
|
||||
void reportError(BaseRecognizer recognizer,
|
||||
RecognitionException e)
|
||||
throws RecognitionException;
|
||||
|
||||
/** When matching elements within alternative, use this method
|
||||
* to recover. The default implementation uses single token
|
||||
* insertion and deletion. If you want to change the way ANTLR
|
||||
* responds to mismatched element errors within an alternative,
|
||||
* implement this method.
|
||||
*
|
||||
* From the recognizer, we can get the input stream to get
|
||||
* the current input symbol and we can get the current context.
|
||||
* That context gives us the current state within the ATN.
|
||||
* From that state, we can look at its transition to figure out
|
||||
* what was expected.
|
||||
*
|
||||
* Because we can recover from single token deletions by
|
||||
* "inserting" tokens, we need to specify what that implicitly created
|
||||
* token is. We use object, because it could be a tree node.
|
||||
*
|
||||
* To bail out upon first error, simply throw a RecognitionException.
|
||||
*/
|
||||
Object recoverInline(BaseRecognizer recognizer)
|
||||
throws RecognitionException;
|
||||
|
||||
/** Resynchronize the parser by consuming tokens until we find one
|
||||
* in the resynchronization set--loosely the set of tokens that can follow
|
||||
* the current rule.
|
||||
*
|
||||
* To bail out upon first error, simply throw a RecognitionException.
|
||||
*/
|
||||
void recover(BaseRecognizer recognizer);
|
||||
|
||||
/** Reset the error handler. The parser invokes this
|
||||
* from its own reset method.
|
||||
*/
|
||||
void reset();
|
||||
}
|
|
@ -31,5 +31,5 @@ package org.antlr.v4.runtime;
|
|||
|
||||
/** */
|
||||
public interface ANTLRParserListener {
|
||||
public void error(RecognitionException msg);
|
||||
public void error(int line, int charPositionInLine, String msg);
|
||||
}
|
||||
|
|
|
@ -63,21 +63,8 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
|
|||
*/
|
||||
protected boolean errorRecovery = false;
|
||||
|
||||
/** The index into the input stream where the last error occurred.
|
||||
* This is used to prevent infinite loops where an error is found
|
||||
* but no token is consumed during recovery...another error is found,
|
||||
* ad nauseam. This is a failsafe mechanism to guarantee that at least
|
||||
* one token/tree node is consumed for two errors.
|
||||
*/
|
||||
protected int lastErrorIndex = -1;
|
||||
|
||||
/** In lieu of a return value, this indicates that a rule or token
|
||||
* has failed to match. Reset to false upon valid token match.
|
||||
*/
|
||||
// protected boolean failed = false;
|
||||
|
||||
/** Did the recognizer encounter a syntax error? Track how many. */
|
||||
public int syntaxErrors = 0;
|
||||
protected int syntaxErrors = 0;
|
||||
|
||||
public BaseRecognizer(IntStream input) {
|
||||
setInputStream(input);
|
||||
|
@ -88,8 +75,6 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
|
|||
if ( getInputStream()!=null ) getInputStream().seek(0);
|
||||
errorRecovery = false;
|
||||
_ctx = null;
|
||||
lastErrorIndex = -1;
|
||||
// failed = false;
|
||||
}
|
||||
|
||||
/** Match current input symbol against ttype. Attempt
|
||||
|
@ -102,6 +87,7 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
|
|||
* This way any error in a rule will cause an exception and
|
||||
* immediate exit from rule. Rule would recover by resynchronizing
|
||||
* to the set of symbols that can follow rule ref.
|
||||
* TODO: mv into Parser etc... to get more precise return value/efficiency
|
||||
*/
|
||||
public Object match(int ttype) throws RecognitionException {
|
||||
// System.out.println("match "+((TokenStream)input).LT(1)+" vs expected "+ttype);
|
||||
|
@ -109,18 +95,18 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
|
|||
if ( getInputStream().LA(1)==ttype ) {
|
||||
getInputStream().consume();
|
||||
errorRecovery = false;
|
||||
// failed = false;
|
||||
if ( buildParseTrees ) _ctx.addChild((Token)matchedSymbol);
|
||||
return matchedSymbol;
|
||||
}
|
||||
return _errHandler.recoverInline(this);
|
||||
// System.out.println("MATCH failure at state "+_ctx.s+
|
||||
// ", ctx="+_ctx.toString(this));
|
||||
IntervalSet expecting = _interp.atn.nextTokens(_ctx);
|
||||
// IntervalSet expecting = _interp.atn.nextTokens(_ctx);
|
||||
// System.out.println("could match "+expecting);
|
||||
|
||||
matchedSymbol = recoverFromMismatchedToken(ttype, expecting);
|
||||
// matchedSymbol = recoverFromMismatchedToken(ttype, expecting);
|
||||
// System.out.println("rsync'd to "+matchedSymbol);
|
||||
return matchedSymbol;
|
||||
// return matchedSymbol;
|
||||
}
|
||||
|
||||
// like matchSet but w/o consume; error checking routine.
|
||||
|
@ -129,7 +115,7 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
|
|||
// System.out.println("failed sync to "+expecting);
|
||||
IntervalSet followSet = computeErrorRecoverySet();
|
||||
followSet.addAll(expecting);
|
||||
NoViableAltException e = new NoViableAltException(this, _ctx);
|
||||
NoViableAltException e = new NoViableAltException(this);
|
||||
recoverFromMismatchedSet(e, followSet);
|
||||
}
|
||||
|
||||
|
@ -237,7 +223,7 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
|
|||
syntaxErrors++; // don't count spurious
|
||||
errorRecovery = true;
|
||||
|
||||
notifyListeners(e);
|
||||
notifyListeners(e.line, e.charPositionInLine, e.getMessage());
|
||||
}
|
||||
|
||||
|
||||
|
@ -277,15 +263,6 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
|
|||
*/
|
||||
}
|
||||
|
||||
/** A hook to listen in on the token consumption during error recovery.
|
||||
* The DebugParser subclasses this to fire events to the listener.
|
||||
*/
|
||||
public void beginResync() {
|
||||
}
|
||||
|
||||
public void endResync() {
|
||||
}
|
||||
|
||||
/* Compute the error recovery set for the current rule. During
|
||||
* rule invocation, the parser pushes the set of tokens that can
|
||||
* follow that rule reference on the stack; this amounts to
|
||||
|
@ -498,9 +475,7 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
|
|||
((TokenStream)input).LT(1)+
|
||||
" since "+((TokenStream)input).LT(2)+" is what we want");
|
||||
*/
|
||||
beginResync();
|
||||
getInputStream().consume(); // simply delete extra token
|
||||
endResync();
|
||||
reportError(e); // report after consuming so AW sees the token in the exception
|
||||
// we want to return the token we're actually matching
|
||||
Object matchedSymbol = getCurrentInputSymbol();
|
||||
|
@ -545,6 +520,15 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
|
|||
*/
|
||||
protected Object getCurrentInputSymbol() { return null; }
|
||||
|
||||
public void notifyListeners(int line, int charPositionInLine, String msg) {
|
||||
if ( _listeners==null || _listeners.size()==0 ) {
|
||||
emitErrorMessage("line "+line+":"+charPositionInLine+" "+msg);
|
||||
return;
|
||||
}
|
||||
for (ANTLRParserListener pl : _listeners) {
|
||||
pl.error(line, charPositionInLine, msg);
|
||||
}
|
||||
}
|
||||
|
||||
public void enterOuterAlt(ParserRuleContext localctx, int altNum) {
|
||||
_ctx = localctx;
|
||||
|
|
|
@ -0,0 +1,448 @@
|
|||
package org.antlr.v4.runtime;
|
||||
|
||||
import org.antlr.v4.runtime.atn.ATN;
|
||||
import org.antlr.v4.runtime.atn.ATNState;
|
||||
import org.antlr.v4.runtime.atn.RuleTransition;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
|
||||
/** This is the default error handling mechanism for ANTLR parsers
|
||||
* and tree parsers.
|
||||
*/
|
||||
public class DefaultANTLRErrorStrategy implements ANTLRErrorStrategy {
|
||||
/** This is true when we see an error and before having successfully
|
||||
* matched a token. Prevents generation of more than one error message
|
||||
* per error.
|
||||
*/
|
||||
protected boolean errorRecovery = false;
|
||||
|
||||
/** The index into the input stream where the last error occurred.
|
||||
* This is used to prevent infinite loops where an error is found
|
||||
* but no token is consumed during recovery...another error is found,
|
||||
* ad nauseam. This is a failsafe mechanism to guarantee that at least
|
||||
* one token/tree node is consumed for two errors.
|
||||
*/
|
||||
protected int lastErrorIndex = -1;
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
errorRecovery = false;
|
||||
lastErrorIndex = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reportError(BaseRecognizer recognizer,
|
||||
RecognitionException e)
|
||||
throws RecognitionException
|
||||
{
|
||||
if ( e instanceof NoViableAltException ) {
|
||||
reportNoViableAlternative(recognizer, (NoViableAltException)e);
|
||||
}
|
||||
else if ( e instanceof InputMismatchException ) {
|
||||
reportInputMismatch(recognizer, (InputMismatchException)e);
|
||||
}
|
||||
else if ( e instanceof FailedPredicateException ) {
|
||||
reportFailedPredicate(recognizer, (FailedPredicateException)e);
|
||||
}
|
||||
else {
|
||||
System.err.println("unknown recognition error type: "+e.getClass().getName());
|
||||
if ( recognizer!=null ) {
|
||||
recognizer.notifyListeners(e.line, e.charPositionInLine, e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Recover from NoViableAlt errors. Also there could be a mismatched
|
||||
* token that the match() routine could not recover from.
|
||||
*/
|
||||
@Override
|
||||
public void recover(BaseRecognizer recognizer) {
|
||||
if ( lastErrorIndex==recognizer.getInputStream().index() ) {
|
||||
// uh oh, another error at same token index; must be a case
|
||||
// where LT(1) is in the recovery token set so nothing is
|
||||
// consumed; consume a single token at least to prevent
|
||||
// an infinite loop; this is a failsafe.
|
||||
recognizer.getInputStream().consume();
|
||||
}
|
||||
lastErrorIndex = recognizer.getInputStream().index();
|
||||
IntervalSet followSet = computeErrorRecoverySet(recognizer);
|
||||
consumeUntil(recognizer, followSet);
|
||||
}
|
||||
|
||||
public void reportNoViableAlternative(BaseRecognizer recognizer,
|
||||
NoViableAltException e)
|
||||
throws RecognitionException
|
||||
{
|
||||
if ( recognizer.errorRecovery ) return;
|
||||
trackError(recognizer);
|
||||
|
||||
String msg = "no viable alternative at input "+getTokenErrorDisplay(e.token);
|
||||
recognizer.notifyListeners(e.line, e.charPositionInLine, msg);
|
||||
}
|
||||
|
||||
public void reportInputMismatch(BaseRecognizer recognizer,
|
||||
InputMismatchException e)
|
||||
throws RecognitionException
|
||||
{
|
||||
if ( recognizer.errorRecovery ) return;
|
||||
trackError(recognizer);
|
||||
|
||||
String msg = "mismatched input "+getTokenErrorDisplay(e.token)+
|
||||
" expecting "+e.expecting.toString(recognizer.getTokenNames());
|
||||
recognizer.notifyListeners(e.line, e.charPositionInLine, msg);
|
||||
}
|
||||
|
||||
public void reportFailedPredicate(BaseRecognizer recognizer,
|
||||
FailedPredicateException e)
|
||||
throws RecognitionException
|
||||
{
|
||||
if ( recognizer.errorRecovery ) return;
|
||||
trackError(recognizer);
|
||||
|
||||
String ruleName = recognizer.getRuleNames()[recognizer._ctx.getRuleIndex()];
|
||||
String msg = "rule "+ruleName+" failed predicate: {"+
|
||||
e.predicateText+"}?";
|
||||
recognizer.notifyListeners(e.line, e.charPositionInLine, msg);
|
||||
}
|
||||
|
||||
public void reportUnwantedToken(BaseRecognizer recognizer) {
|
||||
if ( recognizer.errorRecovery ) return;
|
||||
trackError(recognizer);
|
||||
|
||||
Token t = (Token)recognizer.getCurrentInputSymbol();
|
||||
|
||||
String tokenName = getTokenErrorDisplay(t);
|
||||
IntervalSet expecting = getExpectedTokens(recognizer);
|
||||
String msg = "extraneous input "+tokenName+" expecting "+
|
||||
expecting.toString(recognizer.getTokenNames());
|
||||
recognizer.notifyListeners(t.getLine(), t.getCharPositionInLine(), msg);
|
||||
}
|
||||
|
||||
public void reportMissingToken(BaseRecognizer recognizer) {
|
||||
if ( recognizer.errorRecovery ) return;
|
||||
trackError(recognizer);
|
||||
|
||||
Token t = (Token)recognizer.getCurrentInputSymbol();
|
||||
IntervalSet expecting = getExpectedTokens(recognizer);
|
||||
String msg = "missing "+expecting.toString(recognizer.getTokenNames())+
|
||||
" at "+getTokenErrorDisplay(t);
|
||||
|
||||
recognizer.notifyListeners(t.getLine(), t.getCharPositionInLine(), msg);
|
||||
}
|
||||
|
||||
/** Attempt to recover from a single missing or extra token.
|
||||
*
|
||||
* EXTRA TOKEN
|
||||
*
|
||||
* LA(1) is not what we are looking for. If LA(2) has the right token,
|
||||
* however, then assume LA(1) is some extra spurious token. Delete it
|
||||
* and match LA(2) as if we were doing a normal match(), which advances the
|
||||
* input.
|
||||
*
|
||||
* MISSING TOKEN
|
||||
*
|
||||
* If current token is consistent with what could come after
|
||||
* ttype then it is ok to "insert" the missing token, else throw
|
||||
* exception. For example, input "i=(3;" is clearly missing the
|
||||
* ')'. When the parser returns from the nested call to expr, it
|
||||
* will have call chain:
|
||||
*
|
||||
* stat -> expr -> atom
|
||||
*
|
||||
* and it will be trying to match the ')' at this point in the
|
||||
* derivation:
|
||||
*
|
||||
* => ID '=' '(' INT ')' ('+' atom)* ';'
|
||||
* ^
|
||||
* match() will see that ';' doesn't match ')' and report a
|
||||
* mismatched token error. To recover, it sees that LA(1)==';'
|
||||
* is in the set of tokens that can follow the ')' token
|
||||
* reference in rule atom. It can assume that you forgot the ')'.
|
||||
*/
|
||||
@Override
|
||||
public Object recoverInline(BaseRecognizer recognizer)
|
||||
throws RecognitionException
|
||||
{
|
||||
IntervalSet expecting = getExpectedTokens(recognizer);
|
||||
IntervalSet follow = null;
|
||||
|
||||
RecognitionException e = null;
|
||||
// if next token is what we are looking for then "delete" this token
|
||||
int nextTokenType = recognizer.getInputStream().LA(2);
|
||||
if ( expecting.member(nextTokenType) ) {
|
||||
reportUnwantedToken(recognizer);
|
||||
System.err.println("recoverFromMismatchedToken deleting "+
|
||||
((TokenStream)recognizer.getInputStream()).LT(1)+
|
||||
" since "+((TokenStream)recognizer.getInputStream()).LT(2)+
|
||||
" is what we want");
|
||||
recognizer.getInputStream().consume(); // simply delete extra token
|
||||
// we want to return the token we're actually matching
|
||||
Object matchedSymbol = recognizer.getCurrentInputSymbol();
|
||||
recognizer.getInputStream().consume(); // move past ttype token as if all were ok
|
||||
return matchedSymbol;
|
||||
}
|
||||
// can't recover with single token deletion, try insertion
|
||||
if ( mismatchIsMissingToken() ) {
|
||||
reportMissingToken(recognizer);
|
||||
return getMissingSymbol(recognizer);
|
||||
}
|
||||
// even that didn't work; must throw the exception
|
||||
throw new InputMismatchException(recognizer);
|
||||
}
|
||||
|
||||
protected IntervalSet getExpectedTokens(BaseRecognizer recognizer) {
|
||||
return recognizer._interp.atn.nextTokens(recognizer._ctx);
|
||||
}
|
||||
|
||||
public boolean mismatchIsMissingToken() {
|
||||
return false;
|
||||
/*
|
||||
if ( follow==null ) {
|
||||
// we have no information about the follow; we can only consume
|
||||
// a single token and hope for the best
|
||||
return false;
|
||||
}
|
||||
// compute what can follow this grammar element reference
|
||||
if ( follow.member(Token.EOR_TOKEN_TYPE) ) {
|
||||
IntervalSet viableTokensFollowingThisRule = computeNextViableTokenSet();
|
||||
follow = follow.or(viableTokensFollowingThisRule);
|
||||
if ( ctx.sp>=0 ) { // remove EOR if we're not the start symbol
|
||||
follow.remove(Token.EOR_TOKEN_TYPE);
|
||||
}
|
||||
}
|
||||
// if current token is consistent with what could come after set
|
||||
// then we know we're missing a token; error recovery is free to
|
||||
// "insert" the missing token
|
||||
|
||||
//System.out.println("viable tokens="+follow.toString(getTokenNames()));
|
||||
//System.out.println("LT(1)="+((TokenStream)input).LT(1));
|
||||
|
||||
// IntervalSet cannot handle negative numbers like -1 (EOF) so I leave EOR
|
||||
// in follow set to indicate that the follow of the start symbol is
|
||||
// in the set (EOF can follow).
|
||||
if ( follow.member(input.LA(1)) || follow.member(Token.EOR_TOKEN_TYPE) ) {
|
||||
//System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
*/
|
||||
}
|
||||
|
||||
/** Conjure up a missing token during error recovery.
|
||||
*
|
||||
* The recognizer attempts to recover from single missing
|
||||
* symbols. But, actions might refer to that missing symbol.
|
||||
* For example, x=ID {f($x);}. The action clearly assumes
|
||||
* that there has been an identifier matched previously and that
|
||||
* $x points at that token. If that token is missing, but
|
||||
* the next token in the stream is what we want we assume that
|
||||
* this token is missing and we keep going. Because we
|
||||
* have to return some token to replace the missing token,
|
||||
* we have to conjure one up. This method gives the user control
|
||||
* over the tokens returned for missing tokens. Mostly,
|
||||
* you will want to create something special for identifier
|
||||
* tokens. For literals such as '{' and ',', the default
|
||||
* action in the parser or tree parser works. It simply creates
|
||||
* a CommonToken of the appropriate type. The text will be the token.
|
||||
* If you change what tokens must be created by the lexer,
|
||||
* override this method to create the appropriate tokens.
|
||||
*/
|
||||
protected Object getMissingSymbol(BaseRecognizer recognizer) {
|
||||
IntervalSet expecting = getExpectedTokens(recognizer);
|
||||
int expectedTokenType = expecting.getMinElement(); // get any element
|
||||
String tokenText = null;
|
||||
if ( expectedTokenType== Token.EOF ) tokenText = "<missing EOF>";
|
||||
else tokenText = "<missing "+recognizer.getTokenNames()[expectedTokenType]+">";
|
||||
CommonToken t = new CommonToken(expectedTokenType, tokenText);
|
||||
Token current = (Token)recognizer.getCurrentInputSymbol();
|
||||
if ( current.getType() == Token.EOF ) {
|
||||
current = ((TokenStream)recognizer.getInputStream()).LT(-1);
|
||||
}
|
||||
t.line = current.getLine();
|
||||
t.charPositionInLine = current.getCharPositionInLine();
|
||||
t.channel = Token.DEFAULT_CHANNEL;
|
||||
t.source = current.getTokenSource();
|
||||
return t;
|
||||
}
|
||||
|
||||
/** How should a token be displayed in an error message? The default
|
||||
* is to display just the text, but during development you might
|
||||
* want to have a lot of information spit out. Override in that case
|
||||
* to use t.toString() (which, for CommonToken, dumps everything about
|
||||
* the token). This is better than forcing you to override a method in
|
||||
* your token objects because you don't have to go modify your lexer
|
||||
* so that it creates a new Java type.
|
||||
*/
|
||||
public String getTokenErrorDisplay(Token t) {
|
||||
if ( t==null ) return "<no token>";
|
||||
String s = t.getText();
|
||||
if ( s==null ) {
|
||||
if ( t.getType()==Token.EOF ) {
|
||||
s = "<EOF>";
|
||||
}
|
||||
else {
|
||||
s = "<"+t.getType()+">";
|
||||
}
|
||||
}
|
||||
s = s.replaceAll("\n","\\\\n");
|
||||
s = s.replaceAll("\r","\\\\r");
|
||||
s = s.replaceAll("\t","\\\\t");
|
||||
return "'"+s+"'";
|
||||
}
|
||||
|
||||
/** Report a recognition problem.
|
||||
*
|
||||
* This method sets errorRecovery to indicate the parser is recovering
|
||||
* not parsing. Once in recovery mode, no errors are generated.
|
||||
* To get out of recovery mode, the parser must successfully match
|
||||
* a token (after a resync). So it will go:
|
||||
*
|
||||
* 1. error occurs
|
||||
* 2. enter recovery mode, report error
|
||||
* 3. consume until token found in resynch set
|
||||
* 4. try to resume parsing
|
||||
* 5. next match() will reset errorRecovery mode
|
||||
*/
|
||||
// public void _reportError(BaseRecognizer recognizer,
|
||||
// RecognitionException e) {
|
||||
// // if we've already reported an error and have not matched a token
|
||||
// // yet successfully, don't report any errors.
|
||||
// if ( recognizer.errorRecovery ) return;
|
||||
// trackError(recognizer);
|
||||
//
|
||||
// recognizer.notifyListeners(e.line, e.charPositionInLine, "dsfdkjasdf");
|
||||
// }
|
||||
|
||||
/* Compute the error recovery set for the current rule. During
|
||||
* rule invocation, the parser pushes the set of tokens that can
|
||||
* follow that rule reference on the stack; this amounts to
|
||||
* computing FIRST of what follows the rule reference in the
|
||||
* enclosing rule. See LinearApproximator.FIRST().
|
||||
* This local follow set only includes tokens
|
||||
* from within the rule; i.e., the FIRST computation done by
|
||||
* ANTLR stops at the end of a rule.
|
||||
*
|
||||
* EXAMPLE
|
||||
*
|
||||
* When you find a "no viable alt exception", the input is not
|
||||
* consistent with any of the alternatives for rule r. The best
|
||||
* thing to do is to consume tokens until you see something that
|
||||
* can legally follow a call to r *or* any rule that called r.
|
||||
* You don't want the exact set of viable next tokens because the
|
||||
* input might just be missing a token--you might consume the
|
||||
* rest of the input looking for one of the missing tokens.
|
||||
*
|
||||
* Consider grammar:
|
||||
*
|
||||
* a : '[' b ']'
|
||||
* | '(' b ')'
|
||||
* ;
|
||||
* b : c '^' INT ;
|
||||
* c : ID
|
||||
* | INT
|
||||
* ;
|
||||
*
|
||||
* At each rule invocation, the set of tokens that could follow
|
||||
* that rule is pushed on a stack. Here are the various
|
||||
* context-sensitive follow sets:
|
||||
*
|
||||
* FOLLOW(b1_in_a) = FIRST(']') = ']'
|
||||
* FOLLOW(b2_in_a) = FIRST(')') = ')'
|
||||
* FOLLOW(c_in_b) = FIRST('^') = '^'
|
||||
*
|
||||
* Upon erroneous input "[]", the call chain is
|
||||
*
|
||||
* a -> b -> c
|
||||
*
|
||||
* and, hence, the follow context stack is:
|
||||
*
|
||||
* depth follow set start of rule execution
|
||||
* 0 <EOF> a (from main())
|
||||
* 1 ']' b
|
||||
* 2 '^' c
|
||||
*
|
||||
* Notice that ')' is not included, because b would have to have
|
||||
* been called from a different context in rule a for ')' to be
|
||||
* included.
|
||||
*
|
||||
* For error recovery, we cannot consider FOLLOW(c)
|
||||
* (context-sensitive or otherwise). We need the combined set of
|
||||
* all context-sensitive FOLLOW sets--the set of all tokens that
|
||||
* could follow any reference in the call chain. We need to
|
||||
* resync to one of those tokens. Note that FOLLOW(c)='^' and if
|
||||
* we resync'd to that token, we'd consume until EOF. We need to
|
||||
* sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
|
||||
* In this case, for input "[]", LA(1) is ']' and in the set, so we would
|
||||
* not consume anything. After printing an error, rule c would
|
||||
* return normally. Rule b would not find the required '^' though.
|
||||
* At this point, it gets a mismatched token error and throws an
|
||||
* exception (since LA(1) is not in the viable following token
|
||||
* set). The rule exception handler tries to recover, but finds
|
||||
* the same recovery set and doesn't consume anything. Rule b
|
||||
* exits normally returning to rule a. Now it finds the ']' (and
|
||||
* with the successful match exits errorRecovery mode).
|
||||
*
|
||||
* So, you can see that the parser walks up the call chain looking
|
||||
* for the token that was a member of the recovery set.
|
||||
*
|
||||
* Errors are not generated in errorRecovery mode.
|
||||
*
|
||||
* ANTLR's error recovery mechanism is based upon original ideas:
|
||||
*
|
||||
* "Algorithms + Data Structures = Programs" by Niklaus Wirth
|
||||
*
|
||||
* and
|
||||
*
|
||||
* "A note on error recovery in recursive descent parsers":
|
||||
* http://portal.acm.org/citation.cfm?id=947902.947905
|
||||
*
|
||||
* Later, Josef Grosch had some good ideas:
|
||||
*
|
||||
* "Efficient and Comfortable Error Recovery in Recursive Descent
|
||||
* Parsers":
|
||||
* ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
|
||||
*
|
||||
* Like Grosch I implement context-sensitive FOLLOW sets that are combined
|
||||
* at run-time upon error to avoid overhead during parsing.
|
||||
*/
|
||||
protected IntervalSet computeErrorRecoverySet(BaseRecognizer recognizer) {
|
||||
ATN atn = recognizer._interp.atn;
|
||||
RuleContext ctx = recognizer._ctx;
|
||||
IntervalSet recoverSet = new IntervalSet();
|
||||
while ( ctx!=null && ctx.invokingState>=0 ) {
|
||||
// compute what follows who invoked us
|
||||
ATNState invokingState = atn.states.get(ctx.invokingState);
|
||||
RuleTransition rt = (RuleTransition)invokingState.transition(0);
|
||||
IntervalSet follow = atn.nextTokens(rt.followState, null);
|
||||
recoverSet.addAll(follow);
|
||||
ctx = ctx.parent;
|
||||
}
|
||||
System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames()));
|
||||
return recoverSet;
|
||||
}
|
||||
|
||||
// public void consumeUntil(BaseRecognizer recognizer, int tokenType) {
|
||||
// //System.out.println("consumeUntil "+tokenType);
|
||||
// int ttype = recognizer.getInputStream().LA(1);
|
||||
// while (ttype != Token.EOF && ttype != tokenType) {
|
||||
// recognizer.getInputStream().consume();
|
||||
// ttype = recognizer.getInputStream().LA(1);
|
||||
// }
|
||||
// }
|
||||
|
||||
/** Consume tokens until one matches the given token set */
|
||||
public void consumeUntil(BaseRecognizer recognizer, IntervalSet set) {
|
||||
//System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
|
||||
int ttype = recognizer.getInputStream().LA(1);
|
||||
while (ttype != Token.EOF && !set.member(ttype) ) {
|
||||
//System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
|
||||
recognizer.getInputStream().consume();
|
||||
ttype = recognizer.getInputStream().LA(1);
|
||||
}
|
||||
}
|
||||
|
||||
protected void trackError(BaseRecognizer recognizer) {
|
||||
recognizer.syntaxErrors++;
|
||||
recognizer.errorRecovery = true;
|
||||
}
|
||||
|
||||
}
|
|
@ -40,6 +40,11 @@ public class FailedPredicateException extends RecognitionException {
|
|||
/** Used for remote debugger deserialization */
|
||||
public FailedPredicateException() {;}
|
||||
|
||||
public FailedPredicateException(BaseRecognizer recognizer, String predText) {
|
||||
super(recognizer);
|
||||
this.predicateText = predText;
|
||||
}
|
||||
|
||||
public FailedPredicateException(BaseRecognizer recognizer,
|
||||
IntStream input,
|
||||
String ruleName,
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
package org.antlr.v4.runtime;
|
||||
|
||||
/** This signifies any kind of mismatched input exceptions such as
|
||||
* when the current input does not match the expected token or tree node.
|
||||
*/
|
||||
public class InputMismatchException extends RecognitionException {
|
||||
public InputMismatchException(BaseRecognizer recognizer) {
|
||||
super(recognizer, recognizer.getInputStream(), recognizer._ctx);
|
||||
}
|
||||
}
|
|
@ -41,8 +41,8 @@ public class NoViableAltException extends RecognitionException {
|
|||
/** Used for remote debugger deserialization */
|
||||
public NoViableAltException() {;}
|
||||
|
||||
public NoViableAltException(BaseRecognizer recognizer, RuleContext ctx) { // LL(1) error
|
||||
super(recognizer, recognizer.getInputStream(), ctx);
|
||||
public NoViableAltException(BaseRecognizer recognizer) { // LL(1) error
|
||||
super(recognizer, recognizer.getInputStream(), recognizer._ctx);
|
||||
}
|
||||
|
||||
public NoViableAltException(BaseRecognizer recognizer, IntStream input,
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
*/
|
||||
package org.antlr.v4.runtime;
|
||||
|
||||
import com.sun.org.apache.regexp.internal.RE;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
import org.antlr.v4.runtime.tree.*;
|
||||
|
||||
|
@ -108,6 +109,10 @@ public class RecognitionException extends RuntimeException {
|
|||
public RecognitionException() {
|
||||
}
|
||||
|
||||
public RecognitionException(BaseRecognizer recognizer) {
|
||||
this(recognizer, recognizer.getInputStream(), recognizer._ctx);
|
||||
}
|
||||
|
||||
public RecognitionException(BaseRecognizer recognizer, IntStream input,
|
||||
RuleContext ctx)
|
||||
{
|
||||
|
@ -126,7 +131,7 @@ public class RecognitionException extends RuntimeException {
|
|||
this.line = token.getLine();
|
||||
this.charPositionInLine = token.getCharPositionInLine();
|
||||
}
|
||||
if ( input instanceof ASTNodeStream) {
|
||||
else if ( input instanceof ASTNodeStream) {
|
||||
//extractInformationFromTreeNodeStream(input);
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -36,7 +36,8 @@ import java.util.*;
|
|||
public class Recognizer<ATNInterpreter> {
|
||||
public static final int EOF=-1;
|
||||
|
||||
protected List<ANTLRParserListener> listeners;
|
||||
protected ANTLRErrorStrategy _errHandler = new DefaultANTLRErrorStrategy();
|
||||
protected List<ANTLRParserListener> _listeners;
|
||||
|
||||
protected ATNInterpreter _interp;
|
||||
|
||||
|
@ -195,27 +196,22 @@ public class Recognizer<ATNInterpreter> {
|
|||
}
|
||||
|
||||
public void addListener(ANTLRParserListener pl) {
|
||||
if ( listeners==null ) {
|
||||
listeners =
|
||||
if ( _listeners ==null ) {
|
||||
_listeners =
|
||||
Collections.synchronizedList(new ArrayList<ANTLRParserListener>(2));
|
||||
}
|
||||
if ( pl!=null ) listeners.add(pl);
|
||||
if ( pl!=null ) _listeners.add(pl);
|
||||
}
|
||||
|
||||
public void removeListener(ANTLRParserListener pl) { listeners.remove(pl); }
|
||||
public void removeListener(ANTLRParserListener pl) { _listeners.remove(pl); }
|
||||
|
||||
public void removeListeners() { listeners.clear(); }
|
||||
public void removeListeners() { _listeners.clear(); }
|
||||
|
||||
public List<ANTLRParserListener> getListeners() { return listeners; }
|
||||
public List<ANTLRParserListener> getListeners() { return _listeners; }
|
||||
|
||||
public void notifyListeners(RecognitionException re) {
|
||||
if ( listeners==null || listeners.size()==0 ) {
|
||||
// call legacy v3 func; this calls emitErrorMessage(String msg)
|
||||
displayRecognitionError(re);
|
||||
return;
|
||||
}
|
||||
for (ANTLRParserListener pl : listeners) pl.error(re);
|
||||
}
|
||||
/** Returns the error-handling strategy currently stored in {@code _errHandler}. */
public ANTLRErrorStrategy getErrHandler() { return _errHandler; }
|
||||
|
||||
/** Replaces the error-handling strategy; {@code h} is stored as given, with no null check. */
public void setErrHandler(ANTLRErrorStrategy h) { this._errHandler = h; }
|
||||
|
||||
// subclass needs to override these if there are sempreds or actions
|
||||
// that the ATN interp needs to execute
|
||||
|
|
|
@ -72,13 +72,21 @@ public class ATN {
|
|||
/** Used for runtime deserialization of ATNs from strings */
|
||||
public ATN() { }
|
||||
|
||||
/** Compute the set of valid tokens reachable from the current
|
||||
* position in the parse. ctx must not be null.
|
||||
*/
|
||||
public IntervalSet nextTokens(RuleContext ctx) {
|
||||
return nextTokens(ctx.s, ctx);
|
||||
ATNState s = states.get(ctx.s);
|
||||
if ( s == null ) return null;
|
||||
return nextTokens(s, ctx);
|
||||
}
|
||||
|
||||
public IntervalSet nextTokens(int stateNumber, RuleContext ctx) {
|
||||
ATNState s = states.get(stateNumber);
|
||||
if ( s == null ) return null;
|
||||
/** Compute the set of valid tokens that can occur starting in s.
|
||||
* If ctx is null, the set of tokens will not include what can follow
|
||||
* the rule surrounding s. In other words, the set will be
|
||||
* restricted to tokens reachable staying within s's rule.
|
||||
*/
|
||||
public IntervalSet nextTokens(ATNState s, RuleContext ctx) {
|
||||
LL1Analyzer anal = new LL1Analyzer(this);
|
||||
IntervalSet next = anal.LOOK(s, ctx);
|
||||
return next;
|
||||
|
|
|
@ -466,54 +466,9 @@ public class IntervalSet implements IntSet {
|
|||
else buf.append(tokenNames[a]);
|
||||
}
|
||||
else {
|
||||
buf.append(tokenNames[a]+".."+tokenNames[b]);
|
||||
}
|
||||
if ( iter.hasNext() ) {
|
||||
buf.append(", ");
|
||||
}
|
||||
}
|
||||
if ( this.size()>1 ) {
|
||||
buf.append("}");
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
/*
|
||||
public String toString(Grammar g) {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
if ( this.intervals==null || this.intervals.size()==0 ) {
|
||||
return "{}";
|
||||
}
|
||||
if ( this.size()>1 ) {
|
||||
buf.append("{");
|
||||
}
|
||||
Iterator iter = this.intervals.iterator();
|
||||
while (iter.hasNext()) {
|
||||
Interval I = (Interval) iter.next();
|
||||
int a = I.a;
|
||||
int b = I.b;
|
||||
if ( a==b ) {
|
||||
if ( g!=null ) {
|
||||
buf.append(g.getTokenDisplayName(a));
|
||||
}
|
||||
else {
|
||||
buf.append(a);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if ( g!=null ) {
|
||||
if ( !g.isLexer() ) {
|
||||
for (int i=a; i<=b; i++) {
|
||||
if ( i>a ) buf.append(", ");
|
||||
buf.append(g.getTokenDisplayName(i));
|
||||
}
|
||||
}
|
||||
else {
|
||||
buf.append(g.getTokenDisplayName(a)+".."+g.getTokenDisplayName(b));
|
||||
}
|
||||
}
|
||||
else {
|
||||
buf.append(a+".."+b);
|
||||
buf.append(tokenNames[i]);
|
||||
}
|
||||
}
|
||||
if ( iter.hasNext() ) {
|
||||
|
@ -525,7 +480,6 @@ public class IntervalSet implements IntSet {
|
|||
}
|
||||
return buf.toString();
|
||||
}
|
||||
*/
|
||||
|
||||
public int size() {
|
||||
int n = 0;
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
import org.antlr.v4.runtime.*;
|
||||
import org.antlr.v4.runtime.tree.ParseTreeWalker;
|
||||
|
||||
/** Manual driver for the W grammar: lexes and parses one input file given on the command line. */
public class TestW {
|
||||
/**
 * Builds a WLexer over the file named by {@code args[0]}, feeds its tokens to a
 * WParser with parse-tree construction enabled, and invokes the parser's
 * {@code s()} entry point.
 *
 * @param args command-line arguments; {@code args[0]} is read unconditionally,
 *             so calling with no arguments fails with an index error
 */
public static void main(String[] args) throws Exception {
|
||||
WLexer t = new WLexer(new ANTLRFileStream(args[0]));
|
||||
CommonTokenStream tokens = new CommonTokenStream(t);
|
||||
// Disabled debugging aid: load all tokens up front and print each one.
// tokens.fill();
|
||||
// for (Object tok : tokens.getTokens()) {
|
||||
// System.out.println(tok);
|
||||
// }
|
||||
WParser p = new WParser(tokens);
|
||||
p.setBuildParseTrees(true);
|
||||
// Parse starting from rule s; the returned value is discarded.
p.s();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
// Small combined grammar (parser + lexer rules) used by the TestW driver.
grammar W;
|
||||
|
||||
// Start rule: one 'a' phrase terminated by ';'; the action prints "done" after a match.
s : a ';' {System.out.println("done");} ;
|
||||
|
||||
// A 'b' phrase enclosed in either square brackets or parentheses.
a : '[' b ']'
|
||||
| '(' b ')'
|
||||
;
|
||||
|
||||
// A 'c' operand, a caret, then an integer.
b : c '^' INT ;
|
||||
|
||||
// Operand: an identifier or an integer.
c : ID
|
||||
| INT
|
||||
;
|
||||
|
||||
// Lexer rules. EQ is defined but not referenced by any parser rule above.
EQ : '=' ;
|
||||
// One or more decimal digits.
INT : '0'..'9'+ ;
|
||||
// One or more lowercase ASCII letters.
ID : 'a'..'z'+ ;
|
||||
// Runs of spaces and newlines are matched and then skipped via skip().
WS : (' '|'\n')+ {skip();} ;
|
|
@ -197,8 +197,8 @@ RuleFunction(currentRule,code,locals,ruleCtx,altLabelCtxs,namedActions,finallyAc
|
|||
<namedActions.after>
|
||||
}
|
||||
catch (RecognitionException re) {
|
||||
reportError(re);
|
||||
recover();
|
||||
_errHandler.reportError(this, re);
|
||||
_errHandler.recover(this);
|
||||
}
|
||||
finally {
|
||||
<finallyAction>
|
||||
|
@ -313,8 +313,6 @@ switch ( _interp.adaptivePredict(_input,<choice.decision>,_ctx) ) {
|
|||
case <i>:
|
||||
<alt>
|
||||
break;}; separator="\n">
|
||||
default :
|
||||
<error>
|
||||
}
|
||||
>>
|
||||
|
||||
|
@ -359,7 +357,7 @@ case <i>:
|
|||
|
||||
Sync(s) ::= "sync(<s.expecting.name>);"
|
||||
|
||||
ThrowNoViableAlt(t) ::= "throw new NoViableAltException(this,_ctx);"
|
||||
ThrowNoViableAlt(t) ::= "throw new NoViableAltException(this);"
|
||||
|
||||
TestSetInline(s) ::= <<
|
||||
<s.ttypes:{ttype | <s.varName>==<ttype>}; separator=" || ">
|
||||
|
@ -411,7 +409,7 @@ CommonSetStuff(m, expr, capture, invert) ::= <<
|
|||
setState(<m.stateNumber>);
|
||||
<if(m.labels)><m.labels:{l | <labelref(l)> = }><endif>_input.LT(1);
|
||||
<capture>
|
||||
if ( <if(!invert)>!<endif>(<expr>) ) throw new MismatchedSetException(this, _input);
|
||||
if ( <if(!invert)>!<endif>(<expr>) ) throw new InputMismatchedException(this);
|
||||
_input.consume();
|
||||
>>
|
||||
|
||||
|
@ -429,7 +427,7 @@ ForcedAction(a, chunks) ::= "<chunks>"
|
|||
ArgAction(a, chunks) ::= "<chunks>"
|
||||
|
||||
SemPred(p, chunks) ::= <<
|
||||
if (!(<chunks>)) throw new FailedPredicateException(this, _input, "<currentRule.name>", ""<!"<chunks>"!>);
|
||||
if (!(<chunks>)) throw new FailedPredicateException(this, "");
|
||||
>>
|
||||
|
||||
ActionText(t) ::= "<t.text>"
|
||||
|
|
|
@ -36,7 +36,7 @@ import org.antlr.v4.tool.GrammarAST;
|
|||
import java.util.List;
|
||||
|
||||
public class AltBlock extends Choice {
|
||||
@ModelElement public ThrowNoViableAlt error;
|
||||
// @ModelElement public ThrowNoViableAlt error;
|
||||
|
||||
public AltBlock(OutputModelFactory factory,
|
||||
GrammarAST blkOrEbnfRootAST,
|
||||
|
@ -44,6 +44,7 @@ public class AltBlock extends Choice {
|
|||
{
|
||||
super(factory, blkOrEbnfRootAST, alts);
|
||||
decision = ((BlockStartState)blkOrEbnfRootAST.atnState).decision;
|
||||
this.error = new ThrowNoViableAlt(factory, blkOrEbnfRootAST, null);
|
||||
// interp.predict() throws exception
|
||||
// this.error = new ThrowNoViableAlt(factory, blkOrEbnfRootAST, null);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -312,6 +312,11 @@ public class ErrorManager {
|
|||
e.printStackTrace(System.err);
|
||||
}
|
||||
|
||||
/**
 * Reports {@code msg} through {@code rawError} and then delegates to the
 * no-argument {@code panic()}.
 *
 * @param msg the message passed to {@code rawError} before panicking
 */
public static void panic(String msg) {
|
||||
rawError(msg);
|
||||
panic();
|
||||
}
|
||||
|
||||
public static void panic() {
|
||||
// can't call tool.panic since there may be multiple tools; just
|
||||
// one error manager
|
||||
|
|
Loading…
Reference in New Issue