Updated ANTLRErrorStrategy and DefaultErrorStrategy documentation

This commit is contained in:
Sam Harwell 2013-03-26 15:59:05 -05:00
parent 1213aaad0b
commit a7e021da4c
2 changed files with 287 additions and 108 deletions

View File

@ -31,28 +31,23 @@
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
/** The interface for defining strategies to deal with syntax errors
* encountered during a parse by ANTLR-generated parsers and tree parsers.
* We distinguish between three different kinds of errors:
/**
* The interface for defining strategies to deal with syntax errors encountered
* during a parse by ANTLR-generated parsers. We distinguish between three
* different kinds of errors:
*
* o The parser could not figure out which path to take in the ATN
* (none of the available alternatives could possibly match)
* o The current input does not match what we were looking for.
* o A predicate evaluated to false.
* <ul>
* <li>The parser could not figure out which path to take in the ATN (none of
* the available alternatives could possibly match)</li>
* <li>The current input does not match what we were looking for</li>
* <li>A predicate evaluated to false</li>
* </ul>
*
* The default implementation of this interface reports errors to any
* error listeners of the parser. It also handles single token insertion
* and deletion for mismatched elements.
*
* We pass in the parser to each function so that the same strategy
* can be shared between multiple parsers running at the same time.
* This is just for flexibility, not that we need it for the default system.
*
* TODO: To bail out upon first error, simply rethrow e?
*
* TODO: what to do about lexers
* Implementations of this interface report syntax errors by calling
* {@link Parser#notifyErrorListeners}.
* <p/>
* TODO: what to do about lexers
*/
public interface ANTLRErrorStrategy {
/**
@ -61,70 +56,73 @@ public interface ANTLRErrorStrategy {
*/
void reset(@NotNull Parser recognizer);
/** When matching elements within alternative, use this method
* to recover. The default implementation uses single token
* insertion and deletion. If you want to change the way ANTLR
* response to mismatched element errors within an alternative,
* implement this method.
/**
* This method is called when an unexpected symbol is encountered during an
* inline match operation, such as {@link Parser#match}. If the error
* strategy successfully recovers from the match failure, this method
* returns the {@link Token} instance which should be treated as the
* successful result of the match.
* <p/>
* Note that the calling code will not report an error if this method
* returns successfully. The error strategy implementation is responsible
* for calling {@link Parser#notifyErrorListeners} as appropriate.
*
* From the recognizer, we can get the input stream to get
* the current input symbol and we can get the current context.
* That context gives us the current state within the ATN.
* From that state, we can look at its transition to figure out
* what was expected.
*
* Because we can recover from a single token deletions by
* "inserting" tokens, we need to specify what that implicitly created
* token is. We use object, because it could be a tree node.
* @param recognizer the parser instance
* @throws RecognitionException if the error strategy was not able to
* recover from the unexpected input symbol
*/
@NotNull
Token recoverInline(@NotNull Parser recognizer)
throws RecognitionException;
/** Resynchronize the parser by consuming tokens until we find one
* in the resynchronization set--loosely the set of tokens that can follow
* the current rule. The exception contains info you might want to
* use to recover better.
/**
* This method is called to recover from exception {@code e}. This method is
* called after {@link #reportError} by the default exception handler
* generated for a rule method.
*
* @see #reportError
*
* @param recognizer the parser instance
* @param e the recognition exception to recover from
* @throws RecognitionException if the error strategy could not recover from
* the recognition exception
*/
void recover(@NotNull Parser recognizer,
@NotNull RecognitionException e)
throws RecognitionException;
/** Make sure that the current lookahead symbol is consistent with
* what were expecting at this point in the ATN. You can call this
* anytime but ANTLR only generates code to check before subrules/loops
* and each iteration.
/**
* This method provides the error handler with an opportunity to handle
* syntactic or semantic errors in the input stream before they result in a
* {@link RecognitionException}.
* <p/>
* The generated code currently contains calls to {@link #sync} after
* entering the decision state of a closure block ({@code (...)*} or
* {@code (...)+}).
* <p/>
* For an implementation based on Jim Idle's "magic sync" mechanism, see
* {@link DefaultErrorStrategy#sync}.
*
* Implements Jim Idle's magic sync mechanism in closures and optional
* subrules. E.g.,
* @see DefaultErrorStrategy#sync
*
* a : sync ( stuff sync )* ;
* sync : {consume to what can follow sync} ;
*
* Previous versions of ANTLR did a poor job of their recovery within
* loops. A single mismatch token or missing token would force the parser
* to bail out of the entire rules surrounding the loop. So, for rule
*
* classDef : 'class' ID '{' member* '}'
*
* input with an extra token between members would force the parser to
* consume until it found the next class definition rather than the
* next member definition of the current class.
*
* This functionality cost a little bit of effort because the parser
* has to compare token set at the start of the loop and at each
* iteration. If for some reason speed is suffering for you, you can
* turn off this functionality by simply overriding this method as
* a blank { }.
* @param recognizer the parser instance
* @throws RecognitionException if an error is detected by the error
* strategy but cannot be automatically recovered at the current state in
* the parsing process
*/
void sync(@NotNull Parser recognizer)
throws RecognitionException;
/** Is the parser in the process of recovering from an error? Upon
* a syntax error, the parser enters recovery mode and stays there until
* the next successful match of a token. In this way, we can
* avoid sending out spurious error messages. We only want one error
* message per syntax error
/**
* Tests whether or not {@code recognizer} is in the process of recovering
* from an error. In error recovery mode, {@link Parser#consume} adds
* symbols to the parse tree by calling
* {@link ParserRuleContext#addErrorNode(Token)} instead of
* {@link ParserRuleContext#addChild(Token)}.
*
* @param recognizer the parser instance
* @return {@code true} if the parser is currently recovering from a parse
* error, otherwise {@code false}
*/
boolean inErrorRecoveryMode(@NotNull Parser recognizer);
@ -136,7 +134,13 @@ public interface ANTLRErrorStrategy {
*/
void reportMatch(@NotNull Parser recognizer);
/** Report any kind of RecognitionException. */
/**
* Report any kind of {@link RecognitionException}. This method is called by
* the default exception handler generated for a rule method.
*
* @param recognizer the parser instance
* @param e the recognition exception to report
*/
void reportError(@NotNull Parser recognizer,
@NotNull RecognitionException e);
}

View File

@ -86,6 +86,9 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
errorRecoveryMode = true;
}
/**
* {@inheritDoc}
*/
@Override
public boolean inErrorRecoveryMode(Parser recognizer) {
return errorRecoveryMode;
@ -113,6 +116,25 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
endErrorCondition(recognizer);
}
/**
* {@inheritDoc}
* <p/>
* The default implementation returns immediately if the handler is already
* in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
* and dispatches the reporting task based on the runtime type of {@code e}
* according to the following table.
*
* <ul>
* <li>{@link NoViableAltException}: Dispatches the call to
* {@link #reportNoViableAlternative}</li>
* <li>{@link InputMismatchException}: Dispatches the call to
* {@link #reportInputMismatch}</li>
* <li>{@link FailedPredicateException}: Dispatches the call to
* {@link #reportFailedPredicate}</li>
* <li>All other types: calls {@link Parser#notifyErrorListeners} to report
* the exception</li>
* </ul>
*/
@Override
public void reportError(Parser recognizer,
RecognitionException e)
@ -121,7 +143,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
// yet successfully, don't report any errors.
if (inErrorRecoveryMode(recognizer)) {
// System.err.print("[SPURIOUS] ");
return; // don't count spurious errors
return; // don't report spurious errors
}
beginErrorCondition(recognizer);
if ( e instanceof NoViableAltException ) {
@ -141,8 +163,12 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
}
}
/** Recover from NoViableAlt errors. Also there could be a mismatched
* token that the match() routine could not recover from.
/**
* {@inheritDoc}
* <p/>
* The default implementation resynchronizes the parser by consuming tokens
* until we find one in the resynchronization set--loosely the set of tokens
* that can follow the current rule.
*/
@Override
public void recover(Parser recognizer, RecognitionException e) {
@ -170,21 +196,52 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
consumeUntil(recognizer, followSet);
}
/** Make sure that the current lookahead symbol is consistent with
* what were expecting at this point in the ATN.
/**
* The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
* that the current lookahead symbol is consistent with what were expecting
* at this point in the ATN. You can call this anytime but ANTLR only
* generates code to check before subrules/loops and each iteration.
* <p/>
* Implements Jim Idle's magic sync mechanism in closures and optional
* subrules. E.g.,
*
* At the start of a sub rule upon error, sync() performs single
* token deletion, if possible. If it can't do that, it bails
* on the current rule and uses the default error recovery,
* which consumes until the resynchronization set of the current rule.
* <pre>
* a : sync ( stuff sync )* ;
* sync : {consume to what can follow sync} ;
* </pre>
*
* If the sub rule is optional, ()? or ()* or optional alternative,
* then the expected set includes what follows the subrule.
* At the start of a sub rule upon error, {@link #sync} performs single
* token deletion, if possible. If it can't do that, it bails on the current
* rule and uses the default error recovery, which consumes until the
* resynchronization set of the current rule.
* <p/>
* If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
* with an empty alternative), then the expected set includes what follows
* the subrule.
* <p/>
* During loop iteration, it consumes until it sees a token that can start a
* sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
* stay in the loop as long as possible.
* <p/>
* <strong>ORIGINS</strong>
* <p/>
* Previous versions of ANTLR did a poor job of their recovery within loops.
* A single mismatch token or missing token would force the parser to bail
* out of the entire rules surrounding the loop. So, for rule
*
* During loop iteration, it consumes until it sees a token that can
* start a sub rule or what follows loop. Yes, that is pretty aggressive.
* We opt to stay in the loop as long as possible.
*/
* <pre>
* classDef : 'class' ID '{' member* '}'
* </pre>
*
* input with an extra token between members would force the parser to
* consume until it found the next class definition rather than the next
* member definition of the current class.
* <p/>
* This functionality cost a little bit of effort because the parser has to
* compare token set at the start of the loop and at each iteration. If for
* some reason speed is suffering for you, you can turn off this
* functionality by simply overriding this method as a blank { }.
*/
@Override
public void sync(Parser recognizer) throws RecognitionException {
ATNState s = recognizer.getInterpreter().atn.states.get(recognizer.getState());
@ -233,6 +290,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
}
}
/**
* This is called by {@link #reportError} when the exception is a
* {@link NoViableAltException}.
*
* @see #reportError
*
* @param recognizer the parser instance
* @param e the recognition exception
*/
protected void reportNoViableAlternative(@NotNull Parser recognizer,
@NotNull NoViableAltException e)
{
@ -249,6 +315,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e);
}
/**
* This is called by {@link #reportError} when the exception is an
* {@link InputMismatchException}.
*
* @see #reportError
*
* @param recognizer the parser instance
* @param e the recognition exception
*/
protected void reportInputMismatch(@NotNull Parser recognizer,
@NotNull InputMismatchException e)
{
@ -257,6 +332,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e);
}
/**
* This is called by {@link #reportError} when the exception is a
* {@link FailedPredicateException}.
*
* @see #reportError
*
* @param recognizer the parser instance
* @param e the recognition exception
*/
protected void reportFailedPredicate(@NotNull Parser recognizer,
@NotNull FailedPredicateException e)
{
@ -265,6 +349,24 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e);
}
/**
* This method is called to report a syntax error which requires the removal
* of a token from the input stream. At the time this method is called, the
* erroneous symbol is current {@code LT(1)} symbol and has not yet been
* removed from the input stream. When this method returns,
* {@code recognizer} is in error recovery mode.
* <p/>
* This method is called when {@link #singleTokenDeletion} identifies
* single-token deletion as a viable recovery strategy for a mismatched
* input error.
* <p/>
* The default implementation simply returns if the handler is already in
* error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
* enter error recovery mode, followed by calling
* {@link Parser#notifyErrorListeners}.
*
* @param recognizer the parser instance
*/
protected void reportUnwantedToken(@NotNull Parser recognizer) {
if (inErrorRecoveryMode(recognizer)) {
return;
@ -280,6 +382,23 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
recognizer.notifyErrorListeners(t, msg, null);
}
/**
* This method is called to report a syntax error which requires the
* insertion of a missing token into the input stream. At the time this
* method is called, the missing token has not yet been inserted. When this
* method returns, {@code recognizer} is in error recovery mode.
* <p/>
* This method is called when {@link #singleTokenInsertion} identifies
* single-token insertion as a viable recovery strategy for a mismatched
* input error.
* <p/>
* The default implementation simply returns if the handler is already in
* error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
* enter error recovery mode, followed by calling
* {@link Parser#notifyErrorListeners}.
*
* @param recognizer the parser instance
*/
protected void reportMissingToken(@NotNull Parser recognizer) {
if (inErrorRecoveryMode(recognizer)) {
return;
@ -295,34 +414,55 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
recognizer.notifyErrorListeners(t, msg, null);
}
/** Attempt to recover from a single missing or extra token.
/**
* {@inheritDoc}
* <p/>
* The default implementation attempts to recover from the mismatched input
* by using single token insertion and deletion as described below. If the
* recovery attempt fails, this method throws an
* {@link InputMismatchException}.
* <p/>
* <strong>EXTRA TOKEN</strong> (single token deletion)
* <p/>
* {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
* right token, however, then assume {@code LA(1)} is some extra spurious
* token and delete it. Then consume and return the next token (which was
* the {@code LA(2)} token) as the successful result of the match operation.
* <p/>
* This recovery strategy is implemented by {@link #singleTokenDeletion}.
* <p/>
* <strong>MISSING TOKEN</strong> (single token insertion)
* <p/>
* If current token (at {@code LA(1)}) is consistent with what could come
* after the expected {@code LA(1)} token, then assume the token is missing
* and use the parser's {@link TokenFactory} to create it on the fly. The
* "insertion" is performed by returning the created token as the successful
* result of the match operation.
* <p/>
* This recovery strategy is implemented by {@link #singleTokenInsertion}.
* <p/>
* <strong>EXAMPLE</strong>
* <p/>
* For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
* the parser returns from the nested call to {@code expr}, it will have
* call chain:
*
* EXTRA TOKEN
* <pre>
* stat -> expr -> atom
* </pre>
*
* LA(1) is not what we are looking for. If LA(2) has the right token,
* however, then assume LA(1) is some extra spurious token. Delete it
* and LA(2) as if we were doing a normal match(), which advances the
* input.
* and it will be trying to match the {@code ')'} at this point in the
* derivation:
*
* MISSING TOKEN
* <pre>
* => ID '=' '(' INT ')' ('+' atom)* ';'
* ^
* </pre>
*
* If current token is consistent with what could come after
* ttype then it is ok to "insert" the missing token, else throw
* exception For example, Input "i=(3;" is clearly missing the
* ')'. When the parser returns from the nested call to expr, it
* will have call chain:
*
* stat -> expr -> atom
*
* and it will be trying to match the ')' at this point in the
* derivation:
*
* => ID '=' '(' INT ')' ('+' atom)* ';'
* ^
* match() will see that ';' doesn't match ')' and report a
* mismatched token error. To recover, it sees that LA(1)==';'
* is in the set of tokens that can follow the ')' token
* reference in rule atom. It can assume that you forgot the ')'.
* The attempt to match {@code ')'} will fail when it sees {@code ';'} and
* call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
* is in the set of tokens that can follow the {@code ')'} token reference
* in rule {@code atom}. It can assume that you forgot the {@code ')'}.
*/
@Override
public Token recoverInline(Parser recognizer)
@ -346,7 +486,23 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
throw new InputMismatchException(recognizer);
}
// if next token is what we are looking for then "delete" this token
/**
* This method implements the single-token insertion inline error recovery
* strategy. It is called by {@link #recoverInline} if the single-token
* deletion strategy fails to recover from the mismatched input. If this
* method returns {@code true}, {@code recognizer} will be in error recovery
* mode.
* <p/>
* This method determines whether or not single-token insertion is viable by
* checking if the {@code LA(1)} input symbol could be successfully matched
* if it were instead the {@code LA(2)} symbol. If this method returns
* {@code true}, the caller is responsible for creating and inserting a
* token with the correct type to produce this behavior.
*
* @param recognizer the parser instance
* @return {@code true} if single-token insertion is a viable recovery
* strategy for the current mismatched input, otherwise {@code false}
*/
protected boolean singleTokenInsertion(@NotNull Parser recognizer) {
int currentSymbolType = recognizer.getInputStream().LA(1);
// if current token is consistent with what could come after current
@ -364,6 +520,25 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
return false;
}
/**
* This method implements the single-token deletion inline error recovery
* strategy. It is called by {@link #recoverInline} to attempt to recover
* from mismatched input. If this method returns null, the parser and error
* handler state will not have changed. If this method returns non-null,
* {@code recognizer} will <em>not</em> be in error recovery mode since the
* returned token was a successful match.
* <p/>
* If the single-token deletion is successful, this method calls
* {@link #reportUnwantedToken} to report the error, followed by
* {@link Parser#consume} to actually "delete" the extraneous token. Then,
* before returning {@link #reportMatch} is called to signal a successful
* match.
*
* @param recognizer the parser instance
* @return the successfully matched {@link Token} instance if single-token
* deletion successfully recovers from the mismatched input, otherwise
* {@code null}
*/
@Nullable
protected Token singleTokenDeletion(@NotNull Parser recognizer) {
int nextTokenType = recognizer.getInputStream().LA(2);
@ -578,7 +753,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
return recoverSet;
}
/** Consume tokens until one matches the given token set */
/** Consume tokens until one matches the given token set. */
protected void consumeUntil(@NotNull Parser recognizer, @NotNull IntervalSet set) {
// System.err.println("consumeUntil("+set.toString(recognizer.getTokenNames())+")");
int ttype = recognizer.getInputStream().LA(1);