diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java index fbcc3239d..5907e6632 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java @@ -31,28 +31,23 @@ package org.antlr.v4.runtime; import org.antlr.v4.runtime.misc.NotNull; -import org.antlr.v4.runtime.misc.Nullable; -/** The interface for defining strategies to deal with syntax errors - * encountered during a parse by ANTLR-generated parsers and tree parsers. - * We distinguish between three different kinds of errors: +/** + * The interface for defining strategies to deal with syntax errors encountered + * during a parse by ANTLR-generated parsers. We distinguish between three + * different kinds of errors: * - * o The parser could not figure out which path to take in the ATN - * (none of the available alternatives could possibly match) - * o The current input does not match what we were looking for. - * o A predicate evaluated to false. + * * - * The default implementation of this interface reports errors to any - * error listeners of the parser. It also handles single token insertion - * and deletion for mismatched elements. - * - * We pass in the parser to each function so that the same strategy - * can be shared between multiple parsers running at the same time. - * This is just for flexibility, not that we need it for the default system. - * - * TODO: To bail out upon first error, simply rethrow e? - * - * TODO: what to do about lexers + * Implementations of this interface report syntax errors by calling + * {@link Parser#notifyErrorListeners}. + *

+ * TODO: what to do about lexers */ public interface ANTLRErrorStrategy { /** @@ -61,70 +56,73 @@ public interface ANTLRErrorStrategy { */ void reset(@NotNull Parser recognizer); - /** When matching elements within alternative, use this method - * to recover. The default implementation uses single token - * insertion and deletion. If you want to change the way ANTLR - * response to mismatched element errors within an alternative, - * implement this method. + /** + * This method is called when an unexpected symbol is encountered during an + * inline match operation, such as {@link Parser#match}. If the error + * strategy successfully recovers from the match failure, this method + * returns the {@link Token} instance which should be treated as the + * successful result of the match. + *

+ * Note that the calling code will not report an error if this method + * returns successfully. The error strategy implementation is responsible + * for calling {@link Parser#notifyErrorListeners} as appropriate. * - * From the recognizer, we can get the input stream to get - * the current input symbol and we can get the current context. - * That context gives us the current state within the ATN. - * From that state, we can look at its transition to figure out - * what was expected. - * - * Because we can recover from a single token deletions by - * "inserting" tokens, we need to specify what that implicitly created - * token is. We use object, because it could be a tree node. + * @param recognizer the parser instance + * @throws RecognitionException if the error strategy was not able to + * recover from the unexpected input symbol */ @NotNull Token recoverInline(@NotNull Parser recognizer) throws RecognitionException; - /** Resynchronize the parser by consuming tokens until we find one - * in the resynchronization set--loosely the set of tokens that can follow - * the current rule. The exception contains info you might want to - * use to recover better. + /** + * This method is called to recover from exception {@code e}. This method is + * called after {@link #reportError} by the default exception handler + * generated for a rule method. + * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception to recover from + * @throws RecognitionException if the error strategy could not recover from + * the recognition exception */ void recover(@NotNull Parser recognizer, @NotNull RecognitionException e) throws RecognitionException; - /** Make sure that the current lookahead symbol is consistent with - * what were expecting at this point in the ATN. You can call this - * anytime but ANTLR only generates code to check before subrules/loops - * and each iteration. 
+ /** + * This method provides the error handler with an opportunity to handle + * syntactic or semantic errors in the input stream before they result in a + * {@link RecognitionException}. + *

+ * The generated code currently contains calls to {@link #sync} after + * entering the decision state of a closure block ({@code (...)*} or + * {@code (...)+}). + *

+ * For an implementation based on Jim Idle's "magic sync" mechanism, see + * {@link DefaultErrorStrategy#sync}. * - * Implements Jim Idle's magic sync mechanism in closures and optional - * subrules. E.g., + * @see DefaultErrorStrategy#sync * - * a : sync ( stuff sync )* ; - * sync : {consume to what can follow sync} ; - * - * Previous versions of ANTLR did a poor job of their recovery within - * loops. A single mismatch token or missing token would force the parser - * to bail out of the entire rules surrounding the loop. So, for rule - * - * classDef : 'class' ID '{' member* '}' - * - * input with an extra token between members would force the parser to - * consume until it found the next class definition rather than the - * next member definition of the current class. - * - * This functionality cost a little bit of effort because the parser - * has to compare token set at the start of the loop and at each - * iteration. If for some reason speed is suffering for you, you can - * turn off this functionality by simply overriding this method as - * a blank { }. + * @param recognizer the parser instance + * @throws RecognitionException if an error is detected by the error + * strategy but cannot be automatically recovered at the current state in + * the parsing process */ void sync(@NotNull Parser recognizer) throws RecognitionException; - /** Is the parser in the process of recovering from an error? Upon - * a syntax error, the parser enters recovery mode and stays there until - * the next successful match of a token. In this way, we can - * avoid sending out spurious error messages. We only want one error - * message per syntax error + /** + * Tests whether or not {@code recognizer} is in the process of recovering + * from an error. In error recovery mode, {@link Parser#consume} adds + * symbols to the parse tree by calling + * {@link ParserRuleContext#addErrorNode(Token)} instead of + * {@link ParserRuleContext#addChild(Token)}. 
+ * + * @param recognizer the parser instance + * @return {@code true} if the parser is currently recovering from a parse + * error, otherwise {@code false} */ boolean inErrorRecoveryMode(@NotNull Parser recognizer); @@ -136,7 +134,13 @@ public interface ANTLRErrorStrategy { */ void reportMatch(@NotNull Parser recognizer); - /** Report any kind of RecognitionException. */ + /** + * Report any kind of {@link RecognitionException}. This method is called by + * the default exception handler generated for a rule method. + * + * @param recognizer the parser instance + * @param e the recognition exception to report + */ void reportError(@NotNull Parser recognizer, @NotNull RecognitionException e); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java index eb17eafa4..8dfe13c45 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java +++ b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java @@ -86,6 +86,9 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { errorRecoveryMode = true; } + /** + * {@inheritDoc} + */ @Override public boolean inErrorRecoveryMode(Parser recognizer) { return errorRecoveryMode; @@ -113,6 +116,25 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { endErrorCondition(recognizer); } + /** + * {@inheritDoc} + *

+ * The default implementation returns immediately if the handler is already + * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition} + * and dispatches the reporting task based on the runtime type of {@code e} + * according to the following table. + * + *

+ */ @Override public void reportError(Parser recognizer, RecognitionException e) @@ -121,7 +143,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { // yet successfully, don't report any errors. if (inErrorRecoveryMode(recognizer)) { // System.err.print("[SPURIOUS] "); - return; // don't count spurious errors + return; // don't report spurious errors } beginErrorCondition(recognizer); if ( e instanceof NoViableAltException ) { @@ -141,8 +163,12 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { } } - /** Recover from NoViableAlt errors. Also there could be a mismatched - * token that the match() routine could not recover from. + /** + * {@inheritDoc} + *

+ * The default implementation resynchronizes the parser by consuming tokens + * until we find one in the resynchronization set--loosely the set of tokens + * that can follow the current rule. */ @Override public void recover(Parser recognizer, RecognitionException e) { @@ -170,21 +196,52 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { consumeUntil(recognizer, followSet); } - /** Make sure that the current lookahead symbol is consistent with - * what were expecting at this point in the ATN. + /** + * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure + * that the current lookahead symbol is consistent with what we were expecting + * at this point in the ATN. You can call this anytime but ANTLR only + * generates code to check before subrules/loops and each iteration. 

+ * Implements Jim Idle's magic sync mechanism in closures and optional + * subrules. E.g., * - * At the start of a sub rule upon error, sync() performs single - * token deletion, if possible. If it can't do that, it bails - * on the current rule and uses the default error recovery, - * which consumes until the resynchronization set of the current rule. + *

+	 * a : sync ( stuff sync )* ;
+	 * sync : {consume to what can follow sync} ;
+	 * 
* - * If the sub rule is optional, ()? or ()* or optional alternative, - * then the expected set includes what follows the subrule. + * At the start of a sub rule upon error, {@link #sync} performs single + * token deletion, if possible. If it can't do that, it bails on the current + * rule and uses the default error recovery, which consumes until the + * resynchronization set of the current rule. + *

+ * If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block + * with an empty alternative), then the expected set includes what follows + * the subrule. + *

+ * During loop iteration, it consumes until it sees a token that can start a + * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to + * stay in the loop as long as possible. + *

+ * ORIGINS + *

+ * Previous versions of ANTLR did a poor job of their recovery within loops. + * A single mismatch token or missing token would force the parser to bail + * out of the entire rules surrounding the loop. So, for rule * - * During loop iteration, it consumes until it sees a token that can - * start a sub rule or what follows loop. Yes, that is pretty aggressive. - * We opt to stay in the loop as long as possible. - */ + *

+	 * classDef : 'class' ID '{' member* '}'
+	 * 
+ * + * input with an extra token between members would force the parser to + * consume until it found the next class definition rather than the next + * member definition of the current class. + *

+ * This functionality cost a little bit of effort because the parser has to + * compare token set at the start of the loop and at each iteration. If for + * some reason speed is suffering for you, you can turn off this + * functionality by simply overriding this method as a blank { }. + */ @Override public void sync(Parser recognizer) throws RecognitionException { ATNState s = recognizer.getInterpreter().atn.states.get(recognizer.getState()); @@ -233,6 +290,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { } } + /** + * This is called by {@link #reportError} when the exception is a + * {@link NoViableAltException}. + * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception + */ protected void reportNoViableAlternative(@NotNull Parser recognizer, @NotNull NoViableAltException e) { @@ -249,6 +315,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e); } + /** + * This is called by {@link #reportError} when the exception is an + * {@link InputMismatchException}. + * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception + */ protected void reportInputMismatch(@NotNull Parser recognizer, @NotNull InputMismatchException e) { @@ -257,6 +332,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e); } + /** + * This is called by {@link #reportError} when the exception is a + * {@link FailedPredicateException}. 
+ * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception + */ protected void reportFailedPredicate(@NotNull Parser recognizer, @NotNull FailedPredicateException e) { @@ -265,6 +349,24 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e); } + /** + * This method is called to report a syntax error which requires the removal + * of a token from the input stream. At the time this method is called, the + * erroneous symbol is the current {@code LT(1)} symbol and has not yet been + * removed from the input stream. When this method returns, + * {@code recognizer} is in error recovery mode. 

+ * This method is called when {@link #singleTokenDeletion} identifies + * single-token deletion as a viable recovery strategy for a mismatched + * input error. + *

+ * The default implementation simply returns if the handler is already in + * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to + * enter error recovery mode, followed by calling + * {@link Parser#notifyErrorListeners}. + * + * @param recognizer the parser instance + */ protected void reportUnwantedToken(@NotNull Parser recognizer) { if (inErrorRecoveryMode(recognizer)) { return; @@ -280,6 +382,23 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { recognizer.notifyErrorListeners(t, msg, null); } + /** + * This method is called to report a syntax error which requires the + * insertion of a missing token into the input stream. At the time this + * method is called, the missing token has not yet been inserted. When this + * method returns, {@code recognizer} is in error recovery mode. + *

+ * This method is called when {@link #singleTokenInsertion} identifies + * single-token insertion as a viable recovery strategy for a mismatched + * input error. + *

+ * The default implementation simply returns if the handler is already in + * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to + * enter error recovery mode, followed by calling + * {@link Parser#notifyErrorListeners}. + * + * @param recognizer the parser instance + */ protected void reportMissingToken(@NotNull Parser recognizer) { if (inErrorRecoveryMode(recognizer)) { return; @@ -295,34 +414,55 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { recognizer.notifyErrorListeners(t, msg, null); } - /** Attempt to recover from a single missing or extra token. + /** + * {@inheritDoc} + *

+ * The default implementation attempts to recover from the mismatched input + * by using single token insertion and deletion as described below. If the + * recovery attempt fails, this method throws an + * {@link InputMismatchException}. + *

+ * EXTRA TOKEN (single token deletion) + *

+ * {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the + * right token, however, then assume {@code LA(1)} is some extra spurious + * token and delete it. Then consume and return the next token (which was + * the {@code LA(2)} token) as the successful result of the match operation. + *

+ * This recovery strategy is implemented by {@link #singleTokenDeletion}. + *

+ * MISSING TOKEN (single token insertion) + *

+ * If current token (at {@code LA(1)}) is consistent with what could come + * after the expected {@code LA(1)} token, then assume the token is missing + * and use the parser's {@link TokenFactory} to create it on the fly. The + * "insertion" is performed by returning the created token as the successful + * result of the match operation. + *

+ * This recovery strategy is implemented by {@link #singleTokenInsertion}. + *

+ * EXAMPLE + *

+ * For example, input {@code i=(3;} is clearly missing the {@code ')'}. When + * the parser returns from the nested call to {@code expr}, it will have + * the call chain: * - * EXTRA TOKEN + *

+	 * stat -> expr -> atom
+	 * 
* - * LA(1) is not what we are looking for. If LA(2) has the right token, - * however, then assume LA(1) is some extra spurious token. Delete it - * and LA(2) as if we were doing a normal match(), which advances the - * input. + * and it will be trying to match the {@code ')'} at this point in the + * derivation: * - * MISSING TOKEN + *
+	 * => ID '=' '(' INT ')' ('+' atom)* ';'
+	 *                    ^
+	 * 
* - * If current token is consistent with what could come after - * ttype then it is ok to "insert" the missing token, else throw - * exception For example, Input "i=(3;" is clearly missing the - * ')'. When the parser returns from the nested call to expr, it - * will have call chain: - * - * stat -> expr -> atom - * - * and it will be trying to match the ')' at this point in the - * derivation: - * - * => ID '=' '(' INT ')' ('+' atom)* ';' - * ^ - * match() will see that ';' doesn't match ')' and report a - * mismatched token error. To recover, it sees that LA(1)==';' - * is in the set of tokens that can follow the ')' token - * reference in rule atom. It can assume that you forgot the ')'. + * The attempt to match {@code ')'} will fail when it sees {@code ';'} and + * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} + * is in the set of tokens that can follow the {@code ')'} token reference + * in rule {@code atom}. It can assume that you forgot the {@code ')'}. */ @Override public Token recoverInline(Parser recognizer) @@ -346,7 +486,23 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { throw new InputMismatchException(recognizer); } - // if next token is what we are looking for then "delete" this token + /** + * This method implements the single-token insertion inline error recovery + * strategy. It is called by {@link #recoverInline} if the single-token + * deletion strategy fails to recover from the mismatched input. If this + * method returns {@code true}, {@code recognizer} will be in error recovery + * mode. + *

+ * This method determines whether or not single-token insertion is viable by + * checking if the {@code LA(1)} input symbol could be successfully matched + * if it were instead the {@code LA(2)} symbol. If this method returns + * {@code true}, the caller is responsible for creating and inserting a + * token with the correct type to produce this behavior. + * + * @param recognizer the parser instance + * @return {@code true} if single-token insertion is a viable recovery + * strategy for the current mismatched input, otherwise {@code false} + */ protected boolean singleTokenInsertion(@NotNull Parser recognizer) { int currentSymbolType = recognizer.getInputStream().LA(1); // if current token is consistent with what could come after current @@ -364,6 +520,25 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { return false; } + /** + * This method implements the single-token deletion inline error recovery + * strategy. It is called by {@link #recoverInline} to attempt to recover + * from mismatched input. If this method returns null, the parser and error + * handler state will not have changed. If this method returns non-null, + * {@code recognizer} will not be in error recovery mode since the + * returned token was a successful match. + *

+ * If the single-token deletion is successful, this method calls + * {@link #reportUnwantedToken} to report the error, followed by + * {@link Parser#consume} to actually "delete" the extraneous token. Then, + * before returning, {@link #reportMatch} is called to signal a successful + * match. + * + * @param recognizer the parser instance + * @return the successfully matched {@link Token} instance if single-token + * deletion successfully recovers from the mismatched input, otherwise + * {@code null} + */ @Nullable protected Token singleTokenDeletion(@NotNull Parser recognizer) { int nextTokenType = recognizer.getInputStream().LA(2); @@ -578,7 +753,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { return recoverSet; } - /** Consume tokens until one matches the given token set */ + /** Consume tokens until one matches the given token set. */ protected void consumeUntil(@NotNull Parser recognizer, @NotNull IntervalSet set) { // System.err.println("consumeUntil("+set.toString(recognizer.getTokenNames())+")"); int ttype = recognizer.getInputStream().LA(1);