diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java index fbcc3239d..5907e6632 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java @@ -31,28 +31,23 @@ package org.antlr.v4.runtime; import org.antlr.v4.runtime.misc.NotNull; -import org.antlr.v4.runtime.misc.Nullable; -/** The interface for defining strategies to deal with syntax errors - * encountered during a parse by ANTLR-generated parsers and tree parsers. - * We distinguish between three different kinds of errors: +/** + * The interface for defining strategies to deal with syntax errors encountered + * during a parse by ANTLR-generated parsers. We distinguish between three + * different kinds of errors: * - * o The parser could not figure out which path to take in the ATN - * (none of the available alternatives could possibly match) - * o The current input does not match what we were looking for. - * o A predicate evaluated to false. + *
+ * a : sync ( stuff sync )* ; + * sync : {consume to what can follow sync} ; + ** - * If the sub rule is optional, ()? or ()* or optional alternative, - * then the expected set includes what follows the subrule. + * At the start of a sub rule upon error, {@link #sync} performs single + * token deletion, if possible. If it can't do that, it bails on the current + * rule and uses the default error recovery, which consumes until the + * resynchronization set of the current rule. + * + * If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block + * with an empty alternative), then the expected set includes what follows + * the subrule. + * + * During loop iteration, it consumes until it sees a token that can start a + * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to + * stay in the loop as long as possible. + * + * ORIGINS + * + * Previous versions of ANTLR did a poor job of their recovery within loops. + * A single mismatch token or missing token would force the parser to bail + * out of the entire rules surrounding the loop. So, for rule * - * During loop iteration, it consumes until it sees a token that can - * start a sub rule or what follows loop. Yes, that is pretty aggressive. - * We opt to stay in the loop as long as possible. - */ + *
+ * classDef : 'class' ID '{' member* '}' + *+ * + * input with an extra token between members would force the parser to + * consume until it found the next class definition rather than the next + * member definition of the current class. + * + * This functionality cost a little bit of effort because the parser has to + * compare token set at the start of the loop and at each iteration. If for + * some reason speed is suffering for you, you can turn off this + * functionality by simply overriding this method as a blank { }. + */ @Override public void sync(Parser recognizer) throws RecognitionException { ATNState s = recognizer.getInterpreter().atn.states.get(recognizer.getState()); @@ -233,6 +290,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { } } + /** + * This is called by {@link #reportError} when the exception is a + * {@link NoViableAltException}. + * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception + */ protected void reportNoViableAlternative(@NotNull Parser recognizer, @NotNull NoViableAltException e) { @@ -249,6 +315,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e); } + /** + * This is called by {@link #reportError} when the exception is an + * {@link InputMismatchException}. + * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception + */ protected void reportInputMismatch(@NotNull Parser recognizer, @NotNull InputMismatchException e) { @@ -257,6 +332,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e); } + /** + * This is called by {@link #reportError} when the exception is a + * {@link FailedPredicateException}. + * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception + */ protected void reportFailedPredicate(@NotNull Parser recognizer, @NotNull FailedPredicateException e) { @@ -265,6 +349,24 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e); } + /** + * This method is called to report a syntax error which requires the removal + * of a token from the input stream. At the time this method is called, the + * erroneous symbol is current {@code LT(1)} symbol and has not yet been + * removed from the input stream. When this method returns, + * {@code recognizer} is in error recovery mode. + * + * This method is called when {@link #singleTokenDeletion} identifies + * single-token deletion as a viable recovery strategy for a mismatched + * input error. + * + * The default implementation simply returns if the handler is already in + * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to + * enter error recovery mode, followed by calling + * {@link Parser#notifyErrorListeners}. + * + * @param recognizer the parser instance + */ protected void reportUnwantedToken(@NotNull Parser recognizer) { if (inErrorRecoveryMode(recognizer)) { return; @@ -280,6 +382,23 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { recognizer.notifyErrorListeners(t, msg, null); } + /** + * This method is called to report a syntax error which requires the + * insertion of a missing token into the input stream. At the time this + * method is called, the missing token has not yet been inserted. When this + * method returns, {@code recognizer} is in error recovery mode. + * + * This method is called when {@link #singleTokenInsertion} identifies + * single-token insertion as a viable recovery strategy for a mismatched + * input error. + * + * The default implementation simply returns if the handler is already in + * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to + * enter error recovery mode, followed by calling + * {@link Parser#notifyErrorListeners}. + * + * @param recognizer the parser instance + */ protected void reportMissingToken(@NotNull Parser recognizer) { if (inErrorRecoveryMode(recognizer)) { return; @@ -295,34 +414,55 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { recognizer.notifyErrorListeners(t, msg, null); } - /** Attempt to recover from a single missing or extra token. + /** + * {@inheritDoc} + * + * The default implementation attempts to recover from the mismatched input + * by using single token insertion and deletion as described below. If the + * recovery attempt fails, this method throws an + * {@link InputMismatchException}. + * + * EXTRA TOKEN (single token deletion) + * + * {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the + * right token, however, then assume {@code LA(1)} is some extra spurious + * token and delete it. Then consume and return the next token (which was + * the {@code LA(2)} token) as the successful result of the match operation. + * + * This recovery strategy is implemented by {@link #singleTokenDeletion}. + * + * MISSING TOKEN (single token insertion) + * + * If current token (at {@code LA(1)}) is consistent with what could come + * after the expected {@code LA(1)} token, then assume the token is missing + * and use the parser's {@link TokenFactory} to create it on the fly. The + * "insertion" is performed by returning the created token as the successful + * result of the match operation. + * + * This recovery strategy is implemented by {@link #singleTokenInsertion}. + * + * EXAMPLE + * + * For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When + * the parser returns from the nested call to {@code expr}, it will have + * call chain: * - * EXTRA TOKEN + *
+ * stat -> expr -> atom + ** - * LA(1) is not what we are looking for. If LA(2) has the right token, - * however, then assume LA(1) is some extra spurious token. Delete it - * and LA(2) as if we were doing a normal match(), which advances the - * input. + * and it will be trying to match the {@code ')'} at this point in the + * derivation: * - * MISSING TOKEN + *
+ * => ID '=' '(' INT ')' ('+' atom)* ';' + * ^ + ** - * If current token is consistent with what could come after - * ttype then it is ok to "insert" the missing token, else throw - * exception For example, Input "i=(3;" is clearly missing the - * ')'. When the parser returns from the nested call to expr, it - * will have call chain: - * - * stat -> expr -> atom - * - * and it will be trying to match the ')' at this point in the - * derivation: - * - * => ID '=' '(' INT ')' ('+' atom)* ';' - * ^ - * match() will see that ';' doesn't match ')' and report a - * mismatched token error. To recover, it sees that LA(1)==';' - * is in the set of tokens that can follow the ')' token - * reference in rule atom. It can assume that you forgot the ')'. + * The attempt to match {@code ')'} will fail when it sees {@code ';'} and + * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} + * is in the set of tokens that can follow the {@code ')'} token reference + * in rule {@code atom}. It can assume that you forgot the {@code ')'}. */ @Override public Token recoverInline(Parser recognizer) @@ -346,7 +486,23 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { throw new InputMismatchException(recognizer); } - // if next token is what we are looking for then "delete" this token + /** + * This method implements the single-token insertion inline error recovery + * strategy. It is called by {@link #recoverInline} if the single-token + * deletion strategy fails to recover from the mismatched input. If this + * method returns {@code true}, {@code recognizer} will be in error recovery + * mode. + * + * This method determines whether or not single-token insertion is viable by + * checking if the {@code LA(1)} input symbol could be successfully matched + * if it were instead the {@code LA(2)} symbol. If this method returns + * {@code true}, the caller is responsible for creating and inserting a + * token with the correct type to produce this behavior. + * + * @param recognizer the parser instance + * @return {@code true} if single-token insertion is a viable recovery + * strategy for the current mismatched input, otherwise {@code false} + */ protected boolean singleTokenInsertion(@NotNull Parser recognizer) { int currentSymbolType = recognizer.getInputStream().LA(1); // if current token is consistent with what could come after current @@ -364,6 +520,25 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { return false; } + /** + * This method implements the single-token deletion inline error recovery + * strategy. It is called by {@link #recoverInline} to attempt to recover + * from mismatched input. If this method returns null, the parser and error + * handler state will not have changed. If this method returns non-null, + * {@code recognizer} will not be in error recovery mode since the + * returned token was a successful match. + * + * If the single-token deletion is successful, this method calls + * {@link #reportUnwantedToken} to report the error, followed by + * {@link Parser#consume} to actually "delete" the extraneous token. Then, + * before returning {@link #reportMatch} is called to signal a successful + * match. + * + * @param recognizer the parser instance + * @return the successfully matched {@link Token} instance if single-token + * deletion successfully recovers from the mismatched input, otherwise + * {@code null} + */ @Nullable protected Token singleTokenDeletion(@NotNull Parser recognizer) { int nextTokenType = recognizer.getInputStream().LA(2); @@ -578,7 +753,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { return recoverSet; } - /** Consume tokens until one matches the given token set */ + /** Consume tokens until one matches the given token set. */ protected void consumeUntil(@NotNull Parser recognizer, @NotNull IntervalSet set) { // System.err.println("consumeUntil("+set.toString(recognizer.getTokenNames())+")"); int ttype = recognizer.getInputStream().LA(1);