Updated ANTLRErrorStrategy and DefaultErrorStrategy documentation

2013-03-26 15:59:05 -05:00 · 2013-03-26 15:59:05 -05:00 · a7e021da4c
parent 1213aaad0b
commit a7e021da4c
2 changed files with 287 additions and 108 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java
@ -31,27 +31,22 @@
 package org.antlr.v4.runtime;

 import org.antlr.v4.runtime.misc.NotNull;
-import org.antlr.v4.runtime.misc.Nullable;

-/** The interface for defining strategies to deal with syntax errors
- *  encountered during a parse by ANTLR-generated parsers and tree parsers.
- *  We distinguish between three different kinds of errors:
+/**
+ * The interface for defining strategies to deal with syntax errors encountered
+ * during a parse by ANTLR-generated parsers. We distinguish between three
+ * different kinds of errors:
 *
- *  	o The parser could not figure out which path to take in the ATN
- *        (none of the available alternatives could possibly match)
- *      o The current input does not match what we were looking for.
- *      o A predicate evaluated to false.
- *
- *  The default implementation of this interface reports errors to any
- *  error listeners of the parser. It also handles single token insertion
- *  and deletion for mismatched elements.
- *
- *  We pass in the parser to each function so that the same strategy
- *  can be shared between multiple parsers running at the same time.
- *  This is just for flexibility, not that we need it for the default system.
- *
- *  TODO: To bail out upon first error, simply rethrow e?
+ * <ul>
+ * <li>The parser could not figure out which path to take in the ATN (none of
+ * the available alternatives could possibly match)</li>
+ * <li>The current input does not match what we were looking for</li>
+ * <li>A predicate evaluated to false</li>
+ * </ul>
 *
+ * Implementations of this interface report syntax errors by calling
+ * {@link Parser#notifyErrorListeners}.
+ * <p/>
 * TODO: what to do about lexers
 */
 public interface ANTLRErrorStrategy {
@ -61,70 +56,73 @@ public interface ANTLRErrorStrategy {
 	 */
 	void reset(@NotNull Parser recognizer);

-	/** When matching elements within alternative, use this method
-	 *  to recover. The default implementation uses single token
-	 *  insertion and deletion. If you want to change the way ANTLR
-	 *  response to mismatched element errors within an alternative,
-	 *  implement this method.
+	/**
+	 * This method is called when an unexpected symbol is encountered during an
+	 * inline match operation, such as {@link Parser#match}. If the error
+	 * strategy successfully recovers from the match failure, this method
+	 * returns the {@link Token} instance which should be treated as the
+	 * successful result of the match.
+	 * <p/>
+	 * Note that the calling code will not report an error if this method
+	 * returns successfully. The error strategy implementation is responsible
+	 * for calling {@link Parser#notifyErrorListeners} as appropriate.
 	 *
-	 *  From the recognizer, we can get the input stream to get
-	 *  the current input symbol and we can get the current context.
-	 *  That context gives us the current state within the ATN.
-	 *  From that state, we can look at its transition to figure out
-	 *  what was expected.
-	 *
-	 *  Because we can recover from a single token deletions by
-	 *  "inserting" tokens, we need to specify what that implicitly created
-	 *  token is. We use object, because it could be a tree node.
+	 * @param recognizer the parser instance
+	 * @throws RecognitionException if the error strategy was not able to
+	 * recover from the unexpected input symbol
 	 */
 	@NotNull
 	Token recoverInline(@NotNull Parser recognizer)
 		throws RecognitionException;

-	/** Resynchronize the parser by consuming tokens until we find one
-	 *  in the resynchronization set--loosely the set of tokens that can follow
-	 *  the current rule. The exception contains info you might want to
-	 *  use to recover better.
+	/**
+	 * This method is called to recover from exception {@code e}. This method is
+	 * called after {@link #reportError} by the default exception handler
+	 * generated for a rule method.
+	 *
+	 * @see #reportError
+	 *
+	 * @param recognizer the parser instance
+	 * @param e the recognition exception to recover from
+	 * @throws RecognitionException if the error strategy could not recover from
+	 * the recognition exception
 	 */
 	void recover(@NotNull Parser recognizer,
 				 @NotNull RecognitionException e)
 		throws RecognitionException;

-	/** Make sure that the current lookahead symbol is consistent with
-	 *  what were expecting at this point in the ATN. You can call this
-	 *  anytime but ANTLR only generates code to check before subrules/loops
-	 *  and each iteration.
+	/**
+	 * This method provides the error handler with an opportunity to handle
+	 * syntactic or semantic errors in the input stream before they result in a
+	 * {@link RecognitionException}.
+	 * <p/>
+	 * The generated code currently contains calls to {@link #sync} after
+	 * entering the decision state of a closure block ({@code (...)*} or
+	 * {@code (...)+}).
+	 * <p/>
+	 * For an implementation based on Jim Idle's "magic sync" mechanism, see
+	 * {@link DefaultErrorStrategy#sync}.
 	 *
-	 *  Implements Jim Idle's magic sync mechanism in closures and optional
-	 *  subrules. E.g.,
+	 * @see DefaultErrorStrategy#sync
 	 *
-	 * 		a : sync ( stuff sync )* ;
-	 * 		sync : {consume to what can follow sync} ;
-	 *
-	 *  Previous versions of ANTLR did a poor job of their recovery within
-	 *  loops. A single mismatch token or missing token would force the parser
-	 *  to bail out of the entire rules surrounding the loop. So, for rule
-	 *
-	 *  classDef : 'class' ID '{' member* '}'
-	 *
-	 *  input with an extra token between members would force the parser to
-	 *  consume until it found the next class definition rather than the
-	 *  next member definition of the current class.
-	 *
-	 *  This functionality cost a little bit of effort because the parser
-	 *  has to compare token set at the start of the loop and at each
-	 *  iteration. If for some reason speed is suffering for you, you can
-	 *  turn off this functionality by simply overriding this method as
-	 *  a blank { }.
+	 * @param recognizer the parser instance
+	 * @throws RecognitionException if an error is detected by the error
+	 * strategy but cannot be automatically recovered at the current state in
+	 * the parsing process
 	 */
 	void sync(@NotNull Parser recognizer)
 		throws RecognitionException;

-	/** Is the parser in the process of recovering from an error? Upon
-	 *  a syntax error, the parser enters recovery mode and stays there until
-	 *  the next successful match of a token. In this way, we can
-	 *  avoid sending out spurious error messages. We only want one error
-	 *  message per syntax error
+	/**
+	 * Tests whether or not {@code recognizer} is in the process of recovering
+	 * from an error. In error recovery mode, {@link Parser#consume} adds
+	 * symbols to the parse tree by calling
+	 * {@link ParserRuleContext#addErrorNode(Token)} instead of
+	 * {@link ParserRuleContext#addChild(Token)}.
+	 *
+	 * @param recognizer the parser instance
+	 * @return {@code true} if the parser is currently recovering from a parse
+	 * error, otherwise {@code false}
 	 */
 	boolean inErrorRecoveryMode(@NotNull Parser recognizer);

@ -136,7 +134,13 @@ public interface ANTLRErrorStrategy {
 	 */
 	void reportMatch(@NotNull Parser recognizer);

-	/** Report any kind of RecognitionException. */
+	/**
+	 * Report any kind of {@link RecognitionException}. This method is called by
+	 * the default exception handler generated for a rule method.
+	 *
+	 * @param recognizer the parser instance
+	 * @param e the recognition exception to report
+	 */
 	void reportError(@NotNull Parser recognizer,
 					 @NotNull RecognitionException e);
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java
@ -86,6 +86,9 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		errorRecoveryMode = true;
 	}

+	/**
+	 * {@inheritDoc}
+	 */
 	@Override
 	public boolean inErrorRecoveryMode(Parser recognizer) {
 		return errorRecoveryMode;
@ -113,6 +116,25 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		endErrorCondition(recognizer);
 	}

+	/**
+	 * {@inheritDoc}
+	 * <p/>
+	 * The default implementation returns immediately if the handler is already
+	 * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
+	 * and dispatches the reporting task based on the runtime type of {@code e}
+	 * according to the following table.
+	 *
+	 * <ul>
+	 * <li>{@link NoViableAltException}: Dispatches the call to
+	 * {@link #reportNoViableAlternative}</li>
+	 * <li>{@link InputMismatchException}: Dispatches the call to
+	 * {@link #reportInputMismatch}</li>
+	 * <li>{@link FailedPredicateException}: Dispatches the call to
+	 * {@link #reportFailedPredicate}</li>
+	 * <li>All other types: calls {@link Parser#notifyErrorListeners} to report
+	 * the exception</li>
+	 * </ul>
+	 */
 	@Override
 	public void reportError(Parser recognizer,
 							RecognitionException e)
@ -121,7 +143,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		// yet successfully, don't report any errors.
 		if (inErrorRecoveryMode(recognizer)) {
 //			System.err.print("[SPURIOUS] ");
-			return; // don't count spurious errors
+			return; // don't report spurious errors
 		}
 		beginErrorCondition(recognizer);
 		if ( e instanceof NoViableAltException ) {
@ -141,8 +163,12 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		}
 	}

-	/** Recover from NoViableAlt errors. Also there could be a mismatched
-	 *  token that the match() routine could not recover from.
+	/**
+	 * {@inheritDoc}
+	 * <p/>
+	 * The default implementation resynchronizes the parser by consuming tokens
+	 * until we find one in the resynchronization set--loosely the set of tokens
+	 * that can follow the current rule.
 	 */
 	@Override
 	public void recover(Parser recognizer, RecognitionException e) {
@ -170,20 +196,51 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		consumeUntil(recognizer, followSet);
 	}

-	/** Make sure that the current lookahead symbol is consistent with
-	 *  what were expecting at this point in the ATN.
+	/**
+	 * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
+	 * that the current lookahead symbol is consistent with what were expecting
+	 * at this point in the ATN. You can call this anytime but ANTLR only
+	 * generates code to check before subrules/loops and each iteration.
+	 * <p/>
+	 * Implements Jim Idle's magic sync mechanism in closures and optional
+	 * subrules. E.g.,
 	 *
-	 *  At the start of a sub rule upon error, sync() performs single
-	 *  token deletion, if possible. If it can't do that, it bails
-	 *  on the current rule and uses the default error recovery,
-	 *  which consumes until the resynchronization set of the current rule.
+	 * <pre>
+	 * a : sync ( stuff sync )* ;
+	 * sync : {consume to what can follow sync} ;
+	 * </pre>
 	 *
-	 *  If the sub rule is optional, ()? or ()* or optional alternative,
-	 *  then the expected set includes what follows the subrule.
+	 * At the start of a sub rule upon error, {@link #sync} performs single
+	 * token deletion, if possible. If it can't do that, it bails on the current
+	 * rule and uses the default error recovery, which consumes until the
+	 * resynchronization set of the current rule.
+	 * <p/>
+	 * If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
+	 * with an empty alternative), then the expected set includes what follows
+	 * the subrule.
+	 * <p/>
+	 * During loop iteration, it consumes until it sees a token that can start a
+	 * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
+	 * stay in the loop as long as possible.
+	 * <p/>
+	 * <strong>ORIGINS</strong>
+	 * <p/>
+	 * Previous versions of ANTLR did a poor job of their recovery within loops.
+	 * A single mismatch token or missing token would force the parser to bail
+	 * out of the entire rules surrounding the loop. So, for rule
 	 *
-	 *  During loop iteration, it consumes until it sees a token that can
-	 *  start a sub rule or what follows loop. Yes, that is pretty aggressive.
-	 *  We opt to stay in the loop as long as possible.
+	 * <pre>
+	 * classDef : 'class' ID '{' member* '}'
+	 * </pre>
+	 *
+	 * input with an extra token between members would force the parser to
+	 * consume until it found the next class definition rather than the next
+	 * member definition of the current class.
+	 * <p/>
+	 * This functionality cost a little bit of effort because the parser has to
+	 * compare token set at the start of the loop and at each iteration. If for
+	 * some reason speed is suffering for you, you can turn off this
+	 * functionality by simply overriding this method as a blank { }.
 	 */
 	@Override
 	public void sync(Parser recognizer) throws RecognitionException {
@ -233,6 +290,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		}
 	}

+	/**
+	 * This is called by {@link #reportError} when the exception is a
+	 * {@link NoViableAltException}.
+	 *
+	 * @see #reportError
+	 *
+	 * @param recognizer the parser instance
+	 * @param e the recognition exception
+	 */
 	protected void reportNoViableAlternative(@NotNull Parser recognizer,
 											 @NotNull NoViableAltException e)
 	{
@ -249,6 +315,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e);
 	}

+	/**
+	 * This is called by {@link #reportError} when the exception is an
+	 * {@link InputMismatchException}.
+	 *
+	 * @see #reportError
+	 *
+	 * @param recognizer the parser instance
+	 * @param e the recognition exception
+	 */
 	protected void reportInputMismatch(@NotNull Parser recognizer,
 									   @NotNull InputMismatchException e)
 	{
@ -257,6 +332,15 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e);
 	}

+	/**
+	 * This is called by {@link #reportError} when the exception is a
+	 * {@link FailedPredicateException}.
+	 *
+	 * @see #reportError
+	 *
+	 * @param recognizer the parser instance
+	 * @param e the recognition exception
+	 */
 	protected void reportFailedPredicate(@NotNull Parser recognizer,
 										 @NotNull FailedPredicateException e)
 	{
@ -265,6 +349,24 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e);
 	}

+	/**
+	 * This method is called to report a syntax error which requires the removal
+	 * of a token from the input stream. At the time this method is called, the
+	 * erroneous symbol is current {@code LT(1)} symbol and has not yet been
+	 * removed from the input stream. When this method returns,
+	 * {@code recognizer} is in error recovery mode.
+	 * <p/>
+	 * This method is called when {@link #singleTokenDeletion} identifies
+	 * single-token deletion as a viable recovery strategy for a mismatched
+	 * input error.
+	 * <p/>
+	 * The default implementation simply returns if the handler is already in
+	 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
+	 * enter error recovery mode, followed by calling
+	 * {@link Parser#notifyErrorListeners}.
+	 *
+	 * @param recognizer the parser instance
+	 */
 	protected void reportUnwantedToken(@NotNull Parser recognizer) {
 		if (inErrorRecoveryMode(recognizer)) {
 			return;
@ -280,6 +382,23 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		recognizer.notifyErrorListeners(t, msg, null);
 	}

+	/**
+	 * This method is called to report a syntax error which requires the
+	 * insertion of a missing token into the input stream. At the time this
+	 * method is called, the missing token has not yet been inserted. When this
+	 * method returns, {@code recognizer} is in error recovery mode.
+	 * <p/>
+	 * This method is called when {@link #singleTokenInsertion} identifies
+	 * single-token insertion as a viable recovery strategy for a mismatched
+	 * input error.
+	 * <p/>
+	 * The default implementation simply returns if the handler is already in
+	 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
+	 * enter error recovery mode, followed by calling
+	 * {@link Parser#notifyErrorListeners}.
+	 *
+	 * @param recognizer the parser instance
+	 */
 	protected void reportMissingToken(@NotNull Parser recognizer) {
 		if (inErrorRecoveryMode(recognizer)) {
 			return;
@ -295,34 +414,55 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		recognizer.notifyErrorListeners(t, msg, null);
 	}

-	/** Attempt to recover from a single missing or extra token.
-	 *
-	 *  EXTRA TOKEN
-	 *
-	 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
-	 *  however, then assume LA(1) is some extra spurious token.  Delete it
-	 *  and LA(2) as if we were doing a normal match(), which advances the
-	 *  input.
-	 *
-	 *  MISSING TOKEN
-	 *
-	 *  If current token is consistent with what could come after
-	 *  ttype then it is ok to "insert" the missing token, else throw
-	 *  exception For example, Input "i=(3;" is clearly missing the
-	 *  ')'.  When the parser returns from the nested call to expr, it
-	 *  will have call chain:
+	/**
+	 * {@inheritDoc}
+	 * <p/>
+	 * The default implementation attempts to recover from the mismatched input
+	 * by using single token insertion and deletion as described below. If the
+	 * recovery attempt fails, this method throws an
+	 * {@link InputMismatchException}.
+	 * <p/>
+	 * <strong>EXTRA TOKEN</strong> (single token deletion)
+	 * <p/>
+	 * {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
+	 * right token, however, then assume {@code LA(1)} is some extra spurious
+	 * token and delete it. Then consume and return the next token (which was
+	 * the {@code LA(2)} token) as the successful result of the match operation.
+	 * <p/>
+	 * This recovery strategy is implemented by {@link #singleTokenDeletion}.
+	 * <p/>
+	 * <strong>MISSING TOKEN</strong> (single token insertion)
+	 * <p/>
+	 * If current token (at {@code LA(1)}) is consistent with what could come
+	 * after the expected {@code LA(1)} token, then assume the token is missing
+	 * and use the parser's {@link TokenFactory} to create it on the fly. The
+	 * "insertion" is performed by returning the created token as the successful
+	 * result of the match operation.
+	 * <p/>
+	 * This recovery strategy is implemented by {@link #singleTokenInsertion}.
+	 * <p/>
+	 * <strong>EXAMPLE</strong>
+	 * <p/>
+	 * For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
+	 * the parser returns from the nested call to {@code expr}, it will have
+	 * call chain:
 	 *
+	 * <pre>
 	 * stat -> expr -> atom
+	 * </pre>
 	 *
-	 *  and it will be trying to match the ')' at this point in the
+	 * and it will be trying to match the {@code ')'} at this point in the
 	 * derivation:
 	 *
+	 * <pre>
 	 * => ID '=' '(' INT ')' ('+' atom)* ';'
 	 *                    ^
-	 *  match() will see that ';' doesn't match ')' and report a
-	 *  mismatched token error.  To recover, it sees that LA(1)==';'
-	 *  is in the set of tokens that can follow the ')' token
-	 *  reference in rule atom.  It can assume that you forgot the ')'.
+	 * </pre>
+	 *
+	 * The attempt to match {@code ')'} will fail when it sees {@code ';'} and
+	 * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
+	 * is in the set of tokens that can follow the {@code ')'} token reference
+	 * in rule {@code atom}. It can assume that you forgot the {@code ')'}.
 	 */
 	@Override
 	public Token recoverInline(Parser recognizer)
@ -346,7 +486,23 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		throw new InputMismatchException(recognizer);
 	}

-	// if next token is what we are looking for then "delete" this token
+	/**
+	 * This method implements the single-token insertion inline error recovery
+	 * strategy. It is called by {@link #recoverInline} if the single-token
+	 * deletion strategy fails to recover from the mismatched input. If this
+	 * method returns {@code true}, {@code recognizer} will be in error recovery
+	 * mode.
+	 * <p/>
+	 * This method determines whether or not single-token insertion is viable by
+	 * checking if the {@code LA(1)} input symbol could be successfully matched
+	 * if it were instead the {@code LA(2)} symbol. If this method returns
+	 * {@code true}, the caller is responsible for creating and inserting a
+	 * token with the correct type to produce this behavior.
+	 *
+	 * @param recognizer the parser instance
+	 * @return {@code true} if single-token insertion is a viable recovery
+	 * strategy for the current mismatched input, otherwise {@code false}
+	 */
 	protected boolean singleTokenInsertion(@NotNull Parser recognizer) {
 		int currentSymbolType = recognizer.getInputStream().LA(1);
 		// if current token is consistent with what could come after current
@ -364,6 +520,25 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		return false;
 	}

+	/**
+	 * This method implements the single-token deletion inline error recovery
+	 * strategy. It is called by {@link #recoverInline} to attempt to recover
+	 * from mismatched input. If this method returns null, the parser and error
+	 * handler state will not have changed. If this method returns non-null,
+	 * {@code recognizer} will <em>not</em> be in error recovery mode since the
+	 * returned token was a successful match.
+	 * <p/>
+	 * If the single-token deletion is successful, this method calls
+	 * {@link #reportUnwantedToken} to report the error, followed by
+	 * {@link Parser#consume} to actually "delete" the extraneous token. Then,
+	 * before returning {@link #reportMatch} is called to signal a successful
+	 * match.
+	 *
+	 * @param recognizer the parser instance
+	 * @return the successfully matched {@link Token} instance if single-token
+	 * deletion successfully recovers from the mismatched input, otherwise
+	 * {@code null}
+	 */
 	@Nullable
 	protected Token singleTokenDeletion(@NotNull Parser recognizer) {
 		int nextTokenType = recognizer.getInputStream().LA(2);
@ -578,7 +753,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 		return recoverSet;
 	}

-	/** Consume tokens until one matches the given token set */
+	/** Consume tokens until one matches the given token set. */
 	protected void consumeUntil(@NotNull Parser recognizer, @NotNull IntervalSet set) {
 //		System.err.println("consumeUntil("+set.toString(recognizer.getTokenNames())+")");
 		int ttype = recognizer.getInputStream().LA(1);