/* * [The "BSD license"] * Copyright (c) 2013 Terence Parr * Copyright (c) 2013 Sam Harwell * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ using System; using Antlr4.Runtime; using Antlr4.Runtime.Atn; using Antlr4.Runtime.Misc; using Sharpen; namespace Antlr4.Runtime { /// /// This is the default implementation of /// IAntlrErrorStrategy /// used for /// error reporting and recovery in ANTLR parsers. /// public class DefaultErrorStrategy : IAntlrErrorStrategy { /// /// Indicates whether the error strategy is currently "recovering from an /// error". /// /// /// Indicates whether the error strategy is currently "recovering from an /// error". 
/// <remarks>
/// This flag is used to suppress reporting multiple error messages while
/// attempting to recover from a detected syntax error.
/// </remarks>
/// <seealso cref="InErrorRecoveryMode(Parser)"/>
protected internal bool errorRecoveryMode = false;

/// <summary>The index into the input stream where the last error occurred.</summary>
/// <remarks>
/// This is used to prevent infinite loops where an error is found
/// but no token is consumed during recovery...another error is found,
/// ad nauseam. This is a failsafe mechanism to guarantee that at least
/// one token/tree node is consumed for two errors.
/// </remarks>
protected internal int lastErrorIndex = -1;

/// <summary>
/// The parser states in which <see cref="Recover(Parser, RecognitionException)"/> has
/// already run since the last successful match; <c>null</c> until the first
/// recovery and reset to <c>null</c> by <see cref="EndErrorCondition(Parser)"/>.
/// </summary>
protected internal IntervalSet lastErrorStates;

/// <summary>
/// Resets the error handler state for the given parser.
/// </summary>
/// <remarks>
/// The default implementation simply calls <see cref="EndErrorCondition(Parser)"/>
/// to ensure that the handler is not in error recovery mode.
/// </remarks>
/// <param name="recognizer">the parser instance</param>
public virtual void Reset(Parser recognizer)
{
    EndErrorCondition(recognizer);
}

/// <summary>
/// This method is called to enter error recovery mode when a recognition
/// exception is reported.
/// </summary>
/// <param name="recognizer">the parser instance</param>
protected internal virtual void BeginErrorCondition(Parser recognizer)
{
    errorRecoveryMode = true;
}

/// <summary>
/// Tells whether the handler is currently in error recovery mode.
/// </summary>
/// <param name="recognizer">the parser instance</param>
/// <returns>the current value of <see cref="errorRecoveryMode"/></returns>
public virtual bool InErrorRecoveryMode(Parser recognizer)
{
    return errorRecoveryMode;
}

/// <summary>
/// This method is called to leave error recovery mode after recovering from
/// a recognition exception.
/// </summary>
/// <param name="recognizer">the parser instance</param>
protected internal virtual void EndErrorCondition(Parser recognizer)
{
    // Forget everything remembered about the previous error.
    lastErrorIndex = -1;
    lastErrorStates = null;
    errorRecoveryMode = false;
}

/// <summary>
/// Signals that the parser has successfully matched a token.
/// </summary>
/// <remarks>
/// The default implementation simply calls <see cref="EndErrorCondition(Parser)"/>.
/// </remarks>
/// <param name="recognizer">the parser instance</param>
public virtual void ReportMatch(Parser recognizer)
{
    // A successful match ends any error recovery in progress.
    EndErrorCondition(recognizer);
}

/// <summary>
/// Reports a recognition exception to the error listeners.
/// </summary>
/// <remarks>
/// <para>The default implementation returns immediately if the handler is already
/// in error recovery mode. Otherwise, it calls <see cref="BeginErrorCondition(Parser)"/>
/// and dispatches the reporting task based on the runtime type of <paramref name="e"/>
/// according to the following table.</para>
/// <list type="bullet">
/// <item><see cref="NoViableAltException"/>: dispatches the call to
/// <see cref="ReportNoViableAlternative(Parser, NoViableAltException)"/></item>
/// <item><see cref="InputMismatchException"/>: dispatches the call to
/// <see cref="ReportInputMismatch(Parser, InputMismatchException)"/></item>
/// <item><see cref="FailedPredicateException"/>: dispatches the call to
/// <see cref="ReportFailedPredicate(Parser, FailedPredicateException)"/></item>
/// <item>All other types: writes a diagnostic line to standard error and calls
/// <see cref="NotifyErrorListeners(Parser, string, RecognitionException)"/> with the
/// exception's own message</item>
/// </list>
/// </remarks>
/// <param name="recognizer">the parser instance</param>
/// <param name="e">the recognition exception to report</param>
public virtual void ReportError(Parser recognizer, RecognitionException e)
{
    // If we've already reported an error and have not matched a token
    // successfully since, stay quiet: further errors are likely spurious.
    if (InErrorRecoveryMode(recognizer))
    {
        return;
    }

    BeginErrorCondition(recognizer);

    NoViableAltException noViableAlt = e as NoViableAltException;
    if (noViableAlt != null)
    {
        ReportNoViableAlternative(recognizer, noViableAlt);
        return;
    }

    InputMismatchException inputMismatch = e as InputMismatchException;
    if (inputMismatch != null)
    {
        ReportInputMismatch(recognizer, inputMismatch);
        return;
    }

    FailedPredicateException failedPredicate = e as FailedPredicateException;
    if (failedPredicate != null)
    {
        ReportFailedPredicate(recognizer, failedPredicate);
        return;
    }

    // None of the known exception kinds matched.
    System.Console.Error.WriteLine("unknown recognition error type: " + e.GetType().FullName);
    NotifyErrorListeners(recognizer, e.Message, e);
}

/// <summary>
/// Forwards an error message to the parser's error listeners, using the
/// offending token recorded on the exception as the error location.
/// </summary>
/// <param name="recognizer">the parser instance</param>
/// <param name="message">the error message to deliver</param>
/// <param name="e">the recognition exception that triggered the report</param>
protected internal virtual void NotifyErrorListeners(Parser recognizer, string message, RecognitionException e)
{
    recognizer.NotifyErrorListeners(e.OffendingToken, message, e);
}

/// <summary>
/// Recovers from a reported recognition exception.
/// </summary>
/// <remarks>
/// The default implementation resynchronizes the parser by consuming tokens
/// until we find one in the resynchronization set--loosely the set of tokens
/// that can follow the current rule.
/// </remarks>
/// <param name="recognizer">the parser instance</param>
/// <param name="e">the recognition exception to recover from (not inspected here)</param>
public virtual void Recover(Parser recognizer, RecognitionException e)
{
    ITokenStream input = (ITokenStream)recognizer.InputStream;

    // Another error at the same token index and in a previously-visited
    // ATN state: LT(1) must be in the recovery set, so nothing got consumed
    // last time around.
    bool sameErrorAgain = lastErrorIndex == input.Index
        && lastErrorStates != null
        && lastErrorStates.Contains(recognizer.State);
    if (sameErrorAgain)
    {
        // Failsafe: consume a single token at least to prevent an infinite loop.
        recognizer.Consume();
    }

    // Remember where (and in which state) this recovery took place.
    lastErrorIndex = input.Index;
    if (lastErrorStates == null)
    {
        lastErrorStates = new IntervalSet();
    }
    lastErrorStates.Add(recognizer.State);

    // Skip ahead to something that can follow the rules currently on the stack.
    IntervalSet resyncSet = GetErrorRecoverySet(recognizer);
    ConsumeUntil(recognizer, resyncSet);
}

/// <summary>
/// The default implementation of <see cref="IAntlrErrorStrategy.Sync(Parser)"/> makes sure
/// that the current lookahead symbol is consistent with what we're expecting
/// at this point in the ATN. You can call this anytime but ANTLR only
/// generates code to check before subrules/loops and each iteration.
/// </summary>

/// <remarks>
/// <para>Implements Jim Idle's magic sync mechanism in closures and optional
/// subrules. E.g.,</para>
/// <pre>
/// a : sync ( stuff sync )* ;
/// sync : {consume to what can follow sync} ;
/// </pre>
/// <para>At the start of a sub rule upon error, <see cref="Sync(Parser)"/> performs single
/// token deletion, if possible. If it can't do that, it bails on the current
/// rule and uses the default error recovery, which consumes until the
/// resynchronization set of the current rule.</para>
/// <para>If the sub rule is optional (<c>(...)?</c>, <c>(...)*</c>, or block
/// with an empty alternative), then the expected set includes what follows
/// the subrule.</para>
/// <para>During loop iteration, it consumes until it sees a token that can start a
/// sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
/// stay in the loop as long as possible.</para>
/// <para><strong>ORIGINS</strong></para>
/// <para>Previous versions of ANTLR did a poor job of their recovery within loops.
/// A single mismatch token or missing token would force the parser to bail
/// out of the entire rules surrounding the loop. So, for rule</para>
/// <pre>
/// classDef : 'class' ID '{' member* '}'
/// </pre>
/// <para>input with an extra token between members would force the parser to
/// consume until it found the next class definition rather than the next
/// member definition of the current class.</para>
/// <para>This functionality cost a little bit of effort because the parser has to
/// compare token set at the start of the loop and at each iteration. If for
/// some reason speed is suffering for you, you can turn off this
/// functionality by simply overriding this method as a blank { }.</para>
/// </remarks>
/// <param name="recognizer">the parser instance</param>
/// <exception cref="InputMismatchException">
/// thrown at a block-start or star-loop-entry state when the lookahead is
/// unexpected and single-token deletion cannot repair it
/// </exception>
public virtual void Sync(Parser recognizer)
{
    ATNState atnState = recognizer.Interpreter.atn.states[recognizer.State];

    // If already recovering, don't try to sync.
    if (InErrorRecoveryMode(recognizer))
    {
        return;
    }

    ITokenStream input = (ITokenStream)recognizer.InputStream;
    int lookahead = input.La(1);

    // Try the cheaper subset first (might get lucky), then the full
    // expected-token check. Return without ending recovery; that only
    // happens upon a valid token match.
    if (recognizer.Atn.NextTokens(atnState).Contains(lookahead)
        || lookahead == TokenConstants.Eof
        || recognizer.IsExpectedToken(lookahead))
    {
        return;
    }

    StateType kind = atnState.StateType;
    if (kind == StateType.BlockStart
        || kind == StateType.StarBlockStart
        || kind == StateType.PlusBlockStart
        || kind == StateType.StarLoopEntry)
    {
        // Report error and recover if possible.
        if (SingleTokenDeletion(recognizer) == null)
        {
            throw new InputMismatchException(recognizer);
        }
    }
    else if (kind == StateType.PlusLoopBack || kind == StateType.StarLoopBack)
    {
        // Stay in the loop: skip to a token that can start another iteration
        // or that can follow the loop / the enclosing rules.
        ReportUnwantedToken(recognizer);
        IntervalSet expected = recognizer.GetExpectedTokens();
        IntervalSet resumeSet = expected.Or(GetErrorRecoverySet(recognizer));
        ConsumeUntil(recognizer, resumeSet);
    }

    // Any other kind of ATN state: do nothing, we can't identify it exactly.
}

/// <summary>
/// This is called by <see cref="ReportError(Parser, RecognitionException)"/>
/// when the exception is a <see cref="NoViableAltException"/>.
/// </summary>
/// <remarks>
/// The message quotes the input text spanning the exception's start token
/// through its offending token. The placeholder <c>&lt;EOF&gt;</c> is used when
/// the start token is end-of-file, and <c>&lt;unknown input&gt;</c> when no
/// token stream is available.
/// </remarks>
/// <seealso cref="ReportError(Parser, RecognitionException)"/>
/// <param name="recognizer">the parser instance</param>
/// <param name="e">the recognition exception</param>
protected internal virtual void ReportNoViableAlternative(Parser recognizer, NoViableAltException e)
{
    ITokenStream tokens = (ITokenStream)recognizer.InputStream;

    string input;
    if (tokens == null)
    {
        input = "<unknown input>";
    }
    else if (e.GetStartToken().Type == TokenConstants.Eof)
    {
        // There is no text to quote at end of input.
        input = "<EOF>";
    }
    else
    {
        input = tokens.GetText(e.GetStartToken(), e.OffendingToken);
    }

    string msg = "no viable alternative at input " + EscapeWSAndQuote(input);
    NotifyErrorListeners(recognizer, msg, e);
}

/// <summary>
/// This is called by <see cref="ReportError(Parser, RecognitionException)"/>
/// when the exception is an <see cref="InputMismatchException"/>.
/// </summary>
/// <seealso cref="ReportError(Parser, RecognitionException)"/>
/// <param name="recognizer">the parser instance</param>
/// <param name="e">the recognition exception</param>
protected internal virtual void ReportInputMismatch(Parser recognizer, InputMismatchException e)
{
    string offending = GetTokenErrorDisplay(e.OffendingToken);
    string expected = e.GetExpectedTokens().ToString(recognizer.TokenNames);
    string msg = "mismatched input " + offending + " expecting " + expected;
    NotifyErrorListeners(recognizer, msg, e);
}

/// <summary>
/// This is called by <see cref="ReportError(Parser, RecognitionException)"/>
/// when the exception is a <see cref="FailedPredicateException"/>.
/// </summary>
/// <seealso cref="ReportError(Parser, RecognitionException)"/>
/// <param name="recognizer">the parser instance</param>
/// <param name="e">the recognition exception</param>
protected internal virtual void ReportFailedPredicate(Parser recognizer, FailedPredicateException e)
{
    // The message names the rule of the parser's current context.
    int ruleIndex = recognizer._ctx.GetRuleIndex();
    string ruleName = recognizer.RuleNames[ruleIndex];
    string msg = "rule " + ruleName + " " + e.Message;
    NotifyErrorListeners(recognizer, msg, e);
}

/// <summary>
/// This method is called to report a syntax error which requires the removal
/// of a token from the input stream.
/// </summary>
/// <remarks>
/// <para>At the time this method is called, the erroneous symbol is current
/// <c>LT(1)</c> symbol and has not yet been removed from the input stream.
/// When this method returns, <paramref name="recognizer"/> is in error
/// recovery mode.</para>
/// <para>This method is called when <see cref="SingleTokenDeletion(Parser)"/> identifies
/// single-token deletion as a viable recovery strategy for a mismatched
/// input error.</para>
/// <para>The default implementation simply returns if the handler is already in
/// error recovery mode. Otherwise, it calls <see cref="BeginErrorCondition(Parser)"/>
/// to enter error recovery mode, followed by notifying the parser's error
/// listeners of the extraneous token.</para>
/// </remarks>
/// <param name="recognizer">the parser instance</param>
protected internal virtual void ReportUnwantedToken(Parser recognizer)
{
    if (InErrorRecoveryMode(recognizer))
    {
        return;
    }

    BeginErrorCondition(recognizer);

    IToken unwanted = recognizer.CurrentToken;
    string unwantedDisplay = GetTokenErrorDisplay(unwanted);
    string expectedDisplay = GetExpectedTokens(recognizer).ToString(recognizer.TokenNames);

    // No exception object exists for this kind of error, hence the null.
    recognizer.NotifyErrorListeners(
        unwanted,
        "extraneous input " + unwantedDisplay + " expecting " + expectedDisplay,
        null);
}

/// <summary>
/// This method is called to report a syntax error which requires the
/// insertion of a missing token into the input stream. At the time this
/// method is called, the missing token has not yet been inserted. When this
/// method returns, the recognizer is in error recovery mode.
/// </summary>

/// <remarks>
/// <para>This method is called when <see cref="SingleTokenInsertion(Parser)"/> identifies
/// single-token insertion as a viable recovery strategy for a mismatched
/// input error.</para>
/// <para>The default implementation simply returns if the handler is already in
/// error recovery mode. Otherwise, it calls <see cref="BeginErrorCondition(Parser)"/>
/// to enter error recovery mode, followed by notifying the parser's error
/// listeners of the missing token.</para>
/// </remarks>
/// <param name="recognizer">the parser instance</param>
protected internal virtual void ReportMissingToken(Parser recognizer)
{
    if (InErrorRecoveryMode(recognizer))
    {
        return;
    }

    BeginErrorCondition(recognizer);

    // The error is located at the current token, i.e. where the missing
    // token should have appeared.
    IToken location = recognizer.CurrentToken;
    IntervalSet expected = GetExpectedTokens(recognizer);
    string expectedDisplay = expected.ToString(recognizer.TokenNames);
    string locationDisplay = GetTokenErrorDisplay(location);

    // No exception object exists for this kind of error, hence the null.
    recognizer.NotifyErrorListeners(
        location,
        "missing " + expectedDisplay + " at " + locationDisplay,
        null);
}

/// <summary>
/// Attempts to recover from a mismatched token in place, without leaving
/// the current rule.
/// </summary>
/// <remarks>
/// <para>The default implementation attempts to recover from the mismatched input
/// by using single token insertion and deletion as described below. If the
/// recovery attempt fails, this method throws an
/// <see cref="InputMismatchException"/>.</para>
/// <para><strong>EXTRA TOKEN</strong> (single token deletion)</para>
/// <para><c>LA(1)</c> is not what we are looking for. If <c>LA(2)</c> has the
/// right token, however, then assume <c>LA(1)</c> is some extra spurious
/// token and delete it. Then consume and return the next token (which was
/// the <c>LA(2)</c> token) as the successful result of the match operation.</para>
/// <para>This recovery strategy is implemented by
/// <see cref="SingleTokenDeletion(Parser)"/>.</para>
/// <para><strong>MISSING TOKEN</strong> (single token insertion)</para>
/// <para>If current token (at <c>LA(1)</c>) is consistent with what could come
/// after the expected <c>LA(1)</c> token, then assume the token is missing
/// and use the parser's <see cref="ITokenFactory"/> to create it on the fly. The
/// "insertion" is performed by returning the created token as the successful
/// result of the match operation.</para>
/// <para>This recovery strategy is implemented by
/// <see cref="SingleTokenInsertion(Parser)"/>.</para>
/// <para><strong>EXAMPLE</strong></para>
/// <para>For example, Input <c>i=(3;</c> is clearly missing the <c>')'</c>. When
/// the parser returns from the nested call to <c>expr</c>, it will have
/// call chain:</para>
/// <pre>
/// stat → expr → atom
/// </pre>
/// <para>and it will be trying to match the <c>')'</c> at this point in the
/// derivation:</para>
/// <pre>
/// =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
/// ^
/// </pre>
/// <para>The attempt to match <c>')'</c> will fail when it sees <c>';'</c> and
/// call <see cref="RecoverInline(Parser)"/>. To recover, it sees that
/// <c>LA(1)==';'</c> is in the set of tokens that can follow the <c>')'</c>
/// token reference in rule <c>atom</c>. It can assume that you forgot the
/// <c>')'</c>.</para>
/// </remarks>
/// <param name="recognizer">the parser instance</param>
/// <returns>
/// the token to use as the result of the match: either the real token found
/// after deleting one extra token, or a conjured token standing in for a
/// missing one
/// </returns>
/// <exception cref="InputMismatchException">
/// thrown when neither single-token deletion nor single-token insertion applies
/// </exception>
public virtual IToken RecoverInline(Parser recognizer)
{
    // SINGLE TOKEN DELETION
    IToken afterDeletion = SingleTokenDeletion(recognizer);
    if (afterDeletion == null)
    {
        // SINGLE TOKEN INSERTION
        if (!SingleTokenInsertion(recognizer))
        {
            // Even that didn't work; must throw the exception.
            throw new InputMismatchException(recognizer);
        }

        return GetMissingSymbol(recognizer);
    }

    // We have deleted the extra token. Now move past the matched token
    // as if all were ok.
    recognizer.Consume();
    return afterDeletion;
}

/// <summary>
/// This method implements the single-token insertion inline error recovery
/// strategy. It is called by <see cref="RecoverInline(Parser)"/> if the single-token
/// deletion strategy fails to recover from the mismatched input. If this
/// method returns <c>true</c>, the recognizer will be in error recovery
/// mode.
/// </summary>

/// <remarks>
/// This method determines whether or not single-token insertion is viable by
/// checking if the <c>LA(1)</c> input symbol could be successfully matched
/// if it were instead the <c>LA(2)</c> symbol. If this method returns
/// <c>true</c>, the caller is responsible for creating and inserting a
/// token with the correct type to produce this behavior.
/// </remarks>
/// <param name="recognizer">the parser instance</param>
/// <returns>
/// <c>true</c> if single-token insertion is a viable recovery
/// strategy for the current mismatched input, otherwise <c>false</c>
/// </returns>
protected internal virtual bool SingleTokenInsertion(Parser recognizer)
{
    int currentType = ((ITokenStream)recognizer.InputStream).La(1);

    // If the current token is consistent with what could come after the
    // current ATN state, then we know we're missing a token; error recovery
    // is free to conjure up and insert the missing token.
    ATN atn = recognizer.Interpreter.atn;
    ATNState afterCurrent = atn.states[recognizer.State].Transition(0).target;
    IntervalSet expectedAfterInsertion = atn.NextTokens(
        afterCurrent,
        PredictionContext.FromRuleContext(atn, recognizer._ctx));

    if (!expectedAfterInsertion.Contains(currentType))
    {
        return false;
    }

    ReportMissingToken(recognizer);
    return true;
}

/// <summary>
/// This method implements the single-token deletion inline error recovery
/// strategy. It is called by <see cref="RecoverInline(Parser)"/> to attempt to recover
/// from mismatched input. If this method returns null, the parser and error
/// handler state will not have changed. If this method returns non-null,
/// the recognizer will not be in error recovery mode since the
/// returned token was a successful match.
/// </summary>

/// <remarks>
/// If the single-token deletion is successful, this method calls
/// <see cref="ReportUnwantedToken(Parser)"/> to report the error, followed by
/// <see cref="Parser.Consume()"/> to actually "delete" the extraneous token. Then,
/// before returning, <see cref="ReportMatch(Parser)"/> is called to signal a successful
/// match.
/// </remarks>
/// <param name="recognizer">the parser instance</param>
/// <returns>
/// the successfully matched <see cref="IToken"/> instance if single-token
/// deletion successfully recovers from the mismatched input, otherwise
/// <c>null</c>
/// </returns>
[Nullable]
protected internal virtual IToken SingleTokenDeletion(Parser recognizer)
{
    int typeAfterNext = ((ITokenStream)recognizer.InputStream).La(2);
    IntervalSet expected = GetExpectedTokens(recognizer);
    if (!expected.Contains(typeAfterNext))
    {
        // Dropping LA(1) would not help; leave everything untouched.
        return null;
    }

    ReportUnwantedToken(recognizer);

    // Simply delete the extra token.
    recognizer.Consume();

    // We want to return the token we're actually matching.
    IToken matched = recognizer.CurrentToken;

    // We know the current token is correct.
    ReportMatch(recognizer);
    return matched;
}

/// <summary>Conjure up a missing token during error recovery.</summary>
/// <remarks>
/// The recognizer attempts to recover from single missing
/// symbols. But, actions might refer to that missing symbol.
/// For example, x=ID {f($x);}. The action clearly assumes
/// that there has been an identifier matched previously and that
/// $x points at that token. If that token is missing, but
/// the next token in the stream is what we want we assume that
/// this token is missing and we keep going. Because we
/// have to return some token to replace the missing token,
/// we have to conjure one up. This method gives the user control
/// over the tokens returned for missing tokens. Mostly,
/// you will want to create something special for identifier
/// tokens. For literals such as '{' and ',', the default
/// action in the parser or tree parser works. It simply creates
/// a CommonToken of the appropriate type. The text will be the token.
/// If you change what tokens must be created by the lexer,
/// override this method to create the appropriate tokens.
/// </remarks>
/// <param name="recognizer">the parser instance</param>
/// <returns>
/// a token of the smallest expected token type. Its text is
/// <c>&lt;missing EOF&gt;</c> or <c>&lt;missing NAME&gt;</c>, where NAME is the
/// parser's display name for that type. It is positioned at the current token,
/// or at the previous token when the current one is end-of-file.
/// </returns>
[NotNull]
protected internal virtual IToken GetMissingSymbol(Parser recognizer)
{
    IToken currentSymbol = recognizer.CurrentToken;
    IntervalSet expecting = GetExpectedTokens(recognizer);

    // Any expected element will do; take the smallest.
    int expectedTokenType = expecting.GetMinElement();

    // Give the conjured token a recognizable placeholder text so that actions
    // and messages referring to it can tell it was never in the input.
    string tokenText;
    if (expectedTokenType == TokenConstants.Eof)
    {
        tokenText = "<missing EOF>";
    }
    else
    {
        tokenText = "<missing " + recognizer.TokenNames[expectedTokenType] + ">";
    }

    // Take the position from the previous token when we are sitting on EOF.
    IToken current = currentSymbol;
    IToken lookback = ((ITokenStream)recognizer.InputStream).Lt(-1);
    if (current.Type == TokenConstants.Eof && lookback != null)
    {
        current = lookback;
    }

    return ConstructToken(((ITokenStream)recognizer.InputStream).TokenSource, expectedTokenType, tokenText, current);
}

/// <summary>
/// Creates a token through the token factory of <paramref name="tokenSource"/>.
/// </summary>
/// <remarks>
/// The token is created on the default channel with start and stop indexes of
/// -1. Its line and column are copied from <paramref name="current"/>, and its
/// character stream is the one of <paramref name="current"/>'s token source.
/// </remarks>
/// <param name="tokenSource">the token source whose factory creates the token</param>
/// <param name="expectedTokenType">the type of the token to create</param>
/// <param name="tokenText">the text of the token to create</param>
/// <param name="current">the token supplying the position information</param>
/// <returns>the newly created token</returns>
protected internal virtual IToken ConstructToken(ITokenSource tokenSource, int expectedTokenType, string tokenText, IToken current)
{
    ITokenFactory factory = tokenSource.TokenFactory;
    return factory.Create(
        Tuple.Create(tokenSource, current.TokenSource.InputStream),
        expectedTokenType,
        tokenText,
        TokenConstants.DefaultChannel,
        -1,
        -1,
        current.Line,
        current.Column);
}

/// <summary>
/// Returns the set of tokens the parser expects in its current state, as
/// computed by the parser itself.
/// </summary>
/// <param name="recognizer">the parser instance</param>
/// <returns>the result of the parser's <c>GetExpectedTokens()</c></returns>
[NotNull]
protected internal virtual IntervalSet GetExpectedTokens(Parser recognizer)
{
    return recognizer.GetExpectedTokens();
}

/// <summary>
/// How should a token be displayed in an error message? The default
/// is to display just the text, but during development you might
/// want to have a lot of information spit out.
/// </summary>
/// <remarks>
/// Override in that case to use t.ToString() (which, for CommonToken, dumps
/// everything about the token). This is better than forcing you to override
/// a method in your token objects because you don't have to go modify your
/// lexer so that it creates a new token type.
/// </remarks>
/// <param name="t">the token to display; may be <c>null</c></param>
/// <returns>
/// the quoted, whitespace-escaped token text. The placeholders
/// <c>&lt;no token&gt;</c>, <c>&lt;EOF&gt;</c> or <c>&lt;TYPE&gt;</c> (the numeric
/// token type) are used when there is no token or the token has no text.
/// </returns>
protected internal virtual string GetTokenErrorDisplay(IToken t)
{
    if (t == null)
    {
        return "<no token>";
    }

    string s = GetSymbolText(t);
    if (s == null)
    {
        // No text available: fall back to a description of the token type.
        if (GetSymbolType(t) == TokenConstants.Eof)
        {
            s = "<EOF>";
        }
        else
        {
            s = "<" + GetSymbolType(t) + ">";
        }
    }

    return EscapeWSAndQuote(s);
}

/// <summary>Returns the text of the given token.</summary>
/// <param name="symbol">the token to inspect</param>
/// <returns>the token's <c>Text</c> property</returns>
protected internal virtual string GetSymbolText(IToken symbol)
{
    return symbol.Text;
}

/// <summary>Returns the token type of the given token.</summary>
/// <param name="symbol">the token to inspect</param>
/// <returns>the token's <c>Type</c> property</returns>
protected internal virtual int GetSymbolType(IToken symbol)
{
    return symbol.Type;
}

/// <summary>
/// Makes newline, carriage-return and tab characters visible as
/// <c>\n</c>, <c>\r</c> and <c>\t</c>, and wraps the result in single quotes.
/// </summary>
/// <param name="s">the text to escape; must not be <c>null</c></param>
/// <returns>the escaped, single-quoted text</returns>
[NotNull]
protected internal virtual string EscapeWSAndQuote(string s)
{
    string escaped = s.Replace("\n", "\\n")
        .Replace("\r", "\\r")
        .Replace("\t", "\\t");
    return "'" + escaped + "'";
}

/// <summary>
/// Computes the set of tokens used to resynchronize after an error.
/// </summary>
/// <remarks>
/// Starting at the parser's current context, this walks up the chain of
/// parent contexts for as long as the context has a non-negative invoking
/// state. For each one it adds the tokens that can follow the rule
/// invocation. Epsilon is removed from the result.
/// </remarks>
/// <param name="recognizer">the parser instance</param>
/// <returns>the union of the follow sets of all rule invocations on the stack</returns>
[NotNull]
protected internal virtual IntervalSet GetErrorRecoverySet(Parser recognizer)
{
    ATN atn = recognizer.Interpreter.atn;
    IntervalSet recoverSet = new IntervalSet();

    for (RuleContext ctx = recognizer._ctx; ctx != null && ctx.invokingState >= 0; ctx = ctx.parent)
    {
        // Compute what follows whoever invoked us.
        RuleTransition invocation = (RuleTransition)atn.states[ctx.invokingState].Transition(0);
        recoverSet.AddAll(atn.NextTokens(invocation.followState));
    }

    recoverSet.Remove(TokenConstants.Epsilon);
    return recoverSet;
}

/// <summary>Consume tokens until one matches the given token set.</summary>
/// <remarks>Consumption also stops at end-of-file, which is never consumed.</remarks>
/// <param name="recognizer">the parser instance</param>
/// <param name="set">the token types at which to stop consuming</param>
protected internal virtual void ConsumeUntil(Parser recognizer, IntervalSet set)
{
    ITokenStream input = (ITokenStream)recognizer.InputStream;
    for (int ttype = input.La(1);
         ttype != TokenConstants.Eof && !set.Contains(ttype);
         ttype = input.La(1))
    {
        recognizer.Consume();
    }
}
} // class DefaultErrorStrategy
} // namespace Antlr4.Runtime