antlr/Antlr4.Runtime/DefaultErrorStrategy.cs

549 lines
23 KiB
C#

/*
* [The "BSD license"]
* Copyright (c) 2013 Terence Parr
* Copyright (c) 2013 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
using System;
using Antlr4.Runtime;
using Antlr4.Runtime.Atn;
using Antlr4.Runtime.Misc;
using Sharpen;
namespace Antlr4.Runtime
{
/// <summary>
/// This is the default error handling mechanism for ANTLR parsers
/// and tree parsers.
/// </summary>
/// <remarks>
/// This is the default error handling mechanism for ANTLR parsers
/// and tree parsers.
/// </remarks>
public class DefaultErrorStrategy : IAntlrErrorStrategy
{
/// <summary>
/// This is true after we see an error and before having successfully
/// matched a token.
/// </summary>
/// <remarks>
/// This is true after we see an error and before having successfully
/// matched a token. Prevents generation of more than one error message
/// per error.
/// </remarks>
protected internal bool errorRecoveryMode = false;
/// <summary>The index into the input stream where the last error occurred.</summary>
/// <remarks>
/// The index into the input stream where the last error occurred.
/// This is used to prevent infinite loops where an error is found
/// but no token is consumed during recovery...another error is found,
/// ad nauseum. This is a failsafe mechanism to guarantee that at least
/// one token/tree node is consumed for two errors.
/// </remarks>
protected internal int lastErrorIndex = -1;
protected internal IntervalSet lastErrorStates;
public virtual void BeginErrorCondition(Parser recognizer)
{
errorRecoveryMode = true;
}
public virtual bool InErrorRecoveryMode(Parser recognizer)
{
return errorRecoveryMode;
}
public virtual void EndErrorCondition(Parser recognizer)
{
errorRecoveryMode = false;
lastErrorStates = null;
lastErrorIndex = -1;
}
/// <exception cref="Antlr4.Runtime.RecognitionException"></exception>
public virtual void ReportError(Parser recognizer, RecognitionException e)
{
// if we've already reported an error and have not matched a token
// yet successfully, don't report any errors.
if (errorRecoveryMode)
{
// System.err.print("[SPURIOUS] ");
return;
}
// don't count spurious errors
recognizer._syntaxErrors++;
BeginErrorCondition(recognizer);
if (e is NoViableAltException)
{
ReportNoViableAlternative(recognizer, (NoViableAltException)e);
}
else
{
if (e is InputMismatchException)
{
ReportInputMismatch(recognizer, (InputMismatchException)e);
}
else
{
if (e is FailedPredicateException)
{
ReportFailedPredicate(recognizer, (FailedPredicateException)e);
}
else
{
System.Console.Error.WriteLine("unknown recognition error type: " + e.GetType().FullName
);
NotifyErrorListeners(recognizer, e.Message, e);
}
}
}
}
protected internal virtual void NotifyErrorListeners(Parser recognizer, string message
, RecognitionException e)
{
if (recognizer != null)
{
recognizer.NotifyErrorListeners(e.OffendingToken, message, e);
}
}
/// <summary>Recover from NoViableAlt errors.</summary>
/// <remarks>
/// Recover from NoViableAlt errors. Also there could be a mismatched
/// token that the match() routine could not recover from.
/// </remarks>
public virtual void Recover(Parser recognizer, RecognitionException e)
{
// System.out.println("recover in "+recognizer.getRuleInvocationStack()+
// " index="+recognizer.getInputStream().index()+
// ", lastErrorIndex="+
// lastErrorIndex+
// ", states="+lastErrorStates);
if (lastErrorIndex == ((ITokenStream)recognizer.InputStream).Index && lastErrorStates
!= null && lastErrorStates.Contains(recognizer.State))
{
// uh oh, another error at same token index and previously-visited
// state in ATN; must be a case where LT(1) is in the recovery
// token set so nothing got consumed. Consume a single token
// at least to prevent an infinite loop; this is a failsafe.
// System.err.println("seen error condition before index="+
// lastErrorIndex+", states="+lastErrorStates);
// System.err.println("FAILSAFE consumes "+recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]);
recognizer.Consume();
}
lastErrorIndex = ((ITokenStream)recognizer.InputStream).Index;
if (lastErrorStates == null)
{
lastErrorStates = new IntervalSet();
}
lastErrorStates.Add(recognizer.State);
IntervalSet followSet = GetErrorRecoverySet(recognizer);
ConsumeUntil(recognizer, followSet);
}
/// <summary>
/// Make sure that the current lookahead symbol is consistent with
/// what were expecting at this point in the ATN.
/// </summary>
/// <remarks>
/// Make sure that the current lookahead symbol is consistent with
/// what were expecting at this point in the ATN.
/// At the start of a sub rule upon error, sync() performs single
/// token deletion, if possible. If it can't do that, it bails
/// on the current rule and uses the default error recovery,
/// which consumes until the resynchronization set of the current rule.
/// If the sub rule is optional, ()? or ()* or optional alternative,
/// then the expected set includes what follows the subrule.
/// During loop iteration, it consumes until it sees a token that can
/// start a sub rule or what follows loop. Yes, that is pretty aggressive.
/// We opt to stay in the loop as long as possible.
/// </remarks>
public virtual void Sync(Parser recognizer)
{
ATNState s = recognizer.Interpreter.atn.states[recognizer.State];
// System.err.println("sync @ "+s.stateNumber+"="+s.getClass().getSimpleName());
// If already recovering, don't try to sync
if (errorRecoveryMode)
{
return;
}
ITokenStream tokens = ((ITokenStream)recognizer.InputStream);
int la = tokens.La(1);
// try cheaper subset first; might get lucky. seems to shave a wee bit off
if (recognizer.Atn.NextTokens(s).Contains(la) || la == TokenConstants.Eof)
{
return;
}
// Return but don't end recovery. only do that upon valid token match
if (recognizer.IsExpectedToken(la))
{
return;
}
switch (s.StateType)
{
case StateType.BlockStart:
case StateType.StarBlockStart:
case StateType.PlusBlockStart:
case StateType.StarLoopEntry:
{
// report error and recover if possible
if (SingleTokenDeletion(recognizer) != null)
{
return;
}
throw new InputMismatchException(recognizer);
}
case StateType.PlusLoopBack:
case StateType.StarLoopBack:
{
// System.err.println("at loop back: "+s.getClass().getSimpleName());
ReportUnwantedToken(recognizer);
IntervalSet expecting = recognizer.GetExpectedTokens();
IntervalSet whatFollowsLoopIterationOrRule = expecting.Or(GetErrorRecoverySet(recognizer
));
ConsumeUntil(recognizer, whatFollowsLoopIterationOrRule);
break;
}
default:
{
// do nothing if we can't identify the exact kind of ATN state
break;
break;
}
}
}
/// <exception cref="Antlr4.Runtime.RecognitionException"></exception>
public virtual void ReportNoViableAlternative(Parser recognizer, NoViableAltException
e)
{
ITokenStream tokens = ((ITokenStream)recognizer.InputStream);
string input;
if (tokens != null)
{
if (e.GetStartToken().Type == TokenConstants.Eof)
{
input = "<EOF>";
}
else
{
input = tokens.GetText(e.GetStartToken(), e.OffendingToken);
}
}
else
{
input = "<unknown input>";
}
string msg = "no viable alternative at input " + EscapeWSAndQuote(input);
NotifyErrorListeners(recognizer, msg, e);
}
/// <exception cref="Antlr4.Runtime.RecognitionException"></exception>
public virtual void ReportInputMismatch(Parser recognizer, InputMismatchException
e)
{
string msg = "mismatched input " + GetTokenErrorDisplay(e.OffendingToken) + " expecting "
+ e.GetExpectedTokens().ToString(recognizer.TokenNames);
NotifyErrorListeners(recognizer, msg, e);
}
/// <exception cref="Antlr4.Runtime.RecognitionException"></exception>
public virtual void ReportFailedPredicate(Parser recognizer, FailedPredicateException
e)
{
string ruleName = recognizer.RuleNames[recognizer._ctx.GetRuleIndex()];
string msg = "rule " + ruleName + " " + e.Message;
NotifyErrorListeners(recognizer, msg, e);
}
public virtual void ReportUnwantedToken(Parser recognizer)
{
if (errorRecoveryMode)
{
return;
}
recognizer._syntaxErrors++;
BeginErrorCondition(recognizer);
IToken t = recognizer.CurrentToken;
string tokenName = GetTokenErrorDisplay(t);
IntervalSet expecting = GetExpectedTokens(recognizer);
string msg = "extraneous input " + tokenName + " expecting " + expecting.ToString
(recognizer.TokenNames);
recognizer.NotifyErrorListeners(t, msg, null);
}
public virtual void ReportMissingToken(Parser recognizer)
{
if (errorRecoveryMode)
{
return;
}
recognizer._syntaxErrors++;
BeginErrorCondition(recognizer);
IToken t = recognizer.CurrentToken;
IntervalSet expecting = GetExpectedTokens(recognizer);
string msg = "missing " + expecting.ToString(recognizer.TokenNames) + " at " + GetTokenErrorDisplay
(t);
recognizer.NotifyErrorListeners(t, msg, null);
}
/// <summary>Attempt to recover from a single missing or extra token.</summary>
/// <remarks>
/// Attempt to recover from a single missing or extra token.
/// EXTRA TOKEN
/// LA(1) is not what we are looking for. If LA(2) has the right token,
/// however, then assume LA(1) is some extra spurious token. Delete it
/// and LA(2) as if we were doing a normal match(), which advances the
/// input.
/// MISSING TOKEN
/// If current token is consistent with what could come after
/// ttype then it is ok to "insert" the missing token, else throw
/// exception For example, Input "i=(3;" is clearly missing the
/// ')'. When the parser returns from the nested call to expr, it
/// will have call chain:
/// stat -&gt; expr -&gt; atom
/// and it will be trying to match the ')' at this point in the
/// derivation:
/// =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
/// ^
/// match() will see that ';' doesn't match ')' and report a
/// mismatched token error. To recover, it sees that LA(1)==';'
/// is in the set of tokens that can follow the ')' token
/// reference in rule atom. It can assume that you forgot the ')'.
/// </remarks>
/// <exception cref="Antlr4.Runtime.RecognitionException"></exception>
public virtual IToken RecoverInline(Parser recognizer)
{
// SINGLE TOKEN DELETION
IToken matchedSymbol = SingleTokenDeletion(recognizer);
if (matchedSymbol != null)
{
// we have deleted the extra token.
// now, move past ttype token as if all were ok
recognizer.Consume();
return matchedSymbol;
}
// SINGLE TOKEN INSERTION
if (SingleTokenInsertion(recognizer))
{
return GetMissingSymbol(recognizer);
}
// even that didn't work; must throw the exception
throw new InputMismatchException(recognizer);
}
// if next token is what we are looking for then "delete" this token
public virtual bool SingleTokenInsertion(Parser recognizer)
{
int currentSymbolType = ((ITokenStream)recognizer.InputStream).La(1);
// if current token is consistent with what could come after current
// ATN state, then we know we're missing a token; error recovery
// is free to conjure up and insert the missing token
ATNState currentState = recognizer.Interpreter.atn.states[recognizer.State];
ATNState next = currentState.Transition(0).target;
ATN atn = recognizer.Interpreter.atn;
IntervalSet expectingAtLL2 = atn.NextTokens(next, PredictionContext.FromRuleContext
(atn, recognizer._ctx));
// System.out.println("LT(2) set="+expectingAtLL2.toString(recognizer.getTokenNames()));
if (expectingAtLL2.Contains(currentSymbolType))
{
ReportMissingToken(recognizer);
return true;
}
return false;
}
public virtual IToken SingleTokenDeletion(Parser recognizer)
{
int nextTokenType = ((ITokenStream)recognizer.InputStream).La(2);
IntervalSet expecting = GetExpectedTokens(recognizer);
if (expecting.Contains(nextTokenType))
{
ReportUnwantedToken(recognizer);
recognizer.Consume();
// simply delete extra token
// we want to return the token we're actually matching
IToken matchedSymbol = recognizer.CurrentToken;
EndErrorCondition(recognizer);
// we know current token is correct
return matchedSymbol;
}
return null;
}
/// <summary>Conjure up a missing token during error recovery.</summary>
/// <remarks>
/// Conjure up a missing token during error recovery.
/// The recognizer attempts to recover from single missing
/// symbols. But, actions might refer to that missing symbol.
/// For example, x=ID {f($x);}. The action clearly assumes
/// that there has been an identifier matched previously and that
/// $x points at that token. If that token is missing, but
/// the next token in the stream is what we want we assume that
/// this token is missing and we keep going. Because we
/// have to return some token to replace the missing token,
/// we have to conjure one up. This method gives the user control
/// over the tokens returned for missing tokens. Mostly,
/// you will want to create something special for identifier
/// tokens. For literals such as '{' and ',', the default
/// action in the parser or tree parser works. It simply creates
/// a CommonToken of the appropriate type. The text will be the token.
/// If you change what tokens must be created by the lexer,
/// override this method to create the appropriate tokens.
/// </remarks>
protected internal virtual IToken GetMissingSymbol(Parser recognizer)
{
IToken currentSymbol = recognizer.CurrentToken;
IntervalSet expecting = GetExpectedTokens(recognizer);
int expectedTokenType = expecting.GetMinElement();
// get any element
string tokenText;
if (expectedTokenType == TokenConstants.Eof)
{
tokenText = "<missing EOF>";
}
else
{
tokenText = "<missing " + recognizer.TokenNames[expectedTokenType] + ">";
}
IToken current = currentSymbol;
IToken lookback = ((ITokenStream)recognizer.InputStream).Lt(-1);
if (current.Type == TokenConstants.Eof && lookback != null)
{
current = lookback;
}
return ConstructToken(((ITokenStream)recognizer.InputStream).TokenSource, expectedTokenType
, tokenText, current);
}
protected internal virtual IToken ConstructToken(ITokenSource tokenSource, int expectedTokenType
, string tokenText, IToken current)
{
ITokenFactory factory = tokenSource.TokenFactory;
return factory.Create(Tuple.Create(tokenSource, current.TokenSource.InputStream),
expectedTokenType, tokenText, TokenConstants.DefaultChannel, -1, -1, current.
Line, current.Column);
}
public virtual IntervalSet GetExpectedTokens(Parser recognizer)
{
return recognizer.GetExpectedTokens();
}
/// <summary>
/// How should a token be displayed in an error message? The default
/// is to display just the text, but during development you might
/// want to have a lot of information spit out.
/// </summary>
/// <remarks>
/// How should a token be displayed in an error message? The default
/// is to display just the text, but during development you might
/// want to have a lot of information spit out. Override in that case
/// to use t.toString() (which, for CommonToken, dumps everything about
/// the token). This is better than forcing you to override a method in
/// your token objects because you don't have to go modify your lexer
/// so that it creates a new Java type.
/// </remarks>
public virtual string GetTokenErrorDisplay(IToken t)
{
if (t == null)
{
return "<no token>";
}
string s = GetSymbolText(t);
if (s == null)
{
if (GetSymbolType(t) == TokenConstants.Eof)
{
s = "<EOF>";
}
else
{
s = "<" + GetSymbolType(t) + ">";
}
}
return EscapeWSAndQuote(s);
}
protected internal virtual string GetSymbolText(IToken symbol)
{
return symbol.Text;
}
protected internal virtual int GetSymbolType(IToken symbol)
{
return symbol.Type;
}
protected internal virtual string EscapeWSAndQuote(string s)
{
// if ( s==null ) return s;
s = s.Replace("\n", "\\n");
s = s.Replace("\r", "\\r");
s = s.Replace("\t", "\\t");
return "'" + s + "'";
}
protected internal virtual IntervalSet GetErrorRecoverySet(Parser recognizer)
{
ATN atn = recognizer.Interpreter.atn;
RuleContext ctx = recognizer._ctx;
IntervalSet recoverSet = new IntervalSet();
while (ctx != null && ctx.invokingState >= 0)
{
// compute what follows who invoked us
ATNState invokingState = atn.states[ctx.invokingState];
RuleTransition rt = (RuleTransition)invokingState.Transition(0);
IntervalSet follow = atn.NextTokens(rt.followState);
recoverSet.AddAll(follow);
ctx = ctx.parent;
}
recoverSet.Remove(TokenConstants.Epsilon);
// System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames()));
return recoverSet;
}
/// <summary>Consume tokens until one matches the given token set</summary>
public virtual void ConsumeUntil(Parser recognizer, IntervalSet set)
{
// System.err.println("consumeUntil("+set.toString(recognizer.getTokenNames())+")");
int ttype = ((ITokenStream)recognizer.InputStream).La(1);
while (ttype != TokenConstants.Eof && !set.Contains(ttype))
{
//System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
// recognizer.getInputStream().consume();
recognizer.Consume();
ttype = ((ITokenStream)recognizer.InputStream).La(1);
}
}
}
}