diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java new file mode 100644 index 000000000..4deba562c --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java @@ -0,0 +1,61 @@ +package org.antlr.v4.runtime; + +/** The interface for defining strategies to deal with syntax errors + * encountered during a parse by ANTLR-generated parsers and tree parsers. + * We distinguish between three different kinds of errors: + * + * o The parser could not figure out which path to take in the ATN + * (none of the available alternatives could possibly match) + * o The current input does not match what we were looking for. + * o A predicate evaluated to false. + * + * The default implementation of this interface reports errors to any + * error listeners of the parser. It also handles single token insertion + * and deletion for mismatched elements. + * + * We pass in the parser to each function so that the same strategy + * can be shared between multiple parsers running at the same time. + * This is just for flexibility, not that we need it for the default system. + * + * TODO: what to do about lexers + */ +public interface ANTLRErrorStrategy { + /** Report any kind of RecognitionException. */ + void reportError(BaseRecognizer recognizer, + RecognitionException e) + throws RecognitionException; + + /** When matching elements within an alternative, use this method + * to recover. The default implementation uses single token + * insertion and deletion. If you want to change the way ANTLR + * responds to mismatched element errors within an alternative, + * implement this method. + * + * From the recognizer, we can get the input stream to get + * the current input symbol and we can get the current context. + * That context gives us the current state within the ATN. + * From that state, we can look at its transition to figure out + * what was expected. 
+ * + * Because we can recover from a single missing token by + * "inserting" a token, we need to specify what that implicitly created + * token is. We use Object, because it could be a tree node. + * + * To bail out upon first error, simply throw a RecognitionException. + */ + Object recoverInline(BaseRecognizer recognizer) + throws RecognitionException; + + /** Resynchronize the parser by consuming tokens until we find one + * in the resynchronization set--loosely the set of tokens that can follow + * the current rule. + * + * To bail out upon first error, simply throw a RecognitionException. + */ + void recover(BaseRecognizer recognizer); + + /** Reset the error handler. The parser invokes this + * from its own reset method. + */ + void reset(); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRParserListener.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRParserListener.java index 11618fb67..161e1a363 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRParserListener.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRParserListener.java @@ -31,5 +31,5 @@ package org.antlr.v4.runtime; /** */ public interface ANTLRParserListener { - public void error(RecognitionException msg); + public void error(int line, int charPositionInLine, String msg); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/BaseRecognizer.java b/runtime/Java/src/org/antlr/v4/runtime/BaseRecognizer.java index 680feacb9..2689452af 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/BaseRecognizer.java +++ b/runtime/Java/src/org/antlr/v4/runtime/BaseRecognizer.java @@ -63,21 +63,8 @@ public abstract class BaseRecognizer extends Recognizer { */ protected boolean errorRecovery = false; - /** The index into the input stream where the last error occurred. - * This is used to prevent infinite loops where an error is found - * but no token is consumed during recovery...another error is found, - * ad naseum. This is a failsafe mechanism to guarantee that at least - * one token/tree node is consumed for two errors. 
- */ - protected int lastErrorIndex = -1; - - /** In lieu of a return value, this indicates that a rule or token - * has failed to match. Reset to false upon valid token match. - */ -// protected boolean failed = false; - /** Did the recognizer encounter a syntax error? Track how many. */ - public int syntaxErrors = 0; + protected int syntaxErrors = 0; public BaseRecognizer(IntStream input) { setInputStream(input); @@ -88,8 +75,6 @@ public abstract class BaseRecognizer extends Recognizer { if ( getInputStream()!=null ) getInputStream().seek(0); errorRecovery = false; _ctx = null; - lastErrorIndex = -1; -// failed = false; } /** Match current input symbol against ttype. Attempt @@ -102,6 +87,7 @@ public abstract class BaseRecognizer extends Recognizer { * This way any error in a rule will cause an exception and * immediate exit from rule. Rule would recover by resynchronizing * to the set of symbols that can follow rule ref. + * TODO: mv into Parser etc... to get more precise return value/efficiency */ public Object match(int ttype) throws RecognitionException { // System.out.println("match "+((TokenStream)input).LT(1)+" vs expected "+ttype); @@ -109,18 +95,18 @@ public abstract class BaseRecognizer extends Recognizer { if ( getInputStream().LA(1)==ttype ) { getInputStream().consume(); errorRecovery = false; -// failed = false; if ( buildParseTrees ) _ctx.addChild((Token)matchedSymbol); return matchedSymbol; } + return _errHandler.recoverInline(this); // System.out.println("MATCH failure at state "+_ctx.s+ // ", ctx="+_ctx.toString(this)); - IntervalSet expecting = _interp.atn.nextTokens(_ctx); +// IntervalSet expecting = _interp.atn.nextTokens(_ctx); // System.out.println("could match "+expecting); - matchedSymbol = recoverFromMismatchedToken(ttype, expecting); +// matchedSymbol = recoverFromMismatchedToken(ttype, expecting); // System.out.println("rsync'd to "+matchedSymbol); - return matchedSymbol; +// return matchedSymbol; } // like matchSet but w/o consume; 
error checking routine. @@ -129,7 +115,7 @@ public abstract class BaseRecognizer extends Recognizer { // System.out.println("failed sync to "+expecting); IntervalSet followSet = computeErrorRecoverySet(); followSet.addAll(expecting); - NoViableAltException e = new NoViableAltException(this, _ctx); + NoViableAltException e = new NoViableAltException(this); recoverFromMismatchedSet(e, followSet); } @@ -237,7 +223,7 @@ public abstract class BaseRecognizer extends Recognizer { syntaxErrors++; // don't count spurious errorRecovery = true; - notifyListeners(e); + notifyListeners(e.line, e.charPositionInLine, e.getMessage()); } @@ -277,15 +263,6 @@ public abstract class BaseRecognizer extends Recognizer { */ } - /** A hook to listen in on the token consumption during error recovery. - * The DebugParser subclasses this to fire events to the listenter. - */ - public void beginResync() { - } - - public void endResync() { - } - /* Compute the error recovery set for the current rule. During * rule invocation, the parser pushes the set of tokens that can * follow that rule reference on the stack; this amounts to @@ -498,9 +475,7 @@ public abstract class BaseRecognizer extends Recognizer { ((TokenStream)input).LT(1)+ " since "+((TokenStream)input).LT(2)+" is what we want"); */ - beginResync(); getInputStream().consume(); // simply delete extra token - endResync(); reportError(e); // report after consuming so AW sees the token in the exception // we want to return the token we're actually matching Object matchedSymbol = getCurrentInputSymbol(); @@ -545,6 +520,15 @@ public abstract class BaseRecognizer extends Recognizer { */ protected Object getCurrentInputSymbol() { return null; } + public void notifyListeners(int line, int charPositionInLine, String msg) { + if ( _listeners==null || _listeners.size()==0 ) { + emitErrorMessage("line "+line+":"+charPositionInLine+" "+msg); + return; + } + for (ANTLRParserListener pl : _listeners) { + pl.error(line, charPositionInLine, msg); + } + 
} public void enterOuterAlt(ParserRuleContext localctx, int altNum) { _ctx = localctx; diff --git a/runtime/Java/src/org/antlr/v4/runtime/DefaultANTLRErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/DefaultANTLRErrorStrategy.java new file mode 100644 index 000000000..1bdce00e5 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/DefaultANTLRErrorStrategy.java @@ -0,0 +1,448 @@ +package org.antlr.v4.runtime; + +import org.antlr.v4.runtime.atn.ATN; +import org.antlr.v4.runtime.atn.ATNState; +import org.antlr.v4.runtime.atn.RuleTransition; +import org.antlr.v4.runtime.misc.IntervalSet; + +/** This is the default error handling mechanism for ANTLR parsers + * and tree parsers. + */ +public class DefaultANTLRErrorStrategy implements ANTLRErrorStrategy { + /** This is true when we see an error and before having successfully + * matched a token. Prevents generation of more than one error message + * per error. + */ + protected boolean errorRecovery = false; + + /** The index into the input stream where the last error occurred. + * This is used to prevent infinite loops where an error is found + * but no token is consumed during recovery...another error is found, + * ad nauseam. This is a failsafe mechanism to guarantee that at least + * one token/tree node is consumed for two errors. 
+ */ + protected int lastErrorIndex = -1; + + @Override + public void reset() { + errorRecovery = false; + lastErrorIndex = -1; + } + + @Override + public void reportError(BaseRecognizer recognizer, + RecognitionException e) + throws RecognitionException + { + if ( e instanceof NoViableAltException ) { + reportNoViableAlternative(recognizer, (NoViableAltException)e); + } + else if ( e instanceof InputMismatchException ) { + reportInputMismatch(recognizer, (InputMismatchException)e); + } + else if ( e instanceof FailedPredicateException ) { + reportFailedPredicate(recognizer, (FailedPredicateException)e); + } + else { + System.err.println("unknown recognition error type: "+e.getClass().getName()); + if ( recognizer!=null ) { + recognizer.notifyListeners(e.line, e.charPositionInLine, e.getMessage()); + } + } + } + + /** Recover from NoViableAlt errors. Also there could be a mismatched + * token that the match() routine could not recover from. + */ + @Override + public void recover(BaseRecognizer recognizer) { + if ( lastErrorIndex==recognizer.getInputStream().index() ) { + // uh oh, another error at same token index; must be a case + // where LT(1) is in the recovery token set so nothing is + // consumed; consume a single token at least to prevent + // an infinite loop; this is a failsafe. 
+ recognizer.getInputStream().consume(); + } + lastErrorIndex = recognizer.getInputStream().index(); + IntervalSet followSet = computeErrorRecoverySet(recognizer); + consumeUntil(recognizer, followSet); + } + + public void reportNoViableAlternative(BaseRecognizer recognizer, + NoViableAltException e) + throws RecognitionException + { + if ( recognizer.errorRecovery ) return; + trackError(recognizer); + + String msg = "no viable alternative at input "+getTokenErrorDisplay(e.token); + recognizer.notifyListeners(e.line, e.charPositionInLine, msg); + } + + public void reportInputMismatch(BaseRecognizer recognizer, + InputMismatchException e) + throws RecognitionException + { + if ( recognizer.errorRecovery ) return; + trackError(recognizer); + + String msg = "mismatched input "+getTokenErrorDisplay(e.token)+ + " expecting "+e.expecting.toString(recognizer.getTokenNames()); + recognizer.notifyListeners(e.line, e.charPositionInLine, msg); + } + + public void reportFailedPredicate(BaseRecognizer recognizer, + FailedPredicateException e) + throws RecognitionException + { + if ( recognizer.errorRecovery ) return; + trackError(recognizer); + + String ruleName = recognizer.getRuleNames()[recognizer._ctx.getRuleIndex()]; + String msg = "rule "+ruleName+" failed predicate: {"+ + e.predicateText+"}?"; + recognizer.notifyListeners(e.line, e.charPositionInLine, msg); + } + + public void reportUnwantedToken(BaseRecognizer recognizer) { + if ( recognizer.errorRecovery ) return; + trackError(recognizer); + + Token t = (Token)recognizer.getCurrentInputSymbol(); + + String tokenName = getTokenErrorDisplay(t); + IntervalSet expecting = getExpectedTokens(recognizer); + String msg = "extraneous input "+tokenName+" expecting "+ + expecting.toString(recognizer.getTokenNames()); + recognizer.notifyListeners(t.getLine(), t.getCharPositionInLine(), msg); + } + + public void reportMissingToken(BaseRecognizer recognizer) { + if ( recognizer.errorRecovery ) return; + trackError(recognizer); + + 
Token t = (Token)recognizer.getCurrentInputSymbol(); + IntervalSet expecting = getExpectedTokens(recognizer); + String msg = "missing "+expecting.toString(recognizer.getTokenNames())+ + " at "+getTokenErrorDisplay(t); + + recognizer.notifyListeners(t.getLine(), t.getCharPositionInLine(), msg); + } + + /** Attempt to recover from a single missing or extra token. + * + * EXTRA TOKEN + * + * LA(1) is not what we are looking for. If LA(2) has the right token, + * however, then assume LA(1) is some extra spurious token. Delete it + * and consume LA(2) as if we were doing a normal match(), which advances the + * input. + * + * MISSING TOKEN + * + * If current token is consistent with what could come after + * ttype then it is ok to "insert" the missing token, else throw + * an exception. For example, input "i=(3;" is clearly missing the + * ')'. When the parser returns from the nested call to expr, it + * will have call chain: + * + * stat -> expr -> atom + * + * and it will be trying to match the ')' at this point in the + * derivation: + * + * => ID '=' '(' INT ')' ('+' atom)* ';' + * ^ + * match() will see that ';' doesn't match ')' and report a + * mismatched token error. To recover, it sees that LA(1)==';' + * is in the set of tokens that can follow the ')' token + * reference in rule atom. It can assume that you forgot the ')'. 
+ */ + @Override + public Object recoverInline(BaseRecognizer recognizer) + throws RecognitionException + { + IntervalSet expecting = getExpectedTokens(recognizer); + IntervalSet follow = null; + + RecognitionException e = null; + // if next token is what we are looking for then "delete" this token + int nextTokenType = recognizer.getInputStream().LA(2); + if ( expecting.member(nextTokenType) ) { + reportUnwantedToken(recognizer); + System.err.println("recoverFromMismatchedToken deleting "+ + ((TokenStream)recognizer.getInputStream()).LT(1)+ + " since "+((TokenStream)recognizer.getInputStream()).LT(2)+ + " is what we want"); + recognizer.getInputStream().consume(); // simply delete extra token + // we want to return the token we're actually matching + Object matchedSymbol = recognizer.getCurrentInputSymbol(); + recognizer.getInputStream().consume(); // move past ttype token as if all were ok + return matchedSymbol; + } + // can't recover with single token deletion, try insertion + if ( mismatchIsMissingToken() ) { + reportMissingToken(recognizer); + return getMissingSymbol(recognizer); + } + // even that didn't work; must throw the exception + throw new InputMismatchException(recognizer); + } + + protected IntervalSet getExpectedTokens(BaseRecognizer recognizer) { + return recognizer._interp.atn.nextTokens(recognizer._ctx); + } + + public boolean mismatchIsMissingToken() { + return false; + /* + if ( follow==null ) { + // we have no information about the follow; we can only consume + // a single token and hope for the best + return false; + } + // compute what can follow this grammar element reference + if ( follow.member(Token.EOR_TOKEN_TYPE) ) { + IntervalSet viableTokensFollowingThisRule = computeNextViableTokenSet(); + follow = follow.or(viableTokensFollowingThisRule); + if ( ctx.sp>=0 ) { // remove EOR if we're not the start symbol + follow.remove(Token.EOR_TOKEN_TYPE); + } + } + // if current token is consistent with what could come after set + // then we 
know we're missing a token; error recovery is free to + // "insert" the missing token + + //System.out.println("viable tokens="+follow.toString(getTokenNames())); + //System.out.println("LT(1)="+((TokenStream)input).LT(1)); + + // IntervalSet cannot handle negative numbers like -1 (EOF) so I leave EOR + // in follow set to indicate that the fall of the start symbol is + // in the set (EOF can follow). + if ( follow.member(input.LA(1)) || follow.member(Token.EOR_TOKEN_TYPE) ) { + //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting..."); + return true; + } + return false; + */ + } + + /** Conjure up a missing token during error recovery. + * + * The recognizer attempts to recover from single missing + * symbols. But, actions might refer to that missing symbol. + * For example, x=ID {f($x);}. The action clearly assumes + * that there has been an identifier matched previously and that + * $x points at that token. If that token is missing, but + * the next token in the stream is what we want we assume that + * this token is missing and we keep going. Because we + * have to return some token to replace the missing token, + * we have to conjure one up. This method gives the user control + * over the tokens returned for missing tokens. Mostly, + * you will want to create something special for identifier + * tokens. For literals such as '{' and ',', the default + * action in the parser or tree parser works. It simply creates + * a CommonToken of the appropriate type. The text will be the token. + * If you change what tokens must be created by the lexer, + * override this method to create the appropriate tokens. 
+ */ + protected Object getMissingSymbol(BaseRecognizer recognizer) { + IntervalSet expecting = getExpectedTokens(recognizer); + int expectedTokenType = expecting.getMinElement(); // get any element + String tokenText = null; + if ( expectedTokenType== Token.EOF ) tokenText = "<missing EOF>"; // EOF has no entry in the token-name table + else tokenText = "<missing "+recognizer.getTokenNames()[expectedTokenType]+">"; // name the conjured token so messages/actions can tell it is synthetic + CommonToken t = new CommonToken(expectedTokenType, tokenText); + Token current = (Token)recognizer.getCurrentInputSymbol(); + if ( current.getType() == Token.EOF ) { + current = ((TokenStream)recognizer.getInputStream()).LT(-1); + } + t.line = current.getLine(); + t.charPositionInLine = current.getCharPositionInLine(); + t.channel = Token.DEFAULT_CHANNEL; + t.source = current.getTokenSource(); + return t; + } + + /** How should a token be displayed in an error message? The default + * is to display just the text, but during development you might + * want to have a lot of information spit out. Override in that case + * to use t.toString() (which, for CommonToken, dumps everything about + * the token). This is better than forcing you to override a method in + * your token objects because you don't have to go modify your lexer + * so that it creates a new Java type. + */ + public String getTokenErrorDisplay(Token t) { + if ( t==null ) return "<no token>"; + String s = t.getText(); + if ( s==null ) { + if ( t.getType()==Token.EOF ) { + s = "<EOF>"; + } + else { + s = "<"+t.getType()+">"; + } + } + s = s.replaceAll("\n","\\\\n"); + s = s.replaceAll("\r","\\\\r"); + s = s.replaceAll("\t","\\\\t"); + return "'"+s+"'"; + } + + /** Report a recognition problem. + * + * This method sets errorRecovery to indicate the parser is recovering + * not parsing. Once in recovery mode, no errors are generated. + * To get out of recovery mode, the parser must successfully match + * a token (after a resync). So it will go: + * + * 1. error occurs + * 2. enter recovery mode, report error + * 3. consume until token found in resynch set + * 4. try to resume parsing + * 5. 
next match() will reset errorRecovery mode + */ +// public void _reportError(BaseRecognizer recognizer, +// RecognitionException e) { +// // if we've already reported an error and have not matched a token +// // yet successfully, don't report any errors. +// if ( recognizer.errorRecovery ) return; +// trackError(recognizer); +// +// recognizer.notifyListeners(e.line, e.charPositionInLine, "dsfdkjasdf"); +// } + + /* Compute the error recovery set for the current rule. During + * rule invocation, the parser pushes the set of tokens that can + * follow that rule reference on the stack; this amounts to + * computing FIRST of what follows the rule reference in the + * enclosing rule. See LinearApproximator.FIRST(). + * This local follow set only includes tokens + * from within the rule; i.e., the FIRST computation done by + * ANTLR stops at the end of a rule. + * + * EXAMPLE + * + * When you find a "no viable alt exception", the input is not + * consistent with any of the alternatives for rule r. The best + * thing to do is to consume tokens until you see something that + * can legally follow a call to r *or* any rule that called r. + * You don't want the exact set of viable next tokens because the + * input might just be missing a token--you might consume the + * rest of the input looking for one of the missing tokens. + * + * Consider grammar: + * + * a : '[' b ']' + * | '(' b ')' + * ; + * b : c '^' INT ; + * c : ID + * | INT + * ; + * + * At each rule invocation, the set of tokens that could follow + * that rule is pushed on a stack. 
Here are the various + * context-sensitive follow sets: + * + * FOLLOW(b1_in_a) = FIRST(']') = ']' + * FOLLOW(b2_in_a) = FIRST(')') = ')' + * FOLLOW(c_in_b) = FIRST('^') = '^' + * + * Upon erroneous input "[]", the call chain is + * + * a -> b -> c + * + * and, hence, the follow context stack is: + * + * depth follow set start of rule execution + * 0 a (from main()) + * 1 ']' b + * 2 '^' c + * + * Notice that ')' is not included, because b would have to have + * been called from a different context in rule a for ')' to be + * included. + * + * For error recovery, we cannot consider FOLLOW(c) + * (context-sensitive or otherwise). We need the combined set of + * all context-sensitive FOLLOW sets--the set of all tokens that + * could follow any reference in the call chain. We need to + * resync to one of those tokens. Note that FOLLOW(c)='^' and if + * we resync'd to that token, we'd consume until EOF. We need to + * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + * In this case, for input "[]", LA(1) is ']' and in the set, so we would + * not consume anything. After printing an error, rule c would + * return normally. Rule b would not find the required '^' though. + * At this point, it gets a mismatched token error and throws an + * exception (since LA(1) is not in the viable following token + * set). The rule exception handler tries to recover, but finds + * the same recovery set and doesn't consume anything. Rule b + * exits normally returning to rule a. Now it finds the ']' (and + * with the successful match exits errorRecovery mode). + * + * So, you can see that the parser walks up the call chain looking + * for the token that was a member of the recovery set. + * + * Errors are not generated in errorRecovery mode. 
+ * + * ANTLR's error recovery mechanism is based upon original ideas: + * + * "Algorithms + Data Structures = Programs" by Niklaus Wirth + * + * and + * + * "A note on error recovery in recursive descent parsers": + * http://portal.acm.org/citation.cfm?id=947902.947905 + * + * Later, Josef Grosch had some good ideas: + * + * "Efficient and Comfortable Error Recovery in Recursive Descent + * Parsers": + * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + * + * Like Grosch I implement context-sensitive FOLLOW sets that are combined + * at run-time upon error to avoid overhead during parsing. + */ + protected IntervalSet computeErrorRecoverySet(BaseRecognizer recognizer) { + ATN atn = recognizer._interp.atn; + RuleContext ctx = recognizer._ctx; + IntervalSet recoverSet = new IntervalSet(); + while ( ctx!=null && ctx.invokingState>=0 ) { + // compute what follows who invoked us + ATNState invokingState = atn.states.get(ctx.invokingState); + RuleTransition rt = (RuleTransition)invokingState.transition(0); + IntervalSet follow = atn.nextTokens(rt.followState, null); + recoverSet.addAll(follow); + ctx = ctx.parent; + } + System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames())); + return recoverSet; + } + +// public void consumeUntil(BaseRecognizer recognizer, int tokenType) { +// //System.out.println("consumeUntil "+tokenType); +// int ttype = recognizer.getInputStream().LA(1); +// while (ttype != Token.EOF && ttype != tokenType) { +// recognizer.getInputStream().consume(); +// ttype = recognizer.getInputStream().LA(1); +// } +// } + + /** Consume tokens until one matches the given token set */ + public void consumeUntil(BaseRecognizer recognizer, IntervalSet set) { + //System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); + int ttype = recognizer.getInputStream().LA(1); + while (ttype != Token.EOF && !set.member(ttype) ) { + //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); + 
recognizer.getInputStream().consume(); + ttype = recognizer.getInputStream().LA(1); + } + } + + protected void trackError(BaseRecognizer recognizer) { + recognizer.syntaxErrors++; + recognizer.errorRecovery = true; + } + +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/FailedPredicateException.java b/runtime/Java/src/org/antlr/v4/runtime/FailedPredicateException.java index f9b6eeaa8..5401b586a 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/FailedPredicateException.java +++ b/runtime/Java/src/org/antlr/v4/runtime/FailedPredicateException.java @@ -40,6 +40,11 @@ public class FailedPredicateException extends RecognitionException { /** Used for remote debugger deserialization */ public FailedPredicateException() {;} + public FailedPredicateException(BaseRecognizer recognizer, String predText) { + super(recognizer); + this.predicateText = predText; + } + public FailedPredicateException(BaseRecognizer recognizer, IntStream input, String ruleName, diff --git a/runtime/Java/src/org/antlr/v4/runtime/InputMismatchException.java b/runtime/Java/src/org/antlr/v4/runtime/InputMismatchException.java new file mode 100644 index 000000000..ce3f605b3 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/InputMismatchException.java @@ -0,0 +1,10 @@ +package org.antlr.v4.runtime; + +/** This signifies any kind of mismatched input exceptions such as + * when the current input does not match the expected token or tree node. 
+ */ +public class InputMismatchException extends RecognitionException { + public InputMismatchException(BaseRecognizer recognizer) { + super(recognizer, recognizer.getInputStream(), recognizer._ctx); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/NoViableAltException.java b/runtime/Java/src/org/antlr/v4/runtime/NoViableAltException.java index 6921c0233..c400a8c8d 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/NoViableAltException.java +++ b/runtime/Java/src/org/antlr/v4/runtime/NoViableAltException.java @@ -41,8 +41,8 @@ public class NoViableAltException extends RecognitionException { /** Used for remote debugger deserialization */ public NoViableAltException() {;} - public NoViableAltException(BaseRecognizer recognizer, RuleContext ctx) { // LL(1) error - super(recognizer, recognizer.getInputStream(), ctx); + public NoViableAltException(BaseRecognizer recognizer) { // LL(1) error + super(recognizer, recognizer.getInputStream(), recognizer._ctx); } public NoViableAltException(BaseRecognizer recognizer, IntStream input, diff --git a/runtime/Java/src/org/antlr/v4/runtime/RecognitionException.java b/runtime/Java/src/org/antlr/v4/runtime/RecognitionException.java index fc64b3afd..c785f7e2b 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/RecognitionException.java +++ b/runtime/Java/src/org/antlr/v4/runtime/RecognitionException.java @@ -28,6 +28,7 @@ */ package org.antlr.v4.runtime; +import com.sun.org.apache.regexp.internal.RE; import org.antlr.v4.runtime.misc.IntervalSet; import org.antlr.v4.runtime.tree.*; @@ -108,6 +109,10 @@ public class RecognitionException extends RuntimeException { public RecognitionException() { } + public RecognitionException(BaseRecognizer recognizer) { + this(recognizer, recognizer.getInputStream(), recognizer._ctx); + } + public RecognitionException(BaseRecognizer recognizer, IntStream input, RuleContext ctx) { @@ -126,7 +131,7 @@ public class RecognitionException extends RuntimeException { this.line = token.getLine(); 
this.charPositionInLine = token.getCharPositionInLine(); } - if ( input instanceof ASTNodeStream) { + else if ( input instanceof ASTNodeStream) { //extractInformationFromTreeNodeStream(input); } else { diff --git a/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java b/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java index bda1dfd0b..ed8d3ead4 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java @@ -36,7 +36,8 @@ import java.util.*; public class Recognizer { public static final int EOF=-1; - protected List listeners; + protected ANTLRErrorStrategy _errHandler = new DefaultANTLRErrorStrategy(); + protected List _listeners; protected ATNInterpreter _interp; @@ -195,27 +196,22 @@ public class Recognizer { } public void addListener(ANTLRParserListener pl) { - if ( listeners==null ) { - listeners = + if ( _listeners ==null ) { + _listeners = Collections.synchronizedList(new ArrayList(2)); } - if ( pl!=null ) listeners.add(pl); + if ( pl!=null ) _listeners.add(pl); } - public void removeListener(ANTLRParserListener pl) { listeners.remove(pl); } + public void removeListener(ANTLRParserListener pl) { if ( _listeners!=null ) _listeners.remove(pl); } // list is created lazily by addListener; no-op until then - public void removeListeners() { listeners.clear(); } + public void removeListeners() { if ( _listeners!=null ) _listeners.clear(); } // same lazy-init guard as removeListener - public List getListeners() { return listeners; } + public List getListeners() { return _listeners; } - public void notifyListeners(RecognitionException re) { - if ( listeners==null || listeners.size()==0 ) { - // call legacy v3 func; this calls emitErrorMessage(String msg) - displayRecognitionError(re); - return; - } - for (ANTLRParserListener pl : listeners) pl.error(re); - } + public ANTLRErrorStrategy getErrHandler() { return _errHandler; } + + public void setErrHandler(ANTLRErrorStrategy h) { this._errHandler = h; } // subclass needs to override these if there are sempreds or actions // that the ATN interp needs to execute diff --git 
a/runtime/Java/src/org/antlr/v4/runtime/atn/ATN.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATN.java index f09b1c5fd..7aee37632 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATN.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATN.java @@ -72,13 +72,21 @@ public class ATN { /** Used for runtime deserialization of ATNs from strings */ public ATN() { } + /** Compute the set of valid tokens reachable from the current + * position in the parse. ctx must not be null. + */ public IntervalSet nextTokens(RuleContext ctx) { - return nextTokens(ctx.s, ctx); + ATNState s = states.get(ctx.s); + if ( s == null ) return null; + return nextTokens(s, ctx); } - public IntervalSet nextTokens(int stateNumber, RuleContext ctx) { - ATNState s = states.get(stateNumber); - if ( s == null ) return null; + /** Compute the set of valid tokens that can occur starting in s. + * If ctx is null, the set of tokens will not include what can follow + * the rule surrounding s. In other words, the set will be + * restricted to tokens reachable staying within s's rule. 
+ */ + public IntervalSet nextTokens(ATNState s, RuleContext ctx) { LL1Analyzer anal = new LL1Analyzer(this); IntervalSet next = anal.LOOK(s, ctx); return next; diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java b/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java index 2e64fe853..7c7d93b9d 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java @@ -466,7 +466,10 @@ public class IntervalSet implements IntSet { else buf.append(tokenNames[a]); } else { - buf.append(tokenNames[a]+".."+tokenNames[b]); + for (int i=a; i<=b; i++) { + if ( i>a ) buf.append(", "); + buf.append(tokenNames[i]); + } } if ( iter.hasNext() ) { buf.append(", "); @@ -478,55 +481,6 @@ public class IntervalSet implements IntSet { return buf.toString(); } - /* - public String toString(Grammar g) { - StringBuffer buf = new StringBuffer(); - if ( this.intervals==null || this.intervals.size()==0 ) { - return "{}"; - } - if ( this.size()>1 ) { - buf.append("{"); - } - Iterator iter = this.intervals.iterator(); - while (iter.hasNext()) { - Interval I = (Interval) iter.next(); - int a = I.a; - int b = I.b; - if ( a==b ) { - if ( g!=null ) { - buf.append(g.getTokenDisplayName(a)); - } - else { - buf.append(a); - } - } - else { - if ( g!=null ) { - if ( !g.isLexer() ) { - for (int i=a; i<=b; i++) { - if ( i>a ) buf.append(", "); - buf.append(g.getTokenDisplayName(i)); - } - } - else { - buf.append(g.getTokenDisplayName(a)+".."+g.getTokenDisplayName(b)); - } - } - else { - buf.append(a+".."+b); - } - } - if ( iter.hasNext() ) { - buf.append(", "); - } - } - if ( this.size()>1 ) { - buf.append("}"); - } - return buf.toString(); - } - */ - public int size() { int n = 0; int numIntervals = intervals.size(); diff --git a/tool/playground/TestW.java b/tool/playground/TestW.java new file mode 100644 index 000000000..1c93835f1 --- /dev/null +++ b/tool/playground/TestW.java @@ -0,0 +1,16 @@ +import 
org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.tree.ParseTreeWalker; + +public class TestW { + public static void main(String[] args) throws Exception { + WLexer t = new WLexer(new ANTLRFileStream(args[0])); + CommonTokenStream tokens = new CommonTokenStream(t); +// tokens.fill(); +// for (Object tok : tokens.getTokens()) { +// System.out.println(tok); +// } + WParser p = new WParser(tokens); + p.setBuildParseTrees(true); + p.s(); + } +} diff --git a/tool/playground/W.g b/tool/playground/W.g new file mode 100644 index 000000000..48c17fd02 --- /dev/null +++ b/tool/playground/W.g @@ -0,0 +1,18 @@ +grammar W; + +s : a ';' {System.out.println("done");} ; + +a : '[' b ']' + | '(' b ')' + ; + +b : c '^' INT ; + +c : ID + | INT + ; + +EQ : '=' ; +INT : '0'..'9'+ ; +ID : 'a'..'z'+ ; +WS : (' '|'\n')+ {skip();} ; diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg index 82672eafa..95abb80bf 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg @@ -197,8 +197,8 @@ RuleFunction(currentRule,code,locals,ruleCtx,altLabelCtxs,namedActions,finallyAc } catch (RecognitionException re) { - reportError(re); - recover(); + _errHandler.reportError(this, re); + _errHandler.recover(this); } finally { @@ -313,8 +313,6 @@ switch ( _interp.adaptivePredict(_input,,_ctx) ) { case : break;}; separator="\n"> - default : - } >> @@ -359,7 +357,7 @@ case : Sync(s) ::= "sync();" -ThrowNoViableAlt(t) ::= "throw new NoViableAltException(this,_ctx);" +ThrowNoViableAlt(t) ::= "throw new NoViableAltException(this);" TestSetInline(s) ::= << ==}; separator=" || "> @@ -411,7 +409,7 @@ CommonSetStuff(m, expr, capture, invert) ::= << setState(); = }>_input.LT(1); -if ( !() ) throw new MismatchedSetException(this, _input); +if ( !() ) throw new InputMismatchedException(this); _input.consume(); >> @@ -429,7 +427,7 @@ 
ForcedAction(a, chunks) ::= "" ArgAction(a, chunks) ::= "" SemPred(p, chunks) ::= << -if (!()) throw new FailedPredicateException(this, _input, "", """!>); +if (!()) throw new FailedPredicateException(this, ""); >> ActionText(t) ::= "" diff --git a/tool/src/org/antlr/v4/codegen/model/AltBlock.java b/tool/src/org/antlr/v4/codegen/model/AltBlock.java index 26cb72ebe..6cfeadd64 100644 --- a/tool/src/org/antlr/v4/codegen/model/AltBlock.java +++ b/tool/src/org/antlr/v4/codegen/model/AltBlock.java @@ -36,7 +36,7 @@ import org.antlr.v4.tool.GrammarAST; import java.util.List; public class AltBlock extends Choice { - @ModelElement public ThrowNoViableAlt error; +// @ModelElement public ThrowNoViableAlt error; public AltBlock(OutputModelFactory factory, GrammarAST blkOrEbnfRootAST, @@ -44,6 +44,7 @@ public class AltBlock extends Choice { { super(factory, blkOrEbnfRootAST, alts); decision = ((BlockStartState)blkOrEbnfRootAST.atnState).decision; - this.error = new ThrowNoViableAlt(factory, blkOrEbnfRootAST, null); + // interp.predict() throws exception +// this.error = new ThrowNoViableAlt(factory, blkOrEbnfRootAST, null); } } diff --git a/tool/src/org/antlr/v4/tool/ErrorManager.java b/tool/src/org/antlr/v4/tool/ErrorManager.java index 9d36a06cf..cd1fe06c0 100644 --- a/tool/src/org/antlr/v4/tool/ErrorManager.java +++ b/tool/src/org/antlr/v4/tool/ErrorManager.java @@ -312,6 +312,11 @@ public class ErrorManager { e.printStackTrace(System.err); } + public static void panic(String msg) { + rawError(msg); + panic(); + } + public static void panic() { // can't call tool.panic since there may be multiple tools; just // one error manager