more cleanup

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9093]
2011-10-02 13:17:06 -08:00 · 2011-10-02 13:17:06 -08:00 · 05360be562
parent 02d424a92b
commit 05360be562
4 changed files with 7 additions and 410 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/BaseRecognizer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/BaseRecognizer.java
@ -98,71 +98,15 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
 			return matchedSymbol;
 		}
 		return _errHandler.recoverInline(this);
-//		System.out.println("MATCH failure at state "+_ctx.s+
-//			", ctx="+_ctx.toString(this));
-//		IntervalSet expecting = _interp.atn.nextTokens(_ctx);
-//		System.out.println("could match "+expecting);
-
-//		matchedSymbol = recoverFromMismatchedToken(ttype, expecting);
-//		System.out.println("rsync'd to "+matchedSymbol);
-//		return matchedSymbol;
 	}

-	// like matchSet but w/o consume; error checking routine.
-//	public void sync(IntervalSet expecting) {
-//		if ( expecting.contains(getInputStream().LA(1)) ) return;
-////		System.out.println("failed sync to "+expecting);
-//		IntervalSet followSet = computeErrorRecoverySet();
-//		followSet.addAll(expecting);
-//		NoViableAltException e = new NoViableAltException(this);
-//		recoverFromMismatchedSet(e, followSet);
-//	}
-
 	/** Match the wildcard: in a symbol */
 	public void matchAny() {
 		errorRecovery = false;
-//		failed = false;
+		_errHandler.reset();
 		getInputStream().consume();
 	}

-	public boolean mismatchIsUnwantedToken(int ttype) {
-		return getInputStream().LA(2)==ttype;
-	}
-
-	public boolean mismatchIsMissingToken(IntervalSet follow) {
-		return false;
-		/*
-		if ( follow==null ) {
-			// we have no information about the follow; we can only consume
-			// a single token and hope for the best
-			return false;
-		}
-		// compute what can follow this grammar element reference
-		if ( follow.member(Token.EOR_TOKEN_TYPE) ) {
-			IntervalSet viableTokensFollowingThisRule = computeNextViableTokenSet();
-			follow = follow.or(viableTokensFollowingThisRule);
-            if ( ctx.sp>=0 ) { // remove EOR if we're not the start symbol
-                follow.remove(Token.EOR_TOKEN_TYPE);
-            }
-		}
-		// if current token is consistent with what could come after set
-		// then we know we're missing a token; error recovery is free to
-		// "insert" the missing token
-
-		//System.out.println("viable tokens="+follow.toString(getTokenNames()));
-		//System.out.println("LT(1)="+((TokenStream)input).LT(1));
-
-		// IntervalSet cannot handle negative numbers like -1 (EOF) so I leave EOR
-		// in follow set to indicate that the fall of the start symbol is
-		// in the set (EOF can follow).
-		if ( follow.member(input.LA(1)) || follow.member(Token.EOR_TOKEN_TYPE) ) {
-			//System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
-			return true;
-		}
-		return false;
-		*/
-	}
-
 	/** Track the RuleContext objects during the parse and hook them up
 	 *  using the children list so that it forms a parse tree.
 	 *  The RuleContext returned from the start rule represents the root
@ -225,7 +169,6 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
 		notifyListeners(e.offendingToken, e.getMessage(), e);
 	}

-
 	/** Get number of recognition errors (lexer, parser, tree parser).  Each
 	 *  recognizer tracks its own number.  So parser and lexer each have
 	 *  separate count.  Does not count the spurious errors found between
@ -237,275 +180,6 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
 		return syntaxErrors;
 	}

-
-	/** Recover from an error found on the input stream.  This is
-	 *  for NoViableAlt and mismatched symbol exceptions.  If you enable
-	 *  single token insertion and deletion, this will usually not
-	 *  handle mismatched symbol exceptions but there could be a mismatched
-	 *  token that the match() routine could not recover from.
-	 */
-	public void recover() {
-		getInputStream().consume();
-		/*
-		if ( lastErrorIndex==input.index() ) {
-			// uh oh, another error at same token index; must be a case
-			// where LT(1) is in the recovery token set so nothing is
-			// consumed; consume a single token so at least to prevent
-			// an infinite loop; this is a failsafe.
-			input.consume();
-		}
-		lastErrorIndex = input.index();
-		IntervalSet followSet = computeErrorRecoverySet();
-		beginResync();
-		consumeUntil(followSet);
-		endResync();
-		*/
-	}
-
-	/*  Compute the error recovery set for the current rule.  During
-	 *  rule invocation, the parser pushes the set of tokens that can
-	 *  follow that rule reference on the stack; this amounts to
-	 *  computing FIRST of what follows the rule reference in the
-	 *  enclosing rule. See LinearApproximator.FIRST().
-	 *  This local follow set only includes tokens
-	 *  from within the rule; i.e., the FIRST computation done by
-	 *  ANTLR stops at the end of a rule.
-	 *
-	 *  EXAMPLE
-	 *
-	 *  When you find a "no viable alt exception", the input is not
-	 *  consistent with any of the alternatives for rule r.  The best
-	 *  thing to do is to consume tokens until you see something that
-	 *  can legally follow a call to r *or* any rule that called r.
-	 *  You don't want the exact set of viable next tokens because the
-	 *  input might just be missing a token--you might consume the
-	 *  rest of the input looking for one of the missing tokens.
-	 *
-	 *  Consider grammar:
-	 *
-	 *  a : '[' b ']'
-	 *    | '(' b ')'
-	 *    ;
-	 *  b : c '^' INT ;
-	 *  c : ID
-	 *    | INT
-	 *    ;
-	 *
-	 *  At each rule invocation, the set of tokens that could follow
-	 *  that rule is pushed on a stack.  Here are the various
-	 *  context-sensitive follow sets:
-	 *
-	 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
-	 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
-	 *  FOLLOW(c_in_b) = FIRST('^') = '^'
-	 *
-	 *  Upon erroneous input "[]", the call chain is
-	 *
-	 *  a -> b -> c
-	 *
-	 *  and, hence, the follow context stack is:
-	 *
-	 *  depth     follow set       start of rule execution
-	 *    0         <EOF>                    a (from main())
-	 *    1          ']'                     b
-	 *    2          '^'                     c
-	 *
-	 *  Notice that ')' is not included, because b would have to have
-	 *  been called from a different context in rule a for ')' to be
-	 *  included.
-	 *
-	 *  For error recovery, we cannot consider FOLLOW(c)
-	 *  (context-sensitive or otherwise).  We need the combined set of
-	 *  all context-sensitive FOLLOW sets--the set of all tokens that
-	 *  could follow any reference in the call chain.  We need to
-	 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
-	 *  we resync'd to that token, we'd consume until EOF.  We need to
-	 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
-	 *  In this case, for input "[]", LA(1) is ']' and in the set, so we would
-	 *  not consume anything. After printing an error, rule c would
-	 *  return normally.  Rule b would not find the required '^' though.
-	 *  At this point, it gets a mismatched token error and throws an
-	 *  exception (since LA(1) is not in the viable following token
-	 *  set).  The rule exception handler tries to recover, but finds
-	 *  the same recovery set and doesn't consume anything.  Rule b
-	 *  exits normally returning to rule a.  Now it finds the ']' (and
-	 *  with the successful match exits errorRecovery mode).
-	 *
-	 *  So, you can see that the parser walks up the call chain looking
-	 *  for the token that was a member of the recovery set.
-	 *
-	 *  Errors are not generated in errorRecovery mode.
-	 *
-	 *  ANTLR's error recovery mechanism is based upon original ideas:
-	 *
-	 *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
-	 *
-	 *  and
-	 *
-	 *  "A note on error recovery in recursive descent parsers":
-	 *  http://portal.acm.org/citation.cfm?id=947902.947905
-	 *
-	 *  Later, Josef Grosch had some good ideas:
-	 *
-	 *  "Efficient and Comfortable Error Recovery in Recursive Descent
-	 *  Parsers":
-	 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
-	 *
-	 *  Like Grosch I implement context-sensitive FOLLOW sets that are combined
-	 *  at run-time upon error to avoid overhead during parsing.
-	 */
-	protected IntervalSet computeErrorRecoverySet() {
-		return null;
-//		int top = ctx.sp;
-//		IntervalSet followSet = new IntervalSet();
-//		for (int i=top; i>=0; i--) { // i==0 is EOF context for start rule invocation
-//			IntervalSet f = (IntervalSet)ctx.get(i).follow;
-//			followSet.orInPlace(f);
-//		}
-//		return followSet;
-	}
-
-	/** Compute the context-sensitive FOLLOW set for current rule.
-	 *  This is set of token types that can follow a specific rule
-	 *  reference given a specific call chain.  You get the set of
-	 *  viable tokens that can possibly come next (lookahead depth 1)
-	 *  given the current call chain.  Contrast this with the
-	 *  definition of plain FOLLOW for rule r:
-	 *
-	 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
-	 *
-	 *  where x in T* and alpha, beta in V*; T is set of terminals and
-	 *  V is the set of terminals and nonterminals.  In other words,
-	 *  FOLLOW(r) is the set of all tokens that can possibly follow
-	 *  references to r in *any* sentential form (context).  At
-	 *  runtime, however, we know precisely which context applies as
-	 *  we have the call chain.  We may compute the exact (rather
-	 *  than covering superset) set of following tokens.
-	 *
-	 *  For example, consider grammar:
-	 *
-	 *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
-	 *       | "return" expr '.'
-	 *       ;
-	 *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
-	 *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
-	 *       | '(' expr ')'
-	 *       ;
-	 *
-	 *  The FOLLOW sets are all inclusive whereas context-sensitive
-	 *  FOLLOW sets are precisely what could follow a rule reference.
-	 *  For input input "i=(3);", here is the derivation:
-	 *
-	 *  stat => ID '=' expr ';'
-	 *       => ID '=' atom ('+' atom)* ';'
-	 *       => ID '=' '(' expr ')' ('+' atom)* ';'
-	 *       => ID '=' '(' atom ')' ('+' atom)* ';'
-	 *       => ID '=' '(' INT ')' ('+' atom)* ';'
-	 *       => ID '=' '(' INT ')' ';'
-	 *
-	 *  At the "3" token, you'd have a call chain of
-	 *
-	 *    stat -> expr -> atom -> expr -> atom
-	 *
-	 *  What can follow that specific nested ref to atom?  Exactly ')'
-	 *  as you can see by looking at the derivation of this specific
-	 *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
-	 *
-	 *  You want the exact viable token set when recovering from a
-	 *  token mismatch.  Upon token mismatch, if LA(1) is member of
-	 *  the viable next token set, then you know there is most likely
-	 *  a missing token in the input stream.  "Insert" one by just not
-	 *  throwing an exception.
-	 */
-	public IntervalSet computeNextViableTokenSet() {
-		return null;
-//		int top = ctx.sp;
-//		IntervalSet followSet = new IntervalSet();
-//		for (int i=top; i>=0; i--) { // i==0 is EOF context for start rule invocation
-//			IntervalSet f = (IntervalSet)ctx.get(i).follow;
-//			followSet.orInPlace(f);
-//			// can we see end of rule? if not, don't include follow of this rule
-//			if ( !f.member(Token.EOR_TOKEN_TYPE) ) break;
-//			// else combine with tokens that can follow this rule (rm EOR also)
-//			// EOR indicates we have to include follow(start rule); i.e., EOF
-//			followSet.remove(Token.EOR_TOKEN_TYPE);
-//		}
-//		return followSet;
-	}
-
-	/** Attempt to recover from a single missing or extra token.
-	 *
-	 *  EXTRA TOKEN
-	 *
-	 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
-	 *  however, then assume LA(1) is some extra spurious token.  Delete it
-	 *  and LA(2) as if we were doing a normal match(), which advances the
-	 *  input.
-	 *
-	 *  MISSING TOKEN
-	 *
-	 *  If current token is consistent with what could come after
-	 *  ttype then it is ok to "insert" the missing token, else throw
-	 *  exception For example, Input "i=(3;" is clearly missing the
-	 *  ')'.  When the parser returns from the nested call to expr, it
-	 *  will have call chain:
-	 *
-	 *    stat -> expr -> atom
-	 *
-	 *  and it will be trying to match the ')' at this point in the
-	 *  derivation:
-	 *
-	 *       => ID '=' '(' INT ')' ('+' atom)* ';'
-	 *                          ^
-	 *  match() will see that ';' doesn't match ')' and report a
-	 *  mismatched token error.  To recover, it sees that LA(1)==';'
-	 *  is in the set of tokens that can follow the ')' token
-	 *  reference in rule atom.  It can assume that you forgot the ')'.
-	protected Object recoverFromMismatchedToken(int ttype, IntervalSet follow)
-		throws RecognitionException
-	{
-		RecognitionException e = null;
-		// if next token is what we are looking for then "delete" this token
-		if ( mismatchIsUnwantedToken(ttype) ) {
-			e = new UnwantedTokenException(this, getInputStream(), ttype);
-			System.err.println("recoverFromMismatchedToken deleting "+
-							   ((TokenStream)input).LT(1)+
-							   " since "+((TokenStream)input).LT(2)+" is what we want");
-			getInputStream().consume(); // simply delete extra token
-			reportError(e);  // report after consuming so AW sees the token in the exception
-			// we want to return the token we're actually matching
-			Object matchedSymbol = getCurrentInputSymbol();
-			getInputStream().consume(); // move past ttype token as if all were ok
-			return matchedSymbol;
-		}
-		// can't recover with single token deletion, try insertion
-		if ( mismatchIsMissingToken(follow) ) {
-			Object inserted = getMissingSymbol(e, ttype, follow);
-			e = new MissingTokenException(this, getInputStream(), ttype, inserted);
-			reportError(e);  // report after inserting so AW sees the token in the exception
-			return inserted;
-		}
-		// even that didn't work; must throw the exception
-		e = new MismatchedTokenException(this, getInputStream(), ttype);
-		throw e;
-	}
-	*/
-
-/*
-	public Object recoverFromMismatchedSet(RecognitionException e,
-										   IntervalSet follow)
-		throws RecognitionException
-	{
-		if ( mismatchIsMissingToken(follow) ) {
-			// System.out.println("missing token");
-			reportError(e);
-			// we don't know how to conjure up a token for sets yet
-			return getMissingSymbol(e, Token.INVALID_TYPE, follow);
-		}
-		// TODO do single token deletion like above for Token mismatch
-		throw e;
-	}
-*/
 	public abstract IntStream getInputStream();
 	public abstract void setInputStream(IntStream input);

@ -549,51 +223,8 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
 		_ctx = (ParserRuleContext)_ctx.parent;
 	}

-
-	/** Conjure up a missing token during error recovery.
-	 *
-	 *  The recognizer attempts to recover from single missing
-	 *  symbols. But, actions might refer to that missing symbol.
-	 *  For example, x=ID {f($x);}. The action clearly assumes
-	 *  that there has been an identifier matched previously and that
-	 *  $x points at that token. If that token is missing, but
-	 *  the next token in the stream is what we want we assume that
-	 *  this token is missing and we keep going. Because we
-	 *  have to return some token to replace the missing token,
-	 *  we have to conjure one up. This method gives the user control
-	 *  over the tokens returned for missing tokens. Mostly,
-	 *  you will want to create something special for identifier
-	 *  tokens. For literals such as '{' and ',', the default
-	 *  action in the parser or tree parser works. It simply creates
-	 *  a CommonToken of the appropriate type. The text will be the token.
-	 *  If you change what tokens must be created by the lexer,
-	 *  override this method to create the appropriate tokens.
-	 */
-	protected Object getMissingSymbol(RecognitionException e,
-									  int expectedTokenType,
-									  IntervalSet follow)
-	{
-		return null;
-	}
-
-	public void consumeUntil(int tokenType) {
-		//System.out.println("consumeUntil "+tokenType);
-		int ttype = getInputStream().LA(1);
-		while (ttype != Token.EOF && ttype != tokenType) {
-			getInputStream().consume();
-			ttype = getInputStream().LA(1);
-		}
-	}
-
-	/** Consume tokens until one matches the given token set */
-	public void consumeUntil(IntervalSet set) {
-		//System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
-		int ttype = getInputStream().LA(1);
-		while (ttype != Token.EOF && !set.contains(ttype) ) {
-			//System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
-			getInputStream().consume();
-			ttype = getInputStream().LA(1);
-		}
+	public IntervalSet getExpectedTokens() {
+		return _interp.atn.nextTokens(_ctx);
 	}

 	public ParserRuleContext getInvokingContext(int ruleIndex) {
@ -654,9 +285,6 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
 		return rules;
 	}

-    /** Return whether or not a backtracking attempt failed. */
-//    public boolean failed() { return failed; }
-
 	/** For debugging and other purposes, might want the grammar name.
 	 *  Have ANTLR generate an implementation for this method.
 	 */
@ -678,19 +306,6 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
 		return strings;
 	}

-//	public void traceIn(String ruleName, int ruleIndex, Object inputSymbol)  {
-//		System.out.print("enter "+ruleName+" "+inputSymbol);
-//		System.out.println();
-//	}
-//
-//	public void traceOut(String ruleName,
-//						 int ruleIndex,
-//						 Object inputSymbol)
-//	{
-//		System.out.print("exit "+ruleName+" "+inputSymbol);
-//		System.out.println();
-//	}
-
 	/** Indicate that the recognizer has changed internal state that is
 	 *  consistent with the ATN state passed in.  This way we always know
 	 *  where we are in the ATN as the parser goes along. The rule
--- a/runtime/Java/src/org/antlr/v4/runtime/DefaultANTLRErrorStrategy.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/DefaultANTLRErrorStrategy.java
@ -198,8 +198,8 @@ public class DefaultANTLRErrorStrategy implements ANTLRErrorStrategy {
 		throw new InputMismatchException(recognizer);
 	}

-	protected IntervalSet getExpectedTokens(BaseRecognizer recognizer) {
-		return recognizer._interp.atn.nextTokens(recognizer._ctx);
+	public IntervalSet getExpectedTokens(BaseRecognizer recognizer) {
+		return recognizer.getExpectedTokens();
 	}

 	public boolean mismatchIsMissingToken() {
--- a/runtime/Java/src/org/antlr/v4/runtime/Parser.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Parser.java
@ -78,24 +78,6 @@ public class Parser extends BaseRecognizer {
 	@Override
 	public void setInputStream(IntStream input) { _input = (TokenStream)input; }

-	protected Object getMissingSymbol(RecognitionException e,
-									  int expectedTokenType)
-	{
-		String tokenText = null;
-		if ( expectedTokenType== Token.EOF ) tokenText = "<missing EOF>";
-		else tokenText = "<missing "+getTokenNames()[expectedTokenType]+">";
-		CommonToken t = new CommonToken(expectedTokenType, tokenText);
-		Token current = ((TokenStream)_input).LT(1);
-		if ( current.getType() == Token.EOF ) {
-			current = ((TokenStream)_input).LT(-1);
-		}
-		t.line = current.getLine();
-		t.charPositionInLine = current.getCharPositionInLine();
-		t.channel = Token.DEFAULT_CHANNEL;
-		t.source = current.getTokenSource();
-		return t;
-	}
-
 	/** Set the token stream and reset the parser */
 	public void setTokenStream(TokenStream input) {
 		this._input = null;
--- a/tool/test/org/antlr/v4/test/TestParseErrors.java
+++ b/tool/test/org/antlr/v4/test/TestParseErrors.java
@ -87,10 +87,10 @@ public class TestParseErrors extends BaseTest {
 			"WS : ' ' {skip();} ;" +
 			"acClass\n" +
 			"@init\n" +
-			"{ System.out.println(computeContextSensitiveRuleFOLLOW().toString(tokenNames)); }\n" +
+			"{ System.out.println(getExpectedTokens().toString(tokenNames)); }\n" +
 			"  : ;\n";
 		String result = execParser("T.g", grammar, "TParser", "TLexer", "start", "dog and software", false);
-		String expecting = "{HARDWARE,SOFTWARE}\n";
+		String expecting = "{'hardware', 'software'}\n";
 		assertEquals(expecting, result);
 	}
 }