Merge pull request #898 from parrt/check-eof-in-tree

Check eof in tree
2015-06-11 13:13:19 -07:00 · 2015-06-11 13:13:19 -07:00 · a3ddd82950
parent 8cf7082e89 ed41558dc4
commit a3ddd82950
4 changed files with 149 additions and 6 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/Parser.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Parser.java
@ -177,6 +177,9 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
 	 */
 	protected int _syntaxErrors;

+	/** Indicates parser has match()ed EOF token. See {@link #exitRule()}. */
+	protected boolean matchedEOF;
+
 	public Parser(TokenStream input) {
 		setInputStream(input);
 	}
@ -187,6 +190,7 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
 		_errHandler.reset(this);
 		_ctx = null;
 		_syntaxErrors = 0;
+		matchedEOF = false;
 		setTrace(false);
 		_precedenceStack.clear();
 		_precedenceStack.push(0);
@ -217,6 +221,9 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
 	public Token match(int ttype) throws RecognitionException {
 		Token t = getCurrentToken();
 		if ( t.getType()==ttype ) {
+			if ( ttype==Token.EOF ) {
+				matchedEOF = true;
+			}
 			_errHandler.reportMatch(this);
 			consume();
 		}
@ -630,7 +637,13 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
 	}

    public void exitRule() {
-		_ctx.stop = _input.LT(-1);
+		if ( matchedEOF ) {
+			// if we have matched EOF, it cannot consume past EOF so we use LT(1) here
+			_ctx.stop = _input.LT(1); // LT(1) will be end of file
+		}
+		else {
+			_ctx.stop = _input.LT(-1); // stop node is what we just matched
+		}
        // trigger event on _ctx, before it reverts to parent
        if ( _parseListeners != null) triggerExitRuleEvent();
 		setState(_ctx.invokingState);
@ -909,6 +922,10 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
        return false;
    }

+	public boolean isMatchedEOF() {
+		return matchedEOF;
+	}
+
 	/**
 	 * Computes the set of input symbols which could follow the current parser
 	 * state and context, as given by {@link #getState} and {@link #getContext},
--- a/runtime/Java/src/org/antlr/v4/runtime/ParserRuleContext.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/ParserRuleContext.java
@ -272,7 +272,12 @@ public class ParserRuleContext extends RuleContext {

 	@Override
 	public Interval getSourceInterval() {
-		if ( start==null || stop==null ) return Interval.INVALID;
+		if ( start == null ) {
+			return Interval.INVALID;
+		}
+		if ( stop==null || stop.getTokenIndex()<start.getTokenIndex() ) {
+			return Interval.of(start.getTokenIndex(), start.getTokenIndex()-1); // empty
+		}
 		return Interval.of(start.getTokenIndex(), stop.getTokenIndex());
 	}

--- a/runtime/Java/src/org/antlr/v4/runtime/tree/SyntaxTree.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/SyntaxTree.java
@ -42,10 +42,17 @@ public interface SyntaxTree extends Tree {
 	 * Return an {@link Interval} indicating the index in the
 	 * {@link TokenStream} of the first and last token associated with this
 	 * subtree. If this node is a leaf, then the interval represents a single
-	 * token.
+	 * token and has interval i..i for token index i.
+	 *
+	 * <p>An interval of i..i-1 indicates an empty interval at position
+	 * i in the input stream, where 0 &lt;= i &lt;= the size of the input
+	 * token stream.  Currently, the code base can only have i=0..n-1 but
+	 * in concept one could have an empty interval after EOF. </p>
 	 *
 	 * <p>If source interval is unknown, this returns {@link Interval#INVALID}.</p>
+	 *
+	 * <p>As a weird special case, the source interval for rules matched after
+	 * EOF is unspecified.</p>
 	 */
-
 	Interval getSourceInterval();
 }
--- a/tool/test/org/antlr/v4/test/tool/TestParserInterpreter.java
+++ b/tool/test/org/antlr/v4/test/tool/TestParserInterpreter.java
@ -65,7 +65,120 @@ public class TestParserInterpreter extends BaseTest {
 			"s : A ;",
 			lg);

-		testInterp(lg, g, "s", "a", "(s a)");
+		ParseTree t = testInterp(lg, g, "s", "a", "(s a)");
+		assertEquals("0..0", t.getSourceInterval().toString());
+	}
+
+	@Test public void testEOF() throws Exception {
+		LexerGrammar lg = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : 'a' ;\n");
+		Grammar g = new Grammar(
+			"parser grammar T;\n" +
+			"s : A EOF ;",
+			lg);
+
+		ParseTree t = testInterp(lg, g, "s", "a", "(s a <EOF>)");
+		assertEquals("0..1", t.getSourceInterval().toString());
+	}
+
+	@Test public void testEOFInChild() throws Exception {
+		LexerGrammar lg = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : 'a' ;\n");
+		Grammar g = new Grammar(
+			"parser grammar T;\n" +
+			"s : x ;\n" +
+			"x : A EOF ;",
+			lg);
+
+		ParseTree t = testInterp(lg, g, "s", "a", "(s (x a <EOF>))");
+		assertEquals("0..1", t.getSourceInterval().toString());
+		assertEquals("0..1", t.getChild(0).getSourceInterval().toString());
+	}
+
+	@Test public void testEmptyRuleAfterEOFInChild() throws Exception {
+		LexerGrammar lg = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : 'a' ;\n");
+		Grammar g = new Grammar(
+			"parser grammar T;\n" +
+			"s : x y;\n" +
+			"x : A EOF ;\n" +
+			"y : ;",
+			lg);
+
+		ParseTree t = testInterp(lg, g, "s", "a", "(s (x a <EOF>) y)");
+		assertEquals("0..1", t.getSourceInterval().toString()); // s
+		assertEquals("0..1", t.getChild(0).getSourceInterval().toString()); // x
+// unspecified		assertEquals("1..0", t.getChild(1).getSourceInterval().toString()); // y
+	}
+
+	@Test public void testEmptyRuleAfterJustEOFInChild() throws Exception {
+		LexerGrammar lg = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : 'a' ;\n");
+		Grammar g = new Grammar(
+			"parser grammar T;\n" +
+			"s : x y;\n" +
+			"x : EOF ;\n" +
+			"y : ;",
+			lg);
+
+		ParseTree t = testInterp(lg, g, "s", "", "(s (x <EOF>) y)");
+		assertEquals("0..0", t.getSourceInterval().toString()); // s
+		assertEquals("0..0", t.getChild(0).getSourceInterval().toString()); // x
+		// this next one is a weird special case where somebody tries to match beyond the end of the file
+// unspecified		assertEquals("0..-1", t.getChild(1).getSourceInterval().toString()); // y
+	}
+
+	@Test public void testEmptyInput() throws Exception {
+		LexerGrammar lg = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : 'a' ;\n");
+		Grammar g = new Grammar(
+			"parser grammar T;\n" +
+			"s : x EOF ;\n" +
+			"x : ;\n",
+			lg);
+
+		ParseTree t = testInterp(lg, g, "s", "", "(s x <EOF>)");
+		assertEquals("0..0", t.getSourceInterval().toString()); // s
+		assertEquals("0..-1", t.getChild(0).getSourceInterval().toString()); // x
+	}
+
+	@Test public void testEmptyInputWithCallsAfter() throws Exception {
+		LexerGrammar lg = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : 'a' ;\n");
+		Grammar g = new Grammar(
+			"parser grammar T;\n" +
+			"s : x y ;\n" +
+			"x : EOF ;\n" +
+			"y : z ;\n" +
+			"z : ;",
+			lg);
+
+		ParseTree t = testInterp(lg, g, "s", "", "(s (x <EOF>) (y z))");
+		assertEquals("0..0", t.getSourceInterval().toString()); // s
+		assertEquals("0..0", t.getChild(0).getSourceInterval().toString()); // x
+// unspecified		assertEquals("0..-1", t.getChild(1).getSourceInterval().toString()); // y
+	}
+
+	@Test public void testEmptyFirstRule() throws Exception {
+		LexerGrammar lg = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : 'a' ;\n");
+		Grammar g = new Grammar(
+			"parser grammar T;\n" +
+			"s : x A ;\n" +
+			"x : ;\n",
+			lg);
+
+		ParseTree t = testInterp(lg, g, "s", "a", "(s x a)");
+		assertEquals("0..0", t.getSourceInterval().toString()); // s
+		// This gets an empty interval because the stop token is null for x
+		assertEquals("0..-1", t.getChild(0).getSourceInterval().toString()); // x
 	}

 	@Test public void testAorB() throws Exception {
@ -234,7 +347,7 @@ public class TestParserInterpreter extends BaseTest {
 		testInterp(lg, g, "e", "a+a*a", "(e (e a) + (e (e a) * (e a)))");
 	}

-	void testInterp(LexerGrammar lg, Grammar g,
+	ParseTree testInterp(LexerGrammar lg, Grammar g,
 					String startRule, String input,
 					String expectedParseTree)
 	{
@ -244,5 +357,6 @@ public class TestParserInterpreter extends BaseTest {
 		ParseTree t = parser.parse(g.rules.get(startRule).index);
 		System.out.println("parse tree: "+t.toStringTree(parser));
 		assertEquals(expectedParseTree, t.toStringTree(parser));
+		return t;
 	}
 }