Merge pull request #898 from parrt/check-eof-in-tree

Check eof in tree
This commit is contained in:
Terence Parr 2015-06-11 13:13:19 -07:00
commit a3ddd82950
4 changed files with 149 additions and 6 deletions

View File

@ -177,6 +177,9 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
*/
protected int _syntaxErrors;
/** Indicates parser has match()ed EOF token. See {@link #exitRule()}. */
protected boolean matchedEOF;
/**
 * Creates a parser that reads from the given token stream.
 *
 * @param input the token stream handed to {@code setInputStream}
 */
public Parser(TokenStream input) {
    this.setInputStream(input);
}
@ -187,6 +190,7 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
_errHandler.reset(this);
_ctx = null;
_syntaxErrors = 0;
matchedEOF = false;
setTrace(false);
_precedenceStack.clear();
_precedenceStack.push(0);
@ -217,6 +221,9 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
public Token match(int ttype) throws RecognitionException {
Token t = getCurrentToken();
if ( t.getType()==ttype ) {
if ( ttype==Token.EOF ) {
matchedEOF = true;
}
_errHandler.reportMatch(this);
consume();
}
@ -630,7 +637,13 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
}
public void exitRule() {
_ctx.stop = _input.LT(-1);
if ( matchedEOF ) {
// if we have matched EOF, it cannot consume past EOF so we use LT(1) here
_ctx.stop = _input.LT(1); // LT(1) will be end of file
}
else {
_ctx.stop = _input.LT(-1); // stop node is what we just matched
}
// trigger event on _ctx, before it reverts to parent
if ( _parseListeners != null) triggerExitRuleEvent();
setState(_ctx.invokingState);
@ -909,6 +922,10 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
return false;
}
/**
 * Reports the current value of the {@code matchedEOF} flag.
 *
 * @return the value of {@code matchedEOF}
 */
public boolean isMatchedEOF() {
    return this.matchedEOF;
}
/**
* Computes the set of input symbols which could follow the current parser
* state and context, as given by {@link #getState} and {@link #getContext},

View File

@ -272,7 +272,12 @@ public class ParserRuleContext extends RuleContext {
@Override
public Interval getSourceInterval() {
    // Without a start token there is nothing to anchor the interval to.
    if ( start == null ) {
        return Interval.INVALID;
    }
    int startIndex = start.getTokenIndex();
    // A missing stop token, or one that lies before the start token, means
    // the rule matched no tokens: report the empty interval i..i-1 at the
    // start position rather than INVALID.
    if ( stop == null || stop.getTokenIndex() < startIndex ) {
        return Interval.of(startIndex, startIndex - 1); // empty
    }
    return Interval.of(startIndex, stop.getTokenIndex());
}

View File

@ -42,10 +42,17 @@ public interface SyntaxTree extends Tree {
* Return an {@link Interval} indicating the index in the
* {@link TokenStream} of the first and last token associated with this
* subtree. If this node is a leaf, then the interval represents a single
* token and has interval i..i for token index i.
*
* <p>An interval of i..i-1 indicates an empty interval at position
* i in the input stream, where 0 &lt;= i &lt;= the size of the input
* token stream. Currently, the code base can only have i=0..n-1 but
* in concept one could have an empty interval after EOF. </p>
*
* <p>If source interval is unknown, this returns {@link Interval#INVALID}.</p>
*
* <p>As a weird special case, the source interval for rules matched after
* EOF is unspecified.</p>
*/
Interval getSourceInterval();
}

View File

@ -65,7 +65,120 @@ public class TestParserInterpreter extends BaseTest {
"s : A ;",
lg);
testInterp(lg, g, "s", "a", "(s a)");
ParseTree t = testInterp(lg, g, "s", "a", "(s a)");
assertEquals("0..0", t.getSourceInterval().toString());
}
/** Start rule {@code s : A EOF} on input "a": the tree's interval is expected to be 0..1. */
@Test
public void testEOF() throws Exception {
    String lexerSource =
        "lexer grammar L;\n" +
        "A : 'a' ;\n";
    LexerGrammar lexerGrammar = new LexerGrammar(lexerSource);

    String parserSource =
        "parser grammar T;\n" +
        "s : A EOF ;";
    Grammar parserGrammar = new Grammar(parserSource, lexerGrammar);

    ParseTree tree = testInterp(lexerGrammar, parserGrammar, "s", "a", "(s a <EOF>)");

    // The root interval is expected to cover both leaves of (s a <EOF>).
    String rootSpan = tree.getSourceInterval().toString();
    assertEquals("0..1", rootSpan);
}
/** EOF is matched inside child rule {@code x}; both {@code s} and {@code x} are expected to span 0..1. */
@Test
public void testEOFInChild() throws Exception {
    String lexerSource =
        "lexer grammar L;\n" +
        "A : 'a' ;\n";
    LexerGrammar lexerGrammar = new LexerGrammar(lexerSource);

    String parserSource =
        "parser grammar T;\n" +
        "s : x ;\n" +
        "x : A EOF ;";
    Grammar parserGrammar = new Grammar(parserSource, lexerGrammar);

    ParseTree tree = testInterp(lexerGrammar, parserGrammar, "s", "a", "(s (x a <EOF>))");

    String rootSpan = tree.getSourceInterval().toString();
    assertEquals("0..1", rootSpan); // s

    String childSpan = tree.getChild(0).getSourceInterval().toString();
    assertEquals("0..1", childSpan); // x
}
/**
 * Empty rule {@code y} is invoked after child rule {@code x} has matched EOF.
 * Only the intervals of {@code s} and {@code x} are checked.
 */
@Test
public void testEmptyRuleAfterEOFInChild() throws Exception {
    String lexerSource =
        "lexer grammar L;\n" +
        "A : 'a' ;\n";
    LexerGrammar lexerGrammar = new LexerGrammar(lexerSource);

    String parserSource =
        "parser grammar T;\n" +
        "s : x y;\n" +
        "x : A EOF ;\n" +
        "y : ;";
    Grammar parserGrammar = new Grammar(parserSource, lexerGrammar);

    ParseTree tree = testInterp(lexerGrammar, parserGrammar, "s", "a", "(s (x a <EOF>) y)");

    String rootSpan = tree.getSourceInterval().toString();
    assertEquals("0..1", rootSpan); // s

    String xSpan = tree.getChild(0).getSourceInterval().toString();
    assertEquals("0..1", xSpan); // x

    // The interval of y (child 1) is unspecified, so it is deliberately not asserted:
    // unspecified assertEquals("1..0", tree.getChild(1).getSourceInterval().toString()); // y
}
/**
 * Empty input where child rule {@code x} matches only EOF and empty rule
 * {@code y} follows it. Only the intervals of {@code s} and {@code x} are checked.
 */
@Test
public void testEmptyRuleAfterJustEOFInChild() throws Exception {
    String lexerSource =
        "lexer grammar L;\n" +
        "A : 'a' ;\n";
    LexerGrammar lexerGrammar = new LexerGrammar(lexerSource);

    String parserSource =
        "parser grammar T;\n" +
        "s : x y;\n" +
        "x : EOF ;\n" +
        "y : ;";
    Grammar parserGrammar = new Grammar(parserSource, lexerGrammar);

    ParseTree tree = testInterp(lexerGrammar, parserGrammar, "s", "", "(s (x <EOF>) y)");

    String rootSpan = tree.getSourceInterval().toString();
    assertEquals("0..0", rootSpan); // s

    String xSpan = tree.getChild(0).getSourceInterval().toString();
    assertEquals("0..0", xSpan); // x

    // This next one is a weird special case where somebody tries to match beyond
    // the end of the file; the interval of y is unspecified, so it is not asserted:
    // unspecified assertEquals("0..-1", tree.getChild(1).getSourceInterval().toString()); // y
}
/**
 * Empty input with empty rule {@code x} invoked before EOF is matched:
 * {@code s} is expected to span 0..0 and {@code x} the empty interval 0..-1.
 */
@Test
public void testEmptyInput() throws Exception {
    String lexerSource =
        "lexer grammar L;\n" +
        "A : 'a' ;\n";
    LexerGrammar lexerGrammar = new LexerGrammar(lexerSource);

    String parserSource =
        "parser grammar T;\n" +
        "s : x EOF ;\n" +
        "x : ;\n";
    Grammar parserGrammar = new Grammar(parserSource, lexerGrammar);

    ParseTree tree = testInterp(lexerGrammar, parserGrammar, "s", "", "(s x <EOF>)");

    String rootSpan = tree.getSourceInterval().toString();
    assertEquals("0..0", rootSpan); // s

    String xSpan = tree.getChild(0).getSourceInterval().toString();
    assertEquals("0..-1", xSpan); // x
}
/**
 * Empty input where {@code x} matches EOF and is followed by rule {@code y},
 * which in turn invokes empty rule {@code z}. Only the intervals of
 * {@code s} and {@code x} are checked.
 */
@Test
public void testEmptyInputWithCallsAfter() throws Exception {
    String lexerSource =
        "lexer grammar L;\n" +
        "A : 'a' ;\n";
    LexerGrammar lexerGrammar = new LexerGrammar(lexerSource);

    String parserSource =
        "parser grammar T;\n" +
        "s : x y ;\n" +
        "x : EOF ;\n" +
        "y : z ;\n" +
        "z : ;";
    Grammar parserGrammar = new Grammar(parserSource, lexerGrammar);

    ParseTree tree = testInterp(lexerGrammar, parserGrammar, "s", "", "(s (x <EOF>) (y z))");

    String rootSpan = tree.getSourceInterval().toString();
    assertEquals("0..0", rootSpan); // s

    String xSpan = tree.getChild(0).getSourceInterval().toString();
    assertEquals("0..0", xSpan); // x

    // The interval of y (child 1) is unspecified, so it is deliberately not asserted:
    // unspecified assertEquals("0..-1", tree.getChild(1).getSourceInterval().toString()); // y
}
/**
 * Empty rule {@code x} is invoked before token {@code A}: {@code s} is
 * expected to span 0..0 and {@code x} the empty interval 0..-1.
 */
@Test
public void testEmptyFirstRule() throws Exception {
    String lexerSource =
        "lexer grammar L;\n" +
        "A : 'a' ;\n";
    LexerGrammar lexerGrammar = new LexerGrammar(lexerSource);

    String parserSource =
        "parser grammar T;\n" +
        "s : x A ;\n" +
        "x : ;\n";
    Grammar parserGrammar = new Grammar(parserSource, lexerGrammar);

    ParseTree tree = testInterp(lexerGrammar, parserGrammar, "s", "a", "(s x a)");

    String rootSpan = tree.getSourceInterval().toString();
    assertEquals("0..0", rootSpan); // s

    // x gets an empty interval because its stop token is null.
    String xSpan = tree.getChild(0).getSourceInterval().toString();
    assertEquals("0..-1", xSpan); // x
}
@Test public void testAorB() throws Exception {
@ -234,7 +347,7 @@ public class TestParserInterpreter extends BaseTest {
testInterp(lg, g, "e", "a+a*a", "(e (e a) + (e (e a) * (e a)))");
}
void testInterp(LexerGrammar lg, Grammar g,
ParseTree testInterp(LexerGrammar lg, Grammar g,
String startRule, String input,
String expectedParseTree)
{
@ -244,5 +357,6 @@ public class TestParserInterpreter extends BaseTest {
ParseTree t = parser.parse(g.rules.get(startRule).index);
System.out.println("parse tree: "+t.toStringTree(parser));
assertEquals(expectedParseTree, t.toStringTree(parser));
return t;
}
}