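more tests; lexer rules can now match EOF, EOF token has an empty range (n:n-1), lexer debug output off by default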

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9224]
2011-10-29 12:08:34 -08:00 · 2011-10-29 12:08:34 -08:00 · bc5577307c
parent 6e2bbcdb42
commit bc5577307c
8 changed files with 99 additions and 64 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRStringStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRStringStream.java
@ -203,7 +203,10 @@ public class ANTLRStringStream implements CharStream {
 	}

 	public String substring(int start, int stop) {
-		return new String(data,start,stop-start+1);
+		if ( start >= data.length ) return ""; // nothing left to copy (e.g. text of EOF)
+		// stop is inclusive; clamp the char count so start+count never runs past data
+		int count = Math.min(stop - start + 1, data.length - start);
+		return new String(data, start, count);
 	}

 	public int getLine() {
--- a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
@ -120,20 +120,22 @@ public abstract class Lexer extends Recognizer<LexerATNSimulator>
 			text = null;
 			do {
 				type = Token.INVALID_TYPE;
-				if ( input.LA(1)==CharStream.EOF ) {
-					WritableToken eof = new CommonToken(this,Token.EOF,
-												Token.DEFAULT_CHANNEL,
-												input.index(),input.index());
-					eof.setLine(getLine());
-					eof.setCharPositionInLine(getCharPositionInLine());
-					return eof;
-				}
 //				System.out.println("nextToken line "+tokenStartLine+" at "+((char)input.LA(1))+
 //								   " in mode "+mode+
 //								   " at index "+input.index());
 				int ttype = _interp.match(input, mode);
 //				System.out.println("accepted ttype "+ttype);
-				if ( type == Token.INVALID_TYPE) type = ttype;
+
+//				if ( input.LA(1)==CharStream.EOF ) {
+//					WritableToken eof = new CommonToken(this,Token.EOF,
+//												Token.DEFAULT_CHANNEL,
+//												input.index(),input.index());
+//					eof.setLine(getLine());
+//					eof.setCharPositionInLine(getCharPositionInLine());
+//					return eof;
+//				}
+
+				if ( type == Token.INVALID_TYPE ) type = ttype;
 				if ( type==SKIP ) {
 					continue outer;
 				}
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
@ -35,8 +35,8 @@ import org.antlr.v4.runtime.misc.OrderedHashSet;

 /** "dup" of ParserInterpreter */
 public class LexerATNSimulator extends ATNSimulator {
-	public static boolean debug = true;
-	public static boolean dfa_debug = true;
+	public static boolean debug = false;
+	public static boolean dfa_debug = false;
 	public static final int NUM_EDGES = 255;

 	protected Lexer recog;
@ -103,7 +103,7 @@ public class LexerATNSimulator extends ATNSimulator {
 		DFAState s = s0;
 		int startIndex = input.index();
 		int t = input.LA(1);
-		if ( t==CharStream.EOF ) return -1; // TODO: how to match EOF in lexer rule?
+//		if ( t==CharStream.EOF ) return -1; // TODO: how to match EOF in lexer rule?
 	loop:
 		while ( true ) {
 			if ( dfa_debug ) System.out.println("state "+s.stateNumber+" LA(1)=="+(char)t);
@ -119,14 +119,16 @@ public class LexerATNSimulator extends ATNSimulator {
 				if ( t==CharStream.EOF ) break;
 			}
 			// if no edge, pop over to ATN interpreter, update DFA and return
-			if ( s.edges == null || t >= s.edges.length || s.edges[t] == null ) {
+			if ( s.edges == null || t >= s.edges.length || t <= CharStream.EOF ||
+				 s.edges[t] == null )
+			{
 				if ( dfa_debug ) System.out.println("no edge for "+(char)t);
 				int ttype = -1;
 				try {
 					if ( dfa_debug ) {
-					System.out.println("ATN exec upon "+
-									   input.substring(startIndex,input.index())+
-									   " at DFA state "+s.stateNumber+" = "+s.configs);
+						System.out.println("ATN exec upon "+
+										   input.substring(startIndex,input.index())+
+										   " at DFA state "+s.stateNumber+" = "+s.configs);
 					}
 					ATN_failover++;
 					ttype = exec(input, s.configs);
@ -140,7 +142,7 @@ public class LexerATNSimulator extends ATNSimulator {
 				}

 				if ( ttype==-1 ) {
-					addDFAEdge(s, t, ERROR);
+					if ( t != CharStream.EOF ) addDFAEdge(s, t, ERROR);
 					break loop; // dead end; no where to go, fall back on prev if any
 				}
 				// action already executed
@ -153,8 +155,11 @@ public class LexerATNSimulator extends ATNSimulator {
 			t = input.LA(1);
 		}
 		if ( prevAcceptState==null ) {
+			if ( t==CharStream.EOF ) {
+				return Token.EOF;
+			}
 			if ( debug ) System.out.println("!!! no viable alt in dfa");
-			return -1;
+			throw new LexerNoViableAltException(recog, input, s.configs); // TODO: closure is empty
 		}
 		if ( recog!=null ) {
 			int actionIndex = atn.ruleToActionIndex[prevAcceptState.ruleIndex];
@ -184,7 +189,7 @@ public class LexerATNSimulator extends ATNSimulator {
 		OrderedHashSet<ATNConfig> reach = new OrderedHashSet<ATNConfig>();

 		int t = input.LA(1);
-		if ( t==Token.EOF ) return Token.EOF;
+//		if ( t==Token.EOF ) return Token.EOF;

 		do { // while more work
 			if ( debug ) System.out.println("in reach starting closure: " + closure);
@ -217,7 +222,6 @@ public class LexerATNSimulator extends ATNSimulator {
 			for (int ci=0; ci<reach.size(); ci++) { // TODO: foreach
 				ATNConfig c = reach.get(ci);
 				if ( c.state instanceof RuleStopState ) {
-					System.out.println("found stop in reach: "+c.state);
 					if ( debug ) {
 						System.out.println("in reach we hit accept state "+c+" index "+
 										   input.index()+", reach="+reach+
@ -260,9 +264,8 @@ public class LexerATNSimulator extends ATNSimulator {
 		} while ( true );

 		if ( prevAccept==null ) {
-			if ( t==Token.EOF ) {
-				System.out.println("EOF in token at input index "+input.index());
-				//return Token.EOF;
+			if ( t==CharStream.EOF ) {
+				return Token.EOF;
 			}
 //					System.out.println("no viable token at input "+getTokenName(input.LA(1))+", index "+input.index());
 			throw new LexerNoViableAltException(recog, input, closure); // TODO: closure is empty
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java
@ -188,7 +188,6 @@ public class ParserATNSimulator extends ATNSimulator {
 				if ( dfa_debug ) System.out.println("no edge for "+t);
 				int alt = -1;
 				if ( dfa_debug ) {
-
 					System.out.println("ATN exec upon "+
 									   getInputString(input, startIndex) +
 									   " at DFA state "+s.stateNumber);
--- a/tool/test/org/antlr/v4/test/BaseTest.java
+++ b/tool/test/org/antlr/v4/test/BaseTest.java
@ -33,24 +33,15 @@ import org.antlr.v4.automata.*;
 import org.antlr.v4.codegen.CodeGenerator;
 import org.antlr.v4.misc.Utils;
 import org.antlr.v4.runtime.*;
-import org.antlr.v4.runtime.atn.ATN;
-import org.antlr.v4.runtime.atn.ATNState;
-import org.antlr.v4.runtime.atn.DecisionState;
-import org.antlr.v4.runtime.atn.LexerATNSimulator;
+import org.antlr.v4.runtime.atn.*;
 import org.antlr.v4.runtime.dfa.DFA;
 import org.antlr.v4.semantics.SemanticPipeline;
 import org.antlr.v4.tool.*;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.stringtemplate.v4.ST;
-import org.stringtemplate.v4.STGroup;
-import org.stringtemplate.v4.STGroupString;
+import org.antlr.v4.tool.Rule;
+import org.junit.*;
+import org.stringtemplate.v4.*;

-import javax.tools.JavaCompiler;
-import javax.tools.JavaFileObject;
-import javax.tools.StandardJavaFileManager;
-import javax.tools.ToolProvider;
+import javax.tools.*;
 import java.io.*;
 import java.util.*;

@ -169,14 +160,20 @@ public abstract class BaseTest {
 		LexerATNSimulator interp = new LexerATNSimulator(atn);
 		List<String> tokenTypes = new ArrayList<String>();
 		int ttype;
+		int t;
 		do {
+			t = input.LA(1);
 			if ( adaptive ) ttype = interp.match(input, Lexer.DEFAULT_MODE);
 			else ttype = interp.matchATN(input);
-			if ( ttype == Token.EOF ) tokenTypes.add("EOF");
+			if ( ttype == Token.EOF ) {
+				tokenTypes.add("EOF");
+			}
 			else {
 				tokenTypes.add(lg.typeToTokenList.get(ttype));
 			}
-		} while ( ttype!=Token.EOF );
+			// stop upon EOF token or when we see EOF on input since we might
+			// match DONE : EOF ; in lexer and need to know when to stop.
+		} while ( ttype!=Token.EOF && t!=CharStream.EOF );
 		return tokenTypes;
 	}

--- a/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java
+++ b/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java
@ -236,7 +236,7 @@ public class TestATNLexerInterpreter extends BaseTest {
 			"lexer grammar L;\n"+
 			"DONE : EOF ;\n"+
 			"A : 'a';\n");
-		String expecting = "A, DONE, EOF";
+		String expecting = "A, DONE";
 		checkLexerMatches(lg, "a", expecting);
 	}

--- a/tool/test/org/antlr/v4/test/TestLexerExec.java
+++ b/tool/test/org/antlr/v4/test/TestLexerExec.java
@ -14,7 +14,7 @@ public class TestLexerExec extends BaseTest {
 			"[@0,0:1='34',<4>,1:0]\n" +
 			"[@1,3:5='-21',<3>,1:3]\n" +
 			"[@2,7:7='3',<4>,1:7]\n" +
-			"[@3,8:8='<EOF>',<-1>,1:8]\n";
+			"[@3,8:7='<EOF>',<-1>,1:8]\n"; // EOF has no length so range is 8:7 not 8:8
 		assertEquals(expecting, found);
 	}

@ -29,7 +29,7 @@ public class TestLexerExec extends BaseTest {
 			"I\n" +
 			"[@0,0:1='34',<3>,1:0]\n" +
 			"[@1,3:4='34',<3>,1:3]\n" +
-			"[@2,5:5='<EOF>',<-1>,1:5]\n";
+			"[@2,5:4='<EOF>',<-1>,1:5]\n";
 		assertEquals(expecting, found);
 	}

@ -45,7 +45,7 @@ public class TestLexerExec extends BaseTest {
 		String expecting =
 			"[@0,0:4='\"abc\"',<5>,1:0]\n" +
 			"[@1,6:9='\"ab\"',<5>,1:6]\n" +
-			"[@2,10:10='<EOF>',<-1>,1:10]\n";
+			"[@2,10:9='<EOF>',<-1>,1:10]\n";
 		assertEquals(expecting, found);
 	}

@ -64,7 +64,7 @@ public class TestLexerExec extends BaseTest {
 			"[@4,9:14='ending',<4>,1:9]\n" +
 			"[@5,15:15=' ',<5>,1:15]\n" +
 			"[@6,16:16='a',<4>,1:16]\n" +
-			"[@7,17:17='<EOF>',<-1>,1:17]\n";
+			"[@7,17:16='<EOF>',<-1>,1:17]\n";
 		assertEquals(expecting, found);
 	}

@ -93,7 +93,31 @@ public class TestLexerExec extends BaseTest {
 			"[@10,10:10='a',<7>,1:10]\n" +
 			"[@11,11:11='.',<6>,1:11]\n" +
 			"[@12,12:12='l',<7>,1:12]\n" +
-			"[@13,13:13='<EOF>',<-1>,1:13]\n";
+			"[@13,13:12='<EOF>',<-1>,1:13]\n";
+		assertEquals(expecting, found);
+	}
+
+	@Test public void testEOFByItself() throws Exception {
+		String grammar =
+			"lexer grammar L;\n" +
+			"DONE : EOF ;\n" +
+			"A : 'a';\n";
+		String found = execLexer("L.g", grammar, "L", "");
+		String expecting =
+			"[@0,0:0='x',<7>,1:0]\n" +
+			"[@1,1:1=' ',<8>,1:1]\n" +
+			"[@2,2:2='0',<4>,1:2]\n" +
+			"[@3,3:3=' ',<8>,1:3]\n" +
+			"[@4,4:4='1',<4>,1:4]\n" +
+			"[@5,5:5=' ',<8>,1:5]\n" +
+			"[@6,6:6='a',<7>,1:6]\n" +
+			"[@7,7:7='.',<6>,1:7]\n" +
+			"[@8,8:8='b',<7>,1:8]\n" +
+			"[@9,9:9=' ',<8>,1:9]\n" +
+			"[@10,10:10='a',<7>,1:10]\n" +
+			"[@11,11:11='.',<6>,1:11]\n" +
+			"[@12,12:12='l',<7>,1:12]\n" +
+			"[@13,13:12='<EOF>',<-1>,1:13]\n";
 		assertEquals(expecting, found);
 	}

--- a/tool/test/org/antlr/v4/test/TestParserExec.java
+++ b/tool/test/org/antlr/v4/test/TestParserExec.java
@ -148,12 +148,12 @@ public class TestParserExec extends BaseTest {
 								  input, false);
 		assertEquals("{}\n", found);
 		input =
-			"{a b { } ;";
+			"{a b { }";
 		found = execParser("T.g", grammar, "TParser", "TLexer", "s",
 								  input, false);
 		assertEquals("{ab{}\n", found);
 		input =
-			"{ } a 2) { } ;"; // FAILS to match since it terminates loop at first { }
+			"{ } a 2) { }"; // FAILS to match since it terminates loop at first { }
 		found = execParser("T.g", grammar, "TParser", "TLexer", "s",
 								  input, false);
 		assertEquals("", found); // should not print output; resync kills rest of input
@ -227,19 +227,23 @@ public class TestParserExec extends BaseTest {
 		input =
 			"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
 		found = execParser("T.g", grammar, "TParser", "TLexer", "s",
-								  input, false);
+						   input, false);
 		assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
 		input =
-			"x=1; a=3;"; // FAILS to match since it can't match last element
-		found = execParser("T.g", grammar, "TParser", "TLexer", "s",
-								  input, false);
-		// can't match EOF to ID '=' '0' ';'
-		assertEquals("no viable token at input EOF, index 8\n", found);
+		"x=1; a=3;"; // FAILS to match since it can't match last element
+		execParser("T.g", grammar, "TParser", "TLexer", "s",
+				   input, false);
+		// can't match EOF to ID '=' '3' ';'
+		assertEquals("line 1:9 no viable alternative at input ''\n",
+					 this.stderrDuringParse);
+
 		input =
-			"x=1; a=b; z=3;"; // FAILS to match since it can't match last element
-		found = execParser("T.g", grammar, "TParser", "TLexer", "s",
-								  input, false);
-		assertEquals("no viable token at input EOF, index 12\n", found); // should not finish to print output
+		"x=1; a=b; z=3;"; // FAILS to match since it can't match last element
+		execParser("T.g", grammar, "TParser", "TLexer", "s",
+				   input, false);
+		assertEquals("line 1:14 no viable alternative at input ''\n",
+					 this.stderrDuringParse);
+		// parse should fail before the rule's output is printed
 	}

 	@Test public void testStatLoopNongreedyNecessary() throws Exception {
@ -274,10 +278,11 @@ public class TestParserExec extends BaseTest {
 		assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
 		input =
 			"x=1; a=3;"; // FAILS to match since it can't match either stat
-		found = execParser("T.g", grammar, "TParser", "TLexer", "s",
+		execParser("T.g", grammar, "TParser", "TLexer", "s",
 								  input, false);
 		// can't match EOF to ID '=' '0' ';'
-		assertEquals("no viable token at input EOF, index 8\n", found);
+		assertEquals("line 1:9 no viable alternative at input ''\n",
+					 this.stderrDuringParse);
 		input =
 			"x=1; a=b; z=3;"; // stops at a=b; ignores z=3;
 		found = execParser("T.g", grammar, "TParser", "TLexer", "s",
@ -359,7 +364,7 @@ public class TestParserExec extends BaseTest {
 								  input, false);
 		assertEquals("if(34)ab\n", found);
 		input =
-		"if ( 34 ))) ) ( a = = b( ;";
+		"if ( 34 ))) ) ( a = = b(";
 		found = execParser("T.g", grammar, "TParser", "TLexer", "s",
 						   input, false);
 		assertEquals("if(34))))(a==b(\n", found);
@ -367,7 +372,7 @@ public class TestParserExec extends BaseTest {

 	/** When .* is on the end of a rule, no tokens predict the exit branch of the loop
 	 *  since it immediately hits the end of the rule.  Non-greedy loops
-	 *  never consume more tokens than exist following the .* end that
+	 *  never consume more tokens than exist following the .* in that
 	 *  same rule. So, in this case, the greedy loop always wins and it will
 	 *  suck tokens until end of file. Unfortunately, the '.' in rule s
 	 *  will not match, leading to a syntax error.
@ -385,6 +390,8 @@ public class TestParserExec extends BaseTest {
 		"if ( 34 ) a b .";
 		String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
 								  input, false);
-		assertEquals("no viable token at input EOF, index 7\nif(34)ab.\n", found);
+		assertEquals("if(34)ab.\n", found);
+		assertEquals("line 1:15 no viable alternative at input ''\n",
+					 this.stderrDuringParse);
 	}
 }