more tests

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9224]
This commit is contained in:
parrt 2011-10-29 12:08:34 -08:00
parent 6e2bbcdb42
commit bc5577307c
8 changed files with 99 additions and 64 deletions

View File

@ -203,7 +203,10 @@ public class ANTLRStringStream implements CharStream {
}
/**
 * Returns the text held in {@code data} from index {@code start} through
 * index {@code stop}, both inclusive.
 *
 * A {@code start} at or past the end of the buffer yields the empty string.
 * A {@code stop} past the end of the buffer is clamped to the last valid
 * index, so callers may ask for text "up to the current position" even when
 * that position sits at end of input.
 *
 * @param start index of the first character to include
 * @param stop  index of the last character to include
 * @return the requested slice of the buffer, or {@code ""} when
 *         {@code start} lies at or beyond the end of the buffer
 */
public String substring(int start, int stop) {
    if ( start >= data.length ) {
        return "";
    }
    // Clamp the END INDEX rather than the character count: clamping the
    // count to data.length-1 drops the final character of a whole-buffer
    // request and still overruns when start > 0.
    if ( stop >= data.length ) {
        stop = data.length-1;
    }
    int count = stop - start + 1;
    return new String(data, start, count);
}
public int getLine() {

View File

@ -120,20 +120,22 @@ public abstract class Lexer extends Recognizer<LexerATNSimulator>
text = null;
do {
type = Token.INVALID_TYPE;
if ( input.LA(1)==CharStream.EOF ) {
WritableToken eof = new CommonToken(this,Token.EOF,
Token.DEFAULT_CHANNEL,
input.index(),input.index());
eof.setLine(getLine());
eof.setCharPositionInLine(getCharPositionInLine());
return eof;
}
// System.out.println("nextToken line "+tokenStartLine+" at "+((char)input.LA(1))+
// " in mode "+mode+
// " at index "+input.index());
int ttype = _interp.match(input, mode);
// System.out.println("accepted ttype "+ttype);
if ( type == Token.INVALID_TYPE) type = ttype;
// if ( input.LA(1)==CharStream.EOF ) {
// WritableToken eof = new CommonToken(this,Token.EOF,
// Token.DEFAULT_CHANNEL,
// input.index(),input.index());
// eof.setLine(getLine());
// eof.setCharPositionInLine(getCharPositionInLine());
// return eof;
// }
if ( type == Token.INVALID_TYPE ) type = ttype;
if ( type==SKIP ) {
continue outer;
}

View File

@ -35,8 +35,8 @@ import org.antlr.v4.runtime.misc.OrderedHashSet;
/** "dup" of ParserInterpreter */
public class LexerATNSimulator extends ATNSimulator {
public static boolean debug = true;
public static boolean dfa_debug = true;
public static boolean debug = false;
public static boolean dfa_debug = false;
public static final int NUM_EDGES = 255;
protected Lexer recog;
@ -103,7 +103,7 @@ public class LexerATNSimulator extends ATNSimulator {
DFAState s = s0;
int startIndex = input.index();
int t = input.LA(1);
if ( t==CharStream.EOF ) return -1; // TODO: how to match EOF in lexer rule?
// if ( t==CharStream.EOF ) return -1; // TODO: how to match EOF in lexer rule?
loop:
while ( true ) {
if ( dfa_debug ) System.out.println("state "+s.stateNumber+" LA(1)=="+(char)t);
@ -119,14 +119,16 @@ public class LexerATNSimulator extends ATNSimulator {
if ( t==CharStream.EOF ) break;
}
// if no edge, pop over to ATN interpreter, update DFA and return
if ( s.edges == null || t >= s.edges.length || s.edges[t] == null ) {
if ( s.edges == null || t >= s.edges.length || t <= CharStream.EOF ||
s.edges[t] == null )
{
if ( dfa_debug ) System.out.println("no edge for "+(char)t);
int ttype = -1;
try {
if ( dfa_debug ) {
System.out.println("ATN exec upon "+
input.substring(startIndex,input.index())+
" at DFA state "+s.stateNumber+" = "+s.configs);
System.out.println("ATN exec upon "+
input.substring(startIndex,input.index())+
" at DFA state "+s.stateNumber+" = "+s.configs);
}
ATN_failover++;
ttype = exec(input, s.configs);
@ -140,7 +142,7 @@ public class LexerATNSimulator extends ATNSimulator {
}
if ( ttype==-1 ) {
addDFAEdge(s, t, ERROR);
if ( t != CharStream.EOF ) addDFAEdge(s, t, ERROR);
break loop; // dead end; no where to go, fall back on prev if any
}
// action already executed
@ -153,8 +155,11 @@ public class LexerATNSimulator extends ATNSimulator {
t = input.LA(1);
}
if ( prevAcceptState==null ) {
if ( t==CharStream.EOF ) {
return Token.EOF;
}
if ( debug ) System.out.println("!!! no viable alt in dfa");
return -1;
throw new LexerNoViableAltException(recog, input, s.configs); // TODO: closure is empty
}
if ( recog!=null ) {
int actionIndex = atn.ruleToActionIndex[prevAcceptState.ruleIndex];
@ -184,7 +189,7 @@ public class LexerATNSimulator extends ATNSimulator {
OrderedHashSet<ATNConfig> reach = new OrderedHashSet<ATNConfig>();
int t = input.LA(1);
if ( t==Token.EOF ) return Token.EOF;
// if ( t==Token.EOF ) return Token.EOF;
do { // while more work
if ( debug ) System.out.println("in reach starting closure: " + closure);
@ -217,7 +222,6 @@ public class LexerATNSimulator extends ATNSimulator {
for (int ci=0; ci<reach.size(); ci++) { // TODO: foreach
ATNConfig c = reach.get(ci);
if ( c.state instanceof RuleStopState ) {
System.out.println("found stop in reach: "+c.state);
if ( debug ) {
System.out.println("in reach we hit accept state "+c+" index "+
input.index()+", reach="+reach+
@ -260,9 +264,8 @@ public class LexerATNSimulator extends ATNSimulator {
} while ( true );
if ( prevAccept==null ) {
if ( t==Token.EOF ) {
System.out.println("EOF in token at input index "+input.index());
//return Token.EOF;
if ( t==CharStream.EOF ) {
return Token.EOF;
}
// System.out.println("no viable token at input "+getTokenName(input.LA(1))+", index "+input.index());
throw new LexerNoViableAltException(recog, input, closure); // TODO: closure is empty

View File

@ -188,7 +188,6 @@ public class ParserATNSimulator extends ATNSimulator {
if ( dfa_debug ) System.out.println("no edge for "+t);
int alt = -1;
if ( dfa_debug ) {
System.out.println("ATN exec upon "+
getInputString(input, startIndex) +
" at DFA state "+s.stateNumber);

View File

@ -33,24 +33,15 @@ import org.antlr.v4.automata.*;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.DecisionState;
import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.*;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.STGroupString;
import org.antlr.v4.tool.Rule;
import org.junit.*;
import org.stringtemplate.v4.*;
import javax.tools.JavaCompiler;
import javax.tools.JavaFileObject;
import javax.tools.StandardJavaFileManager;
import javax.tools.ToolProvider;
import javax.tools.*;
import java.io.*;
import java.util.*;
@ -169,14 +160,20 @@ public abstract class BaseTest {
LexerATNSimulator interp = new LexerATNSimulator(atn);
List<String> tokenTypes = new ArrayList<String>();
int ttype;
int t;
do {
t = input.LA(1);
if ( adaptive ) ttype = interp.match(input, Lexer.DEFAULT_MODE);
else ttype = interp.matchATN(input);
if ( ttype == Token.EOF ) tokenTypes.add("EOF");
if ( ttype == Token.EOF ) {
tokenTypes.add("EOF");
}
else {
tokenTypes.add(lg.typeToTokenList.get(ttype));
}
} while ( ttype!=Token.EOF );
// stop upon EOF token or when we see EOF on input since we might
// match DONE : EOF ; in lexer and need to know when to stop.
} while ( ttype!=Token.EOF && t!=CharStream.EOF );
return tokenTypes;
}

View File

@ -236,7 +236,7 @@ public class TestATNLexerInterpreter extends BaseTest {
"lexer grammar L;\n"+
"DONE : EOF ;\n"+
"A : 'a';\n");
String expecting = "A, DONE, EOF";
String expecting = "A, DONE";
checkLexerMatches(lg, "a", expecting);
}

View File

@ -14,7 +14,7 @@ public class TestLexerExec extends BaseTest {
"[@0,0:1='34',<4>,1:0]\n" +
"[@1,3:5='-21',<3>,1:3]\n" +
"[@2,7:7='3',<4>,1:7]\n" +
"[@3,8:8='<EOF>',<-1>,1:8]\n";
"[@3,8:7='<EOF>',<-1>,1:8]\n"; // EOF has no length so range is 8:7 not 8:8
assertEquals(expecting, found);
}
@ -29,7 +29,7 @@ public class TestLexerExec extends BaseTest {
"I\n" +
"[@0,0:1='34',<3>,1:0]\n" +
"[@1,3:4='34',<3>,1:3]\n" +
"[@2,5:5='<EOF>',<-1>,1:5]\n";
"[@2,5:4='<EOF>',<-1>,1:5]\n";
assertEquals(expecting, found);
}
@ -45,7 +45,7 @@ public class TestLexerExec extends BaseTest {
String expecting =
"[@0,0:4='\"abc\"',<5>,1:0]\n" +
"[@1,6:9='\"ab\"',<5>,1:6]\n" +
"[@2,10:10='<EOF>',<-1>,1:10]\n";
"[@2,10:9='<EOF>',<-1>,1:10]\n";
assertEquals(expecting, found);
}
@ -64,7 +64,7 @@ public class TestLexerExec extends BaseTest {
"[@4,9:14='ending',<4>,1:9]\n" +
"[@5,15:15=' ',<5>,1:15]\n" +
"[@6,16:16='a',<4>,1:16]\n" +
"[@7,17:17='<EOF>',<-1>,1:17]\n";
"[@7,17:16='<EOF>',<-1>,1:17]\n";
assertEquals(expecting, found);
}
@ -93,7 +93,31 @@ public class TestLexerExec extends BaseTest {
"[@10,10:10='a',<7>,1:10]\n" +
"[@11,11:11='.',<6>,1:11]\n" +
"[@12,12:12='l',<7>,1:12]\n" +
"[@13,13:13='<EOF>',<-1>,1:13]\n";
"[@13,13:12='<EOF>',<-1>,1:13]\n";
assertEquals(expecting, found);
}
/**
 * Runs the lexer for a grammar whose DONE rule matches EOF over an empty
 * input string and compares the string returned by execLexer with the
 * expected text.
 *
 * NOTE(review): the expected text lists tokens such as 'x', '0' and '.',
 * which cannot come from an empty input; it looks copied from the
 * preceding test. Confirm the intended expectation before relying on
 * this test.
 */
@Test public void testEOFByItself() throws Exception {
    final String expected =
        "[@0,0:0='x',<7>,1:0]\n" +
        "[@1,1:1=' ',<8>,1:1]\n" +
        "[@2,2:2='0',<4>,1:2]\n" +
        "[@3,3:3=' ',<8>,1:3]\n" +
        "[@4,4:4='1',<4>,1:4]\n" +
        "[@5,5:5=' ',<8>,1:5]\n" +
        "[@6,6:6='a',<7>,1:6]\n" +
        "[@7,7:7='.',<6>,1:7]\n" +
        "[@8,8:8='b',<7>,1:8]\n" +
        "[@9,9:9=' ',<8>,1:9]\n" +
        "[@10,10:10='a',<7>,1:10]\n" +
        "[@11,11:11='.',<6>,1:11]\n" +
        "[@12,12:12='l',<7>,1:12]\n" +
        "[@13,13:12='<EOF>',<-1>,1:13]\n";
    final String lexerGrammar =
        "lexer grammar L;\n" +
        "DONE : EOF ;\n" +
        "A : 'a';\n";
    // Empty input: the only thing available to match is end of file.
    final String actual = execLexer("L.g", lexerGrammar, "L", "");
    assertEquals(expected, actual);
}

View File

@ -148,12 +148,12 @@ public class TestParserExec extends BaseTest {
input, false);
assertEquals("{}\n", found);
input =
"{a b { } ;";
"{a b { }";
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("{ab{}\n", found);
input =
"{ } a 2) { } ;"; // FAILS to match since it terminates loop at first { }
"{ } a 2) { }"; // FAILS to match since it terminates loop at first { }
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("", found); // should not print output; resync kills rest of input
@ -227,19 +227,23 @@ public class TestParserExec extends BaseTest {
input =
"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
input, false);
assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
input =
"x=1; a=3;"; // FAILS to match since it can't match last element
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
// can't match EOF to ID '=' '0' ';'
assertEquals("no viable token at input EOF, index 8\n", found);
"x=1; a=3;"; // FAILS to match since it can't match last element
execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
// can't match EOF to ID '=' '3' ';'
assertEquals("line 1:9 no viable alternative at input ''\n",
this.stderrDuringParse);
input =
"x=1; a=b; z=3;"; // FAILS to match since it can't match last element
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("no viable token at input EOF, index 12\n", found); // should not finish to print output
"x=1; a=b; z=3;"; // FAILS to match since it can't match last element
execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("line 1:14 no viable alternative at input ''\n",
this.stderrDuringParse);
// should not finish to print output
}
@Test public void testStatLoopNongreedyNecessary() throws Exception {
@ -274,10 +278,11 @@ public class TestParserExec extends BaseTest {
assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
input =
"x=1; a=3;"; // FAILS to match since it can't match either stat
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
// can't match EOF to ID '=' '0' ';'
assertEquals("no viable token at input EOF, index 8\n", found);
assertEquals("line 1:9 no viable alternative at input ''\n",
this.stderrDuringParse);
input =
"x=1; a=b; z=3;"; // stops at a=b; ignores z=3;
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
@ -359,7 +364,7 @@ public class TestParserExec extends BaseTest {
input, false);
assertEquals("if(34)ab\n", found);
input =
"if ( 34 ))) ) ( a = = b( ;";
"if ( 34 ))) ) ( a = = b(";
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("if(34))))(a==b(\n", found);
@ -367,7 +372,7 @@ public class TestParserExec extends BaseTest {
/** When .* is on the end of a rule, no tokens predict the exit branch of the loop
* since it immediately hits the end of the rule. Non-greedy loops
* never consume more tokens than exist following the .* end that
* never consume more tokens than exist following the .* in that
* same rule. So, in this case, the greedy loop always wins and it will
* suck tokens until end of file. Unfortunately, the '.' in rule s
* will not match, leading to a syntax error.
@ -385,6 +390,8 @@ public class TestParserExec extends BaseTest {
"if ( 34 ) a b .";
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("no viable token at input EOF, index 7\nif(34)ab.\n", found);
assertEquals("if(34)ab.\n", found);
assertEquals("line 1:15 no viable alternative at input ''\n",
this.stderrDuringParse);
}
}