more tests
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9224]
This commit is contained in:
parent
6e2bbcdb42
commit
bc5577307c
|
@ -203,7 +203,10 @@ public class ANTLRStringStream implements CharStream {
|
|||
}
|
||||
|
||||
public String substring(int start, int stop) {
|
||||
return new String(data,start,stop-start+1);
|
||||
int last = stop - start + 1;
|
||||
if ( last >= data.length ) last = data.length-1;
|
||||
if ( start >= data.length ) return "";
|
||||
return new String(data, start, last);
|
||||
}
|
||||
|
||||
public int getLine() {
|
||||
|
|
|
@ -120,20 +120,22 @@ public abstract class Lexer extends Recognizer<LexerATNSimulator>
|
|||
text = null;
|
||||
do {
|
||||
type = Token.INVALID_TYPE;
|
||||
if ( input.LA(1)==CharStream.EOF ) {
|
||||
WritableToken eof = new CommonToken(this,Token.EOF,
|
||||
Token.DEFAULT_CHANNEL,
|
||||
input.index(),input.index());
|
||||
eof.setLine(getLine());
|
||||
eof.setCharPositionInLine(getCharPositionInLine());
|
||||
return eof;
|
||||
}
|
||||
// System.out.println("nextToken line "+tokenStartLine+" at "+((char)input.LA(1))+
|
||||
// " in mode "+mode+
|
||||
// " at index "+input.index());
|
||||
int ttype = _interp.match(input, mode);
|
||||
// System.out.println("accepted ttype "+ttype);
|
||||
if ( type == Token.INVALID_TYPE) type = ttype;
|
||||
|
||||
// if ( input.LA(1)==CharStream.EOF ) {
|
||||
// WritableToken eof = new CommonToken(this,Token.EOF,
|
||||
// Token.DEFAULT_CHANNEL,
|
||||
// input.index(),input.index());
|
||||
// eof.setLine(getLine());
|
||||
// eof.setCharPositionInLine(getCharPositionInLine());
|
||||
// return eof;
|
||||
// }
|
||||
|
||||
if ( type == Token.INVALID_TYPE ) type = ttype;
|
||||
if ( type==SKIP ) {
|
||||
continue outer;
|
||||
}
|
||||
|
|
|
@ -35,8 +35,8 @@ import org.antlr.v4.runtime.misc.OrderedHashSet;
|
|||
|
||||
/** "dup" of ParserInterpreter */
|
||||
public class LexerATNSimulator extends ATNSimulator {
|
||||
public static boolean debug = true;
|
||||
public static boolean dfa_debug = true;
|
||||
public static boolean debug = false;
|
||||
public static boolean dfa_debug = false;
|
||||
public static final int NUM_EDGES = 255;
|
||||
|
||||
protected Lexer recog;
|
||||
|
@ -103,7 +103,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
DFAState s = s0;
|
||||
int startIndex = input.index();
|
||||
int t = input.LA(1);
|
||||
if ( t==CharStream.EOF ) return -1; // TODO: how to match EOF in lexer rule?
|
||||
// if ( t==CharStream.EOF ) return -1; // TODO: how to match EOF in lexer rule?
|
||||
loop:
|
||||
while ( true ) {
|
||||
if ( dfa_debug ) System.out.println("state "+s.stateNumber+" LA(1)=="+(char)t);
|
||||
|
@ -119,14 +119,16 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
if ( t==CharStream.EOF ) break;
|
||||
}
|
||||
// if no edge, pop over to ATN interpreter, update DFA and return
|
||||
if ( s.edges == null || t >= s.edges.length || s.edges[t] == null ) {
|
||||
if ( s.edges == null || t >= s.edges.length || t <= CharStream.EOF ||
|
||||
s.edges[t] == null )
|
||||
{
|
||||
if ( dfa_debug ) System.out.println("no edge for "+(char)t);
|
||||
int ttype = -1;
|
||||
try {
|
||||
if ( dfa_debug ) {
|
||||
System.out.println("ATN exec upon "+
|
||||
input.substring(startIndex,input.index())+
|
||||
" at DFA state "+s.stateNumber+" = "+s.configs);
|
||||
System.out.println("ATN exec upon "+
|
||||
input.substring(startIndex,input.index())+
|
||||
" at DFA state "+s.stateNumber+" = "+s.configs);
|
||||
}
|
||||
ATN_failover++;
|
||||
ttype = exec(input, s.configs);
|
||||
|
@ -140,7 +142,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
}
|
||||
|
||||
if ( ttype==-1 ) {
|
||||
addDFAEdge(s, t, ERROR);
|
||||
if ( t != CharStream.EOF ) addDFAEdge(s, t, ERROR);
|
||||
break loop; // dead end; no where to go, fall back on prev if any
|
||||
}
|
||||
// action already executed
|
||||
|
@ -153,8 +155,11 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
t = input.LA(1);
|
||||
}
|
||||
if ( prevAcceptState==null ) {
|
||||
if ( t==CharStream.EOF ) {
|
||||
return Token.EOF;
|
||||
}
|
||||
if ( debug ) System.out.println("!!! no viable alt in dfa");
|
||||
return -1;
|
||||
throw new LexerNoViableAltException(recog, input, s.configs); // TODO: closure is empty
|
||||
}
|
||||
if ( recog!=null ) {
|
||||
int actionIndex = atn.ruleToActionIndex[prevAcceptState.ruleIndex];
|
||||
|
@ -184,7 +189,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
OrderedHashSet<ATNConfig> reach = new OrderedHashSet<ATNConfig>();
|
||||
|
||||
int t = input.LA(1);
|
||||
if ( t==Token.EOF ) return Token.EOF;
|
||||
// if ( t==Token.EOF ) return Token.EOF;
|
||||
|
||||
do { // while more work
|
||||
if ( debug ) System.out.println("in reach starting closure: " + closure);
|
||||
|
@ -217,7 +222,6 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
for (int ci=0; ci<reach.size(); ci++) { // TODO: foreach
|
||||
ATNConfig c = reach.get(ci);
|
||||
if ( c.state instanceof RuleStopState ) {
|
||||
System.out.println("found stop in reach: "+c.state);
|
||||
if ( debug ) {
|
||||
System.out.println("in reach we hit accept state "+c+" index "+
|
||||
input.index()+", reach="+reach+
|
||||
|
@ -260,9 +264,8 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
} while ( true );
|
||||
|
||||
if ( prevAccept==null ) {
|
||||
if ( t==Token.EOF ) {
|
||||
System.out.println("EOF in token at input index "+input.index());
|
||||
//return Token.EOF;
|
||||
if ( t==CharStream.EOF ) {
|
||||
return Token.EOF;
|
||||
}
|
||||
// System.out.println("no viable token at input "+getTokenName(input.LA(1))+", index "+input.index());
|
||||
throw new LexerNoViableAltException(recog, input, closure); // TODO: closure is empty
|
||||
|
|
|
@ -188,7 +188,6 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
if ( dfa_debug ) System.out.println("no edge for "+t);
|
||||
int alt = -1;
|
||||
if ( dfa_debug ) {
|
||||
|
||||
System.out.println("ATN exec upon "+
|
||||
getInputString(input, startIndex) +
|
||||
" at DFA state "+s.stateNumber);
|
||||
|
|
|
@ -33,24 +33,15 @@ import org.antlr.v4.automata.*;
|
|||
import org.antlr.v4.codegen.CodeGenerator;
|
||||
import org.antlr.v4.misc.Utils;
|
||||
import org.antlr.v4.runtime.*;
|
||||
import org.antlr.v4.runtime.atn.ATN;
|
||||
import org.antlr.v4.runtime.atn.ATNState;
|
||||
import org.antlr.v4.runtime.atn.DecisionState;
|
||||
import org.antlr.v4.runtime.atn.LexerATNSimulator;
|
||||
import org.antlr.v4.runtime.atn.*;
|
||||
import org.antlr.v4.runtime.dfa.DFA;
|
||||
import org.antlr.v4.semantics.SemanticPipeline;
|
||||
import org.antlr.v4.tool.*;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.stringtemplate.v4.ST;
|
||||
import org.stringtemplate.v4.STGroup;
|
||||
import org.stringtemplate.v4.STGroupString;
|
||||
import org.antlr.v4.tool.Rule;
|
||||
import org.junit.*;
|
||||
import org.stringtemplate.v4.*;
|
||||
|
||||
import javax.tools.JavaCompiler;
|
||||
import javax.tools.JavaFileObject;
|
||||
import javax.tools.StandardJavaFileManager;
|
||||
import javax.tools.ToolProvider;
|
||||
import javax.tools.*;
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
|
@ -169,14 +160,20 @@ public abstract class BaseTest {
|
|||
LexerATNSimulator interp = new LexerATNSimulator(atn);
|
||||
List<String> tokenTypes = new ArrayList<String>();
|
||||
int ttype;
|
||||
int t;
|
||||
do {
|
||||
t = input.LA(1);
|
||||
if ( adaptive ) ttype = interp.match(input, Lexer.DEFAULT_MODE);
|
||||
else ttype = interp.matchATN(input);
|
||||
if ( ttype == Token.EOF ) tokenTypes.add("EOF");
|
||||
if ( ttype == Token.EOF ) {
|
||||
tokenTypes.add("EOF");
|
||||
}
|
||||
else {
|
||||
tokenTypes.add(lg.typeToTokenList.get(ttype));
|
||||
}
|
||||
} while ( ttype!=Token.EOF );
|
||||
// stop upon EOF token or when we see EOF on input since we might
|
||||
// match DONE : EOF ; in lexer and need to know when to stop.
|
||||
} while ( ttype!=Token.EOF && t!=CharStream.EOF );
|
||||
return tokenTypes;
|
||||
}
|
||||
|
||||
|
|
|
@ -236,7 +236,7 @@ public class TestATNLexerInterpreter extends BaseTest {
|
|||
"lexer grammar L;\n"+
|
||||
"DONE : EOF ;\n"+
|
||||
"A : 'a';\n");
|
||||
String expecting = "A, DONE, EOF";
|
||||
String expecting = "A, DONE";
|
||||
checkLexerMatches(lg, "a", expecting);
|
||||
}
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ public class TestLexerExec extends BaseTest {
|
|||
"[@0,0:1='34',<4>,1:0]\n" +
|
||||
"[@1,3:5='-21',<3>,1:3]\n" +
|
||||
"[@2,7:7='3',<4>,1:7]\n" +
|
||||
"[@3,8:8='<EOF>',<-1>,1:8]\n";
|
||||
"[@3,8:7='<EOF>',<-1>,1:8]\n"; // EOF has no length so range is 8:7 not 8:8
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,7 @@ public class TestLexerExec extends BaseTest {
|
|||
"I\n" +
|
||||
"[@0,0:1='34',<3>,1:0]\n" +
|
||||
"[@1,3:4='34',<3>,1:3]\n" +
|
||||
"[@2,5:5='<EOF>',<-1>,1:5]\n";
|
||||
"[@2,5:4='<EOF>',<-1>,1:5]\n";
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
|
@ -45,7 +45,7 @@ public class TestLexerExec extends BaseTest {
|
|||
String expecting =
|
||||
"[@0,0:4='\"abc\"',<5>,1:0]\n" +
|
||||
"[@1,6:9='\"ab\"',<5>,1:6]\n" +
|
||||
"[@2,10:10='<EOF>',<-1>,1:10]\n";
|
||||
"[@2,10:9='<EOF>',<-1>,1:10]\n";
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
|
@ -64,7 +64,7 @@ public class TestLexerExec extends BaseTest {
|
|||
"[@4,9:14='ending',<4>,1:9]\n" +
|
||||
"[@5,15:15=' ',<5>,1:15]\n" +
|
||||
"[@6,16:16='a',<4>,1:16]\n" +
|
||||
"[@7,17:17='<EOF>',<-1>,1:17]\n";
|
||||
"[@7,17:16='<EOF>',<-1>,1:17]\n";
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
|
@ -93,7 +93,31 @@ public class TestLexerExec extends BaseTest {
|
|||
"[@10,10:10='a',<7>,1:10]\n" +
|
||||
"[@11,11:11='.',<6>,1:11]\n" +
|
||||
"[@12,12:12='l',<7>,1:12]\n" +
|
||||
"[@13,13:13='<EOF>',<-1>,1:13]\n";
|
||||
"[@13,13:12='<EOF>',<-1>,1:13]\n";
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
// Runs the lexer for a grammar containing "DONE : EOF ;" and "A : 'a';" over
// empty input and compares the printed token listing with `expecting`.
// NOTE(review): the input passed to execLexer is "", but `expecting` lists
// tokens with text such as 'x', '0' and '.', and token types <4>..<8> that
// this two-rule grammar does not appear to define. The listing looks copied
// from the preceding test -- confirm the intended expected output.
@Test public void testEOFByItself() throws Exception {
|
||||
String grammar =
|
||||
"lexer grammar L;\n" +
|
||||
"DONE : EOF ;\n" +
|
||||
"A : 'a';\n";
|
||||
String found = execLexer("L.g", grammar, "L", "");
|
||||
String expecting =
|
||||
"[@0,0:0='x',<7>,1:0]\n" +
|
||||
"[@1,1:1=' ',<8>,1:1]\n" +
|
||||
"[@2,2:2='0',<4>,1:2]\n" +
|
||||
"[@3,3:3=' ',<8>,1:3]\n" +
|
||||
"[@4,4:4='1',<4>,1:4]\n" +
|
||||
"[@5,5:5=' ',<8>,1:5]\n" +
|
||||
"[@6,6:6='a',<7>,1:6]\n" +
|
||||
"[@7,7:7='.',<6>,1:7]\n" +
|
||||
"[@8,8:8='b',<7>,1:8]\n" +
|
||||
"[@9,9:9=' ',<8>,1:9]\n" +
|
||||
"[@10,10:10='a',<7>,1:10]\n" +
|
||||
"[@11,11:11='.',<6>,1:11]\n" +
|
||||
"[@12,12:12='l',<7>,1:12]\n" +
|
||||
"[@13,13:12='<EOF>',<-1>,1:13]\n";
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
|
|
|
@ -148,12 +148,12 @@ public class TestParserExec extends BaseTest {
|
|||
input, false);
|
||||
assertEquals("{}\n", found);
|
||||
input =
|
||||
"{a b { } ;";
|
||||
"{a b { }";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("{ab{}\n", found);
|
||||
input =
|
||||
"{ } a 2) { } ;"; // FAILS to match since it terminates loop at first { }
|
||||
"{ } a 2) { }"; // FAILS to match since it terminates loop at first { }
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("", found); // should not print output; resync kills rest of input
|
||||
|
@ -227,19 +227,23 @@ public class TestParserExec extends BaseTest {
|
|||
input =
|
||||
"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
input, false);
|
||||
assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
|
||||
input =
|
||||
"x=1; a=3;"; // FAILS to match since it can't match last element
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
// can't match EOF to ID '=' '0' ';'
|
||||
assertEquals("no viable token at input EOF, index 8\n", found);
|
||||
"x=1; a=3;"; // FAILS to match since it can't match last element
|
||||
execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
// can't match EOF to ID '=' '3' ';'
|
||||
assertEquals("line 1:9 no viable alternative at input ''\n",
|
||||
this.stderrDuringParse);
|
||||
|
||||
input =
|
||||
"x=1; a=b; z=3;"; // FAILS to match since it can't match last element
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("no viable token at input EOF, index 12\n", found); // should not finish to print output
|
||||
"x=1; a=b; z=3;"; // FAILS to match since it can't match last element
|
||||
execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("line 1:14 no viable alternative at input ''\n",
|
||||
this.stderrDuringParse);
|
||||
// should not finish to print output
|
||||
}
|
||||
|
||||
@Test public void testStatLoopNongreedyNecessary() throws Exception {
|
||||
|
@ -274,10 +278,11 @@ public class TestParserExec extends BaseTest {
|
|||
assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
|
||||
input =
|
||||
"x=1; a=3;"; // FAILS to match since it can't match either stat
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
// can't match EOF to ID '=' '0' ';'
|
||||
assertEquals("no viable token at input EOF, index 8\n", found);
|
||||
assertEquals("line 1:9 no viable alternative at input ''\n",
|
||||
this.stderrDuringParse);
|
||||
input =
|
||||
"x=1; a=b; z=3;"; // stops at a=b; ignores z=3;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
|
@ -359,7 +364,7 @@ public class TestParserExec extends BaseTest {
|
|||
input, false);
|
||||
assertEquals("if(34)ab\n", found);
|
||||
input =
|
||||
"if ( 34 ))) ) ( a = = b( ;";
|
||||
"if ( 34 ))) ) ( a = = b(";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(34))))(a==b(\n", found);
|
||||
|
@ -367,7 +372,7 @@ public class TestParserExec extends BaseTest {
|
|||
|
||||
/** When .* is on the end of a rule, no tokens predict the exit branch of the loop
|
||||
* since it immediately hits the end of the rule. Non-greedy loops
|
||||
* never consume more tokens than exist following the .* end that
|
||||
* never consume more tokens than exist following the .* in that
|
||||
* same rule. So, in this case, the greedy loop always wins and it will
|
||||
* suck tokens until end of file. Unfortunately, the '.' in rule s
|
||||
* will not match, leading to a syntax error.
|
||||
|
@ -385,6 +390,8 @@ public class TestParserExec extends BaseTest {
|
|||
"if ( 34 ) a b .";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("no viable token at input EOF, index 7\nif(34)ab.\n", found);
|
||||
assertEquals("if(34)ab.\n", found);
|
||||
assertEquals("line 1:15 no viable alternative at input ''\n",
|
||||
this.stderrDuringParse);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue