Added an exec version that handles the rule-invocation stack and the new ThreadState / context objects; added unit tests.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6829]
This commit is contained in:
parrt 2010-05-01 11:38:18 -08:00
parent 541350bbe3
commit 9dbc6a43fd
11 changed files with 548 additions and 42 deletions

View File

@ -48,7 +48,9 @@ public class Bytecode {
public static final short RANGE16 = 7; public static final short RANGE16 = 7;
public static final short WILDCARD = 8; public static final short WILDCARD = 8;
//public static final short NOT = 8; ??? //public static final short NOT = 8; ???
public static final short SAVE = 9; public static final short CALL = 9; // JMP with a push
public static final short RET = 10; // an accept instr for fragment rules
public static final short SAVE = 11;
/** Used for disassembly; describes instruction set */ /** Used for disassembly; describes instruction set */
public static Instruction[] instructions = new Instruction[] { public static Instruction[] instructions = new Instruction[] {
@ -61,6 +63,8 @@ public class Bytecode {
new Instruction("range8", OperandType.BYTE, OperandType.BYTE), new Instruction("range8", OperandType.BYTE, OperandType.BYTE),
new Instruction("range16", OperandType.CHAR, OperandType.CHAR), new Instruction("range16", OperandType.CHAR, OperandType.CHAR),
new Instruction("wildcard"), new Instruction("wildcard"),
new Instruction("call", OperandType.ADDR),
new Instruction("ret"),
new Instruction("save", OperandType.SHORT), new Instruction("save", OperandType.SHORT),
}; };

View File

@ -185,7 +185,159 @@ workLoop:
return 0; return 0;
} }
public int execThompson(CharStream input, int ip) { public int execThompson(CharStream input) {
int ip = 0; // always start at SPLIT instr at address 0
int c = input.LA(1);
if ( c==Token.EOF ) return Token.EOF;
List<ThreadState> closure = computeStartState(ip);
List<ThreadState> reach = new ArrayList<ThreadState>();
int prevAcceptAddr = Integer.MAX_VALUE;
int prevAcceptLastCharIndex = -1;
int prevAcceptInputMarker = -1;
int firstAcceptInputMarker = -1;
do { // while more work
c = input.LA(1);
int i = 0;
processOneChar:
while ( i<closure.size() ) {
//for (int i=0; i<closure.size(); i++) {
System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
ThreadState t = closure.get(i);
ip = t.addr;
NFAStack context = t.context;
int alt = t.alt;
trace(ip);
short opcode = code[ip];
ip++; // move to next instruction or first byte of operand
switch (opcode) {
case Bytecode.MATCH8 :
if ( c == code[ip] ) {
addToClosure(reach, ip+1, alt, context);
}
break;
case Bytecode.MATCH16 :
if ( c == getShort(code, ip) ) {
addToClosure(reach, ip+2, alt, context);
}
break;
case Bytecode.RANGE8 :
if ( c>=code[ip] && c<=code[ip+1] ) {
addToClosure(reach, ip+2, alt, context);
}
break;
case Bytecode.RANGE16 :
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) {
addToClosure(reach, ip+4, alt, context);
}
break;
case Bytecode.WILDCARD :
if ( c!=Token.EOF ) addToClosure(reach, ip, alt, context);
break;
case Bytecode.ACCEPT :
int tokenLastCharIndex = input.index() - 1;
int ttype = getShort(code, ip);
System.out.println("ACCEPT "+ ttype +" with last char position "+ tokenLastCharIndex);
if ( tokenLastCharIndex > prevAcceptLastCharIndex ) {
prevAcceptLastCharIndex = tokenLastCharIndex;
// choose longest match so far regardless of rule priority
System.out.println("replacing old best match @ "+prevAcceptAddr);
prevAcceptAddr = ip-1;
prevAcceptInputMarker = input.mark();
firstAcceptInputMarker = prevAcceptInputMarker;
}
else if ( tokenLastCharIndex == prevAcceptLastCharIndex ) {
// choose first rule matched if match is of same length
if ( ip-1 < prevAcceptAddr ) { // it will see both accepts for ambig rules
System.out.println("replacing old best match @ "+prevAcceptAddr);
prevAcceptAddr = ip-1;
prevAcceptInputMarker = input.mark();
}
}
// if we reach accept state, toss out any addresses in rest
// of work list associated with accept's rule; that rule is done
int j=i+1;
while ( j<closure.size() ) {
ThreadState cl = closure.get(j);
System.out.println("remaining "+ cl);
if ( cl.alt==alt ) closure.remove(j);
else j++;
}
// then, move to next char, looking for longer match
// (we continue processing if there are states in reach)
break;
case Bytecode.JMP : // ignore
case Bytecode.SPLIT :
case Bytecode.CALL :
case Bytecode.RET :
break;
default :
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
}
i++;
}
if ( reach.size()>0 ) { // if we reached other states, consume and process them
input.consume();
}
// swap to avoid reallocating space
List<ThreadState> tmp = reach;
reach = closure;
closure = tmp;
reach.clear();
} while ( closure.size()>0 );
if ( prevAcceptAddr >= code.length ) return Token.INVALID_TOKEN_TYPE;
int ttype = getShort(code, prevAcceptAddr+1);
System.out.println("done at index "+input.index());
System.out.println("accept marker="+prevAcceptInputMarker);
input.rewind(prevAcceptInputMarker); // does nothing if we accept'd at input.index() but might need to rewind
input.release(firstAcceptInputMarker); // kill any other markers in stream we made
System.out.println("leaving with index "+input.index());
return ttype;
}
/** Add thread (ip, alt, context) to {@code closure} unless an equal thread
 *  is already present, then recursively follow any epsilon edges
 *  (JMP / SAVE / SPLIT) starting at ip.  Match/accept instructions are
 *  left in the closure for the main loop to process.
 *  NOTE(review): CALL and RET are not expanded here, so {@code context}
 *  never grows; stack handling appears incomplete at this revision —
 *  confirm against later changes.
 */
void addToClosure(List<ThreadState> closure, int ip, int alt, NFAStack context) {
ThreadState t = new ThreadState(ip, alt, context);
//System.out.println("add to closure "+ip+" "+closure);
if ( closure.contains(t) ) return; // TODO: VERY INEFFICIENT! use int[num-states] as set test
closure.add(t);
short opcode = code[ip];
ip++; // move to next instruction or first byte of operand
switch (opcode) {
case Bytecode.JMP :
// unconditional branch: chase the target address
addToClosure(closure, getShort(code, ip), alt, context);
break;
case Bytecode.SAVE :
int labelIndex = getShort(code, ip);
ip += 2;
addToClosure(closure, ip, alt, context); // do closure pass SAVE
// TODO: impl (labelIndex is read but not yet recorded anywhere)
break;
case Bytecode.SPLIT :
int nopnds = getShort(code, ip);
ip += 2;
// follow every split target, in the order listed in the operands
for (int i=0; i<nopnds; i++) {
addToClosure(closure, getShort(code, ip+i*2), alt, context);
}
break;
}
}
/** Build the initial closure for the SPLIT instruction assumed to sit at
 *  {@code ip}: one ThreadState per alternative, each tagged with its
 *  1-based alt number and an empty rule-invocation stack.
 */
List<ThreadState> computeStartState(int ip) {
    List<ThreadState> closure = new ArrayList<ThreadState>();
    int operand = ip + 1;               // step over the SPLIT opcode itself
    int nalts = getShort(code, operand);
    operand += 2;                       // step over the alt-count operand
    for (int alt = 1; alt <= nalts; alt++, operand += Bytecode.ADDR_SIZE) {
        addToClosure(closure, getShort(code, operand), alt, NFAStack.EMPTY);
    }
    return closure;
}
public int execThompson_no_stack(CharStream input, int ip) {
int c = input.LA(1); int c = input.LA(1);
if ( c==Token.EOF ) return Token.EOF; if ( c==Token.EOF ) return Token.EOF;
@ -195,7 +347,7 @@ workLoop:
int prevAcceptLastCharIndex = -1; int prevAcceptLastCharIndex = -1;
int prevAcceptInputMarker = -1; int prevAcceptInputMarker = -1;
int firstAcceptInputMarker = -1; int firstAcceptInputMarker = -1;
addToClosure(closure, ip); addToClosure_no_stack(closure, ip);
do { // while more work do { // while more work
c = input.LA(1); c = input.LA(1);
int i = 0; int i = 0;
@ -210,26 +362,26 @@ processOneChar:
switch (opcode) { switch (opcode) {
case Bytecode.MATCH8 : case Bytecode.MATCH8 :
if ( c == code[ip] ) { if ( c == code[ip] ) {
addToClosure(reach, ip+1); addToClosure_no_stack(reach, ip+1);
} }
break; break;
case Bytecode.MATCH16 : case Bytecode.MATCH16 :
if ( c == getShort(code, ip) ) { if ( c == getShort(code, ip) ) {
addToClosure(reach, ip+2); addToClosure_no_stack(reach, ip+2);
} }
break; break;
case Bytecode.RANGE8 : case Bytecode.RANGE8 :
if ( c>=code[ip] && c<=code[ip+1] ) { if ( c>=code[ip] && c<=code[ip+1] ) {
addToClosure(reach, ip+2); addToClosure_no_stack(reach, ip+2);
} }
break; break;
case Bytecode.RANGE16 : case Bytecode.RANGE16 :
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) { if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) {
addToClosure(reach, ip+4); addToClosure_no_stack(reach, ip+4);
} }
break; break;
case Bytecode.WILDCARD : case Bytecode.WILDCARD :
if ( c!=Token.EOF ) addToClosure(reach, ip); if ( c!=Token.EOF ) addToClosure_no_stack(reach, ip);
break; break;
case Bytecode.ACCEPT : case Bytecode.ACCEPT :
int tokenLastCharIndex = input.index() - 1; int tokenLastCharIndex = input.index() - 1;
@ -298,7 +450,7 @@ processOneChar:
return ttype; return ttype;
} }
void addToClosure(List<Integer> closure, int ip) { void addToClosure_no_stack(List<Integer> closure, int ip) {
//System.out.println("add to closure "+ip+" "+closure); //System.out.println("add to closure "+ip+" "+closure);
if ( closure.contains(ip) ) return; // TODO: VERY INEFFICIENT! use int[num-states] as set test if ( closure.contains(ip) ) return; // TODO: VERY INEFFICIENT! use int[num-states] as set test
closure.add(ip); closure.add(ip);
@ -306,12 +458,12 @@ processOneChar:
ip++; // move to next instruction or first byte of operand ip++; // move to next instruction or first byte of operand
switch (opcode) { switch (opcode) {
case Bytecode.JMP : case Bytecode.JMP :
addToClosure(closure, getShort(code, ip)); addToClosure_no_stack(closure, getShort(code, ip));
break; break;
case Bytecode.SAVE : case Bytecode.SAVE :
int labelIndex = getShort(code, ip); int labelIndex = getShort(code, ip);
ip += 2; ip += 2;
addToClosure(closure, ip); // do closure pass SAVE addToClosure_no_stack(closure, ip); // do closure pass SAVE
// TODO: impl // TODO: impl
break; break;
case Bytecode.SPLIT : case Bytecode.SPLIT :
@ -319,7 +471,7 @@ processOneChar:
ip += 2; ip += 2;
// add split addresses to work queue in reverse order ('cept first one) // add split addresses to work queue in reverse order ('cept first one)
for (int i=0; i<nopnds; i++) { for (int i=0; i<nopnds; i++) {
addToClosure(closure, getShort(code, ip+i*2)); addToClosure_no_stack(closure, getShort(code, ip+i*2));
} }
break; break;
} }

View File

@ -0,0 +1,75 @@
package org.antlr.v4.runtime.nfa;
/** Identical to ANTLR's static grammar analysis NFAContext object */
public class NFAStack {
public static final NFAStack EMPTY = new NFAStack(null, -1);
public NFAStack parent;
/** The NFA state following state that invoked another rule's start state
* is recorded on the rule invocation context stack.
*/
public int returnAddr;
/** Computing the hashCode is very expensive and NFA.addToClosure()
* uses it to track when it's seen a state|ctx before to avoid
* infinite loops. As we add new contexts, record the hash code
* as this + parent.cachedHashCode. Avoids walking
* up the tree for every hashCode(). Note that this caching works
* because a context is a monotonically growing tree of context nodes
* and nothing on the stack is ever modified...ctx just grows
* or shrinks.
*/
protected int cachedHashCode;
public NFAStack(NFAStack parent, int returnAddr) {
this.parent = parent;
this.returnAddr = returnAddr;
if ( returnAddr >= 0 ) {
this.cachedHashCode = returnAddr;
}
if ( parent!=null ) {
this.cachedHashCode += parent.cachedHashCode;
}
}
public int hashCode() { return cachedHashCode; }
/** Two contexts are equals() if both have
* same call stack; walk upwards to the root.
* Recall that the root sentinel node has no parent.
* Note that you may be comparing contextsv in different alt trees.
*/
public boolean equals(Object o) {
NFAStack other = ((NFAStack)o);
if ( this.cachedHashCode != other.cachedHashCode ) {
return false; // can't be same if hash is different
}
if ( this==other ) return true;
// System.out.println("comparing "+this+" with "+other);
NFAStack sp = this;
while ( sp.parent!=null && other.parent!=null ) {
if ( sp.returnAddr != other.returnAddr) return false;
sp = sp.parent;
other = other.parent;
}
if ( !(sp.parent==null && other.parent==null) ) {
return false; // both pointers must be at their roots after walk
}
return true;
}
public String toString() {
StringBuffer buf = new StringBuffer();
NFAStack sp = this;
buf.append("[");
while ( sp.parent!=null ) {
buf.append(sp.returnAddr);
buf.append(" ");
sp = sp.parent;
}
buf.append("$]");
return buf.toString();
}
}

View File

@ -0,0 +1,31 @@
package org.antlr.v4.runtime.nfa;
/** One NFA simulation thread: an instruction address, the alternative
 *  being tried (or the speculatively matched token type for lexers), and
 *  the rule-invocation stack in effect at that address.
 */
public class ThreadState {
    public int addr;
    public int alt; // or speculatively matched token type for lexers
    public NFAStack context;

    public ThreadState(int addr, int alt, NFAStack context) {
        this.addr = addr;
        this.alt = alt;
        this.context = context;
    }

    /** Equal iff address, alt, and full call stack all match.
     *  FIX: previously cast without a type check, throwing
     *  ClassCastException for non-ThreadState arguments; the instanceof
     *  test also subsumes the old explicit null check.
     */
    @Override
    public boolean equals(Object o) {
        if ( this==o ) return true;
        if ( !(o instanceof ThreadState) ) return false; // also rejects null
        ThreadState other = (ThreadState)o;
        return this.addr==other.addr &&
               this.alt==other.alt &&
               this.context.equals(other.context);
    }

    // alt is not folded into the hash; still legal because equal objects
    // necessarily share addr and context and therefore hash equally.
    @Override
    public int hashCode() { return addr + context.hashCode(); }

    @Override
    public String toString() {
        if ( context.parent==null ) { // empty stack: omit it for brevity
            return "("+addr+","+alt+")";
        }
        return "("+addr+","+alt+","+context+")";
    }
}

View File

@ -178,13 +178,14 @@ public class NFAContext {
} }
public int hashCode() { public int hashCode() {
int h = 0; return cachedHashCode; // works with tests; don't recompute.
NFAContext sp = this; // int h = 0;
while ( sp.parent!=null ) { // NFAContext sp = this;
h += sp.returnState.stateNumber; // while ( sp.parent!=null ) {
sp = sp.parent; // h += sp.returnState.stateNumber;
} // sp = sp.parent;
return h; // }
// return h;
} }
/** How many rule invocations in this context? I.e., how many /** How many rule invocations in this context? I.e., how many

View File

@ -19,11 +19,12 @@ public class CodeGenPipeline {
LexerGrammar lg = (LexerGrammar)g; LexerGrammar lg = (LexerGrammar)g;
for (String modeName : lg.modes.keySet()) { // for each mode for (String modeName : lg.modes.keySet()) { // for each mode
NFA nfa = NFABytecodeGenerator.getBytecode(lg, modeName); NFA nfa = NFABytecodeGenerator.getBytecode(lg, modeName);
ANTLRStringStream input = new ANTLRStringStream("/*x*/ab"); //ANTLRStringStream input = new ANTLRStringStream("32");
ANTLRStringStream input = new ANTLRStringStream("/*x*/!ab");
//ANTLRStringStream input = new ANTLRStringStream("abc32ab"); //ANTLRStringStream input = new ANTLRStringStream("abc32ab");
int ttype = 0; int ttype = 0;
while ( ttype!= Token.EOF ) { while ( ttype!= Token.EOF ) {
ttype = nfa.execThompson(input, 0); System.out.println("ttype="+ttype); ttype = nfa.execThompson(input); System.out.println("ttype="+ttype);
} }
} }
} }

View File

@ -103,6 +103,11 @@ public class NFABytecodeGenerator extends TreeParser {
public String toString() { return addr+":AcceptInstr "+ruleIndex; } public String toString() { return addr+":AcceptInstr "+ruleIndex; }
} }
public static class RetInstr extends Instr {
public short opcode() { return Bytecode.RET; }
public int nBytes() { return 1; }
}
public static class JumpInstr extends Instr { public static class JumpInstr extends Instr {
int target; int target;
public short opcode() { return Bytecode.JMP; }; public short opcode() { return Bytecode.JMP; };
@ -120,6 +125,25 @@ public class NFABytecodeGenerator extends TreeParser {
} }
} }
public static class CallInstr extends Instr {
Token token;
int target;
public CallInstr(Token token) { this.token = token; }
public short opcode() { return Bytecode.CALL; };
public int nBytes() { return 1+Bytecode.ADDR_SIZE; }
public void write(byte[] code) {
super.write(code);
writeShort(code, addr+1, (short)target);
}
@Override
public String toString() {
return addr+":CallInstr{" +
"target=" + target +
'}';
}
}
public static class SplitInstr extends Instr { public static class SplitInstr extends Instr {
List<Integer> addrs = new ArrayList<Integer>(); List<Integer> addrs = new ArrayList<Integer>();
int nAlts; int nAlts;
@ -212,21 +236,26 @@ public class NFABytecodeGenerator extends TreeParser {
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk); CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
gen.setTreeNodeStream(nodes); gen.setTreeNodeStream(nodes);
int ttype = lg.getTokenType(r.name); int ttype = lg.getTokenType(r.name);
tokenTypeToAddr[ttype] = gen.ip;
ruleToAddr.put(r.name, gen.ip); ruleToAddr.put(r.name, gen.ip);
if ( !r.isFragment() ) s0.addrs.add(gen.ip); if ( !r.isFragment() ) {
s0.addrs.add(gen.ip);
tokenTypeToAddr[ttype] = gen.ip;
}
try { try {
gen.block(); gen.block();
int ruleTokenType = lg.getTokenType(r.name); int ruleTokenType = lg.getTokenType(r.name);
gen.emit(new NFABytecodeGenerator.AcceptInstr(ruleTokenType)); if ( !r.isFragment() ) {
gen.emit(new NFABytecodeGenerator.AcceptInstr(ruleTokenType));
}
else {
gen.emit(new NFABytecodeGenerator.RetInstr());
}
} }
catch (Exception e){ catch (Exception e){
e.printStackTrace(System.err); e.printStackTrace(System.err);
} }
} }
byte[] code = NFABytecodeGenerator.getByteCode(gen.instrs); byte[] code = NFABytecodeGenerator.getByteCode(gen.instrs);
System.out.println("all:");
System.out.println(Bytecode.disassemble(code));
System.out.println("rule addrs="+ruleToAddr); System.out.println("rule addrs="+ruleToAddr);
NFA nfa = new NFA(code, ruleToAddr); NFA nfa = new NFA(code, ruleToAddr);

View File

@ -168,8 +168,8 @@ range
terminal terminal
: ^(STRING_LITERAL .) {emitString($STRING_LITERAL.token);} : ^(STRING_LITERAL .) {emitString($STRING_LITERAL.token);}
| STRING_LITERAL {emitString($STRING_LITERAL.token);} | STRING_LITERAL {emitString($STRING_LITERAL.token);}
| ^(TOKEN_REF ARG_ACTION .) | ^(TOKEN_REF ARG_ACTION .) {emit(new CallInstr($TOKEN_REF.token));}
| ^(TOKEN_REF .) | ^(TOKEN_REF .) {emit(new CallInstr($TOKEN_REF.token));}
| TOKEN_REF | TOKEN_REF
| ^(ROOT terminal) | ^(ROOT terminal)
| ^(BANG terminal) | ^(BANG terminal)

View File

@ -1,4 +1,4 @@
// $ANTLR ${project.version} ${buildNumber} NFABytecodeTriggers.g 2010-04-30 18:19:35 // $ANTLR ${project.version} ${buildNumber} NFABytecodeTriggers.g 2010-05-01 11:23:08
package org.antlr.v4.codegen; package org.antlr.v4.codegen;
@ -1319,6 +1319,8 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
public final void terminal() throws RecognitionException { public final void terminal() throws RecognitionException {
GrammarAST STRING_LITERAL3=null; GrammarAST STRING_LITERAL3=null;
GrammarAST STRING_LITERAL4=null; GrammarAST STRING_LITERAL4=null;
GrammarAST TOKEN_REF5=null;
GrammarAST TOKEN_REF6=null;
try { try {
// NFABytecodeTriggers.g:169:5: ( ^( STRING_LITERAL . ) | STRING_LITERAL | ^( TOKEN_REF ARG_ACTION . ) | ^( TOKEN_REF . ) | TOKEN_REF | ^( ROOT terminal ) | ^( BANG terminal ) ) // NFABytecodeTriggers.g:169:5: ( ^( STRING_LITERAL . ) | STRING_LITERAL | ^( TOKEN_REF ARG_ACTION . ) | ^( TOKEN_REF . ) | TOKEN_REF | ^( ROOT terminal ) | ^( BANG terminal ) )
@ -1349,42 +1351,44 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
case 3 : case 3 :
// NFABytecodeTriggers.g:171:7: ^( TOKEN_REF ARG_ACTION . ) // NFABytecodeTriggers.g:171:7: ^( TOKEN_REF ARG_ACTION . )
{ {
match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal847); TOKEN_REF5=(GrammarAST)match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal847);
match(input, Token.DOWN, null); match(input, Token.DOWN, null);
match(input,ARG_ACTION,FOLLOW_ARG_ACTION_in_terminal849); match(input,ARG_ACTION,FOLLOW_ARG_ACTION_in_terminal849);
matchAny(input); matchAny(input);
match(input, Token.UP, null); match(input, Token.UP, null);
emit(new CallInstr(TOKEN_REF5.token));
} }
break; break;
case 4 : case 4 :
// NFABytecodeTriggers.g:172:7: ^( TOKEN_REF . ) // NFABytecodeTriggers.g:172:7: ^( TOKEN_REF . )
{ {
match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal862); TOKEN_REF6=(GrammarAST)match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal863);
match(input, Token.DOWN, null); match(input, Token.DOWN, null);
matchAny(input); matchAny(input);
match(input, Token.UP, null); match(input, Token.UP, null);
emit(new CallInstr(TOKEN_REF6.token));
} }
break; break;
case 5 : case 5 :
// NFABytecodeTriggers.g:173:7: TOKEN_REF // NFABytecodeTriggers.g:173:7: TOKEN_REF
{ {
match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal877); match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal879);
} }
break; break;
case 6 : case 6 :
// NFABytecodeTriggers.g:174:7: ^( ROOT terminal ) // NFABytecodeTriggers.g:174:7: ^( ROOT terminal )
{ {
match(input,ROOT,FOLLOW_ROOT_in_terminal891); match(input,ROOT,FOLLOW_ROOT_in_terminal893);
match(input, Token.DOWN, null); match(input, Token.DOWN, null);
pushFollow(FOLLOW_terminal_in_terminal893); pushFollow(FOLLOW_terminal_in_terminal895);
terminal(); terminal();
state._fsp--; state._fsp--;
@ -1397,10 +1401,10 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
case 7 : case 7 :
// NFABytecodeTriggers.g:175:7: ^( BANG terminal ) // NFABytecodeTriggers.g:175:7: ^( BANG terminal )
{ {
match(input,BANG,FOLLOW_BANG_in_terminal906); match(input,BANG,FOLLOW_BANG_in_terminal908);
match(input, Token.DOWN, null); match(input, Token.DOWN, null);
pushFollow(FOLLOW_terminal_in_terminal908); pushFollow(FOLLOW_terminal_in_terminal910);
terminal(); terminal();
state._fsp--; state._fsp--;
@ -1785,11 +1789,11 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
public static final BitSet FOLLOW_STRING_LITERAL_in_terminal833 = new BitSet(new long[]{0x0000000000000002L}); public static final BitSet FOLLOW_STRING_LITERAL_in_terminal833 = new BitSet(new long[]{0x0000000000000002L});
public static final BitSet FOLLOW_TOKEN_REF_in_terminal847 = new BitSet(new long[]{0x0000000000000004L}); public static final BitSet FOLLOW_TOKEN_REF_in_terminal847 = new BitSet(new long[]{0x0000000000000004L});
public static final BitSet FOLLOW_ARG_ACTION_in_terminal849 = new BitSet(new long[]{0xFFFFFFFFFFFFFFF0L,0x0000007FFFFFFFFFL}); public static final BitSet FOLLOW_ARG_ACTION_in_terminal849 = new BitSet(new long[]{0xFFFFFFFFFFFFFFF0L,0x0000007FFFFFFFFFL});
public static final BitSet FOLLOW_TOKEN_REF_in_terminal862 = new BitSet(new long[]{0x0000000000000004L}); public static final BitSet FOLLOW_TOKEN_REF_in_terminal863 = new BitSet(new long[]{0x0000000000000004L});
public static final BitSet FOLLOW_TOKEN_REF_in_terminal877 = new BitSet(new long[]{0x0000000000000002L}); public static final BitSet FOLLOW_TOKEN_REF_in_terminal879 = new BitSet(new long[]{0x0000000000000002L});
public static final BitSet FOLLOW_ROOT_in_terminal891 = new BitSet(new long[]{0x0000000000000004L}); public static final BitSet FOLLOW_ROOT_in_terminal893 = new BitSet(new long[]{0x0000000000000004L});
public static final BitSet FOLLOW_terminal_in_terminal893 = new BitSet(new long[]{0x0000000000000008L}); public static final BitSet FOLLOW_terminal_in_terminal895 = new BitSet(new long[]{0x0000000000000008L});
public static final BitSet FOLLOW_BANG_in_terminal906 = new BitSet(new long[]{0x0000000000000004L}); public static final BitSet FOLLOW_BANG_in_terminal908 = new BitSet(new long[]{0x0000000000000004L});
public static final BitSet FOLLOW_terminal_in_terminal908 = new BitSet(new long[]{0x0000000000000008L}); public static final BitSet FOLLOW_terminal_in_terminal910 = new BitSet(new long[]{0x0000000000000008L});
} }

View File

@ -0,0 +1,105 @@
package org.antlr.v4.test;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.nfa.NFA;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
import org.junit.Test;
/** Checks that lexer grammars compile to the expected NFA bytecode by
 *  comparing the disassembly of the generated code against a literal dump.
 *  NOTE(review): the expected strings encode exact instruction addresses,
 *  so any change to instruction sizes or emission order breaks them.
 */
public class TestNFABytecodeGeneration extends BaseTest {
// Single literal rule: mode-level split to one alt, match each char, accept.
@Test public void testString() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n"+
"A : 'ab' ;");
String expecting =
"0000:\tsplit 5\n" +
"0005:\tmatch8 'a'\n" +
"0007:\tmatch8 'b'\n" +
"0009:\taccept 4\n";
checkBytecode(g, expecting);
}
// Three rules: the mode-level split fans out to each rule's start address.
@Test public void testIDandIntandKeyword() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
"A : 'ab';\n" +
"B : 'a'..'z'+ ;\n" +
"I : '0'..'9'+ ;\n");
String expecting =
"0000:\tsplit 9, 16, 29\n" +
"0009:\tmatch8 'a'\n" +
"0011:\tmatch8 'b'\n" +
"0013:\taccept 4\n" +
"0016:\trange8 'a', 'z'\n" +
"0019:\tsplit 16, 26\n" +
"0026:\taccept 5\n" +
"0029:\trange8 '0', '9'\n" +
"0032:\tsplit 29, 39\n" +
"0039:\taccept 6\n";
checkBytecode(g, expecting);
}
// Non-greedy loop: the loop split lists the exit branch (22) before the body (18).
@Test public void testNonGreedy() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
"\n" +
"CMT : '/*' (options {greedy=false;}:.)* '*/' ;\n" +
"ID : 'ab' ;\n");
String expecting =
"0000:\tsplit 7, 29\n" +
"0007:\tmatch8 '/'\n" +
"0009:\tmatch8 '*'\n" +
"0011:\tsplit 22, 18\n" +
"0018:\twildcard \n" +
"0019:\tjmp 11\n" +
"0022:\tmatch8 '*'\n" +
"0024:\tmatch8 '/'\n" +
"0026:\taccept 4\n" +
"0029:\tmatch8 'a'\n" +
"0031:\tmatch8 'b'\n" +
"0033:\taccept 5\n";
checkBytecode(g, expecting);
}
// Fragment D ends in ret (not accept) and is excluded from the mode-level
// split.  NOTE(review): no call instruction targeting 15 appears in this
// dump — verify that CALL emission for fragment references is complete.
@Test public void testCallFragment() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
"I : D+ ;\n" +
"fragment D : '0'..'9'+ ;\n");
String expecting =
"0000:\tsplit 5\n" +
"0005:\tsplit 5, 12\n" +
"0012:\taccept 4\n" +
"0015:\trange8 '0', '9'\n" +
"0018:\tsplit 15, 25\n" +
"0025:\tret \n";
checkBytecode(g, expecting);
}
// Fill-in skeleton for new tests; deliberately not annotated with @Test.
public void _template() throws Exception {
LexerGrammar g = new LexerGrammar(
"\n");
String expecting =
"\n";
checkBytecode(g, expecting);
}
// Runs semantic analysis (when the grammar parsed cleanly), generates
// bytecode for the default mode, and compares its disassembly to expecting.
void checkBytecode(LexerGrammar g, String expecting) {
if ( g.ast!=null && !g.ast.hasErrors ) {
System.out.println(g.ast.toStringTree());
Tool antlr = new Tool();
SemanticPipeline sem = new SemanticPipeline(g);
sem.process();
if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
for (Grammar imp : g.getImportedGrammars()) {
antlr.process(imp);
}
}
}
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
assertEquals(expecting, Bytecode.disassemble(nfa.code));
}
}

View File

@ -0,0 +1,104 @@
package org.antlr.v4.test;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.NFA;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
/** Runs the Thompson NFA interpreter (NFA.execThompson) over sample input
 *  and checks the sequence of emitted token types, including the trailing
 *  EOF entry that the tokenize loop always appends.
 */
public class TestNFABytecodeInterp extends BaseTest {
@Test public void testString() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n"+
"A : 'ab' ;");
String expecting = "A, A, EOF";
checkMatches(g, "abab", expecting);
}
// Longest match: "ab" matches A (listed first); "32" matches I; "abc" is
// longer than A's "ab" so B wins there.
@Test public void testIDandIntandKeyword() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
"A : 'ab';\n" +
"B : 'a'..'z'+ ;\n" +
"I : '0'..'9'+ ;\n");
String expecting = "A, I, B, EOF";
checkMatches(g, "ab32abc", expecting);
}
// Non-greedy comment loop must stop at the first "*/".
@Test public void testNonGreedy() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
"\n" +
"CMT : '/*' (options {greedy=false;}:.)* '*/' ;\n" +
"ID : 'ab' ;\n");
String expecting = "ID, CMT, EOF";
checkMatches(g, "ab/* x */", expecting);
}
// CMT and CMT2 share the '/*' prefix; the '!' suffix makes CMT2 the longer
// match on the first comment, CMT on the second.
@Test public void testNonGreedyAndCommonLeftPrefix() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
"\n" +
"CMT : '/*' (options {greedy=false;}:.)* '*/' ;\n" +
"CMT2: '/*' (options {greedy=false;}:.)* '*/' '!' ;\n" +
"ID : 'ab' ;\n");
String expecting = "ID, CMT2, CMT, EOF";
checkMatches(g, "ab/* x */!/* foo */", expecting);
}
// NOTE(review): checkMatches always records at least one token (the final
// EOF/err result), so an empty expecting list can never match — this looks
// like a work-in-progress test for the not-yet-implemented CALL/RET path.
@Test public void testCallFragment() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
"I : D+ ;\n" +
"fragment D : '0'..'9'+ ;\n");
String expecting = "";
checkMatches(g, "a", expecting);
}
// Fill-in skeleton for new tests; deliberately not annotated with @Test.
public void _template() throws Exception {
LexerGrammar g = new LexerGrammar(
"\n");
String expecting = "";
checkMatches(g, "input", expecting);
}
// Runs semantic analysis, builds the NFA for the default mode, tokenizes
// input until EOF, and compares token types against the named list in
// expecting (names resolved via g.getTokenType; assumes "EOF" maps to
// Token.EOF — TODO confirm).
void checkMatches(LexerGrammar g, String input, String expecting) {
if ( g.ast!=null && !g.ast.hasErrors ) {
System.out.println(g.ast.toStringTree());
Tool antlr = new Tool();
SemanticPipeline sem = new SemanticPipeline(g);
sem.process();
if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
for (Grammar imp : g.getImportedGrammars()) {
antlr.process(imp);
}
}
}
List<Integer> expectingTokens = new ArrayList<Integer>();
if ( expecting!=null && !expecting.trim().equals("") ) {
for (String tname : expecting.replace(" ", "").split(",")) {
int ttype = g.getTokenType(tname);
expectingTokens.add(ttype);
}
}
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
ANTLRStringStream in = new ANTLRStringStream(input);
List<Integer> tokens = new ArrayList<Integer>();
int ttype = 0;
do {
ttype = nfa.execThompson(in);
tokens.add(ttype);
} while ( ttype!= Token.EOF );
assertEquals(expectingTokens, tokens);
}
}