Added an exec version that handles a stack, plus the new ThreadState / context classes. Added unit tests.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6829]
2010-05-01 11:38:18 -08:00 · 2010-05-01 11:38:18 -08:00 · 9dbc6a43fd
parent 541350bbe3
commit 9dbc6a43fd
11 changed files with 548 additions and 42 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java
@ -48,7 +48,9 @@ public class Bytecode {
 	public static final short RANGE16	= 7;
 	public static final short WILDCARD	= 8;
 	//public static final short NOT	= 8; ???
-	public static final short SAVE		= 9;
+	public static final short CALL		= 9; // JMP with a push
+	public static final short RET		= 10; // an accept instr for fragment rules
+	public static final short SAVE		= 11;

 	/** Used for disassembly; describes instruction set */
 	public static Instruction[] instructions = new Instruction[] {
@ -61,6 +63,8 @@ public class Bytecode {
 		new Instruction("range8", OperandType.BYTE, OperandType.BYTE),
 		new Instruction("range16", OperandType.CHAR, OperandType.CHAR),
 		new Instruction("wildcard"),
+		new Instruction("call", OperandType.ADDR),
+		new Instruction("ret"),
 		new Instruction("save", OperandType.SHORT),
 	};

--- a/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java
@ -185,7 +185,159 @@ workLoop:
 		return 0;
 	}

-	public int execThompson(CharStream input, int ip) {
+	public int execThompson(CharStream input) {
+		int ip = 0; // always start at SPLIT instr at address 0
+		int c = input.LA(1);
+		if ( c==Token.EOF ) return Token.EOF;
+
+		List<ThreadState> closure = computeStartState(ip);
+		List<ThreadState> reach = new ArrayList<ThreadState>();
+		int prevAcceptAddr = Integer.MAX_VALUE;
+		int prevAcceptLastCharIndex = -1;
+		int prevAcceptInputMarker = -1;
+		int firstAcceptInputMarker = -1;
+		do { // while more work
+			c = input.LA(1);
+			int i = 0;
+processOneChar:
+			while ( i<closure.size() ) {
+			//for (int i=0; i<closure.size(); i++) {
+				System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
+				ThreadState t = closure.get(i);
+				ip = t.addr;
+				NFAStack context = t.context;
+				int alt = t.alt;
+				trace(ip);
+				short opcode = code[ip];
+				ip++; // move to next instruction or first byte of operand
+				switch (opcode) {
+					case Bytecode.MATCH8 :
+						if ( c == code[ip] ) {
+							addToClosure(reach, ip+1, alt, context);
+						}
+						break;
+					case Bytecode.MATCH16 :
+						if ( c == getShort(code, ip) ) {
+							addToClosure(reach, ip+2, alt, context);
+						}
+						break;
+					case Bytecode.RANGE8 :
+						if ( c>=code[ip] && c<=code[ip+1] ) {
+							addToClosure(reach, ip+2, alt, context);
+						}
+						break;
+					case Bytecode.RANGE16 :
+						if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) {
+							addToClosure(reach, ip+4, alt, context);
+						}
+						break;
+					case Bytecode.WILDCARD :
+						if ( c!=Token.EOF ) addToClosure(reach, ip, alt, context);
+						break;
+					case Bytecode.ACCEPT :
+						int tokenLastCharIndex = input.index() - 1;
+						int ttype = getShort(code, ip);
+						System.out.println("ACCEPT "+ ttype +" with last char position "+ tokenLastCharIndex);
+						if ( tokenLastCharIndex > prevAcceptLastCharIndex ) {
+							prevAcceptLastCharIndex = tokenLastCharIndex;
+							// choose longest match so far regardless of rule priority
+							System.out.println("replacing old best match @ "+prevAcceptAddr);
+							prevAcceptAddr = ip-1;
+							prevAcceptInputMarker = input.mark();
+							firstAcceptInputMarker = prevAcceptInputMarker;
+						}
+						else if ( tokenLastCharIndex == prevAcceptLastCharIndex ) {
+							// choose first rule matched if match is of same length
+							if ( ip-1 < prevAcceptAddr ) { // it will see both accepts for ambig rules
+								System.out.println("replacing old best match @ "+prevAcceptAddr);
+								prevAcceptAddr = ip-1;
+								prevAcceptInputMarker = input.mark();
+							}
+						}
+						// if we reach accept state, toss out any addresses in rest
+						// of work list associated with accept's rule; that rule is done
+						int j=i+1;
+						while ( j<closure.size() ) {
+							ThreadState cl = closure.get(j);
+							System.out.println("remaining "+ cl);
+							if ( cl.alt==alt ) closure.remove(j);
+							else j++;
+						}
+						// then, move to next char, looking for longer match
+						// (we continue processing if there are states in reach)
+						break;
+					case Bytecode.JMP : // ignore
+					case Bytecode.SPLIT :
+					case Bytecode.CALL :
+					case Bytecode.RET :
+						break;
+					default :
+						throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
+				}
+				i++;
+			}
+			if ( reach.size()>0 ) { // if we reached other states, consume and process them
+				input.consume();
+			}
+			// swap to avoid reallocating space
+			List<ThreadState> tmp = reach;
+			reach = closure;
+			closure = tmp;
+			reach.clear();
+		} while ( closure.size()>0 );
+
+		if ( prevAcceptAddr >= code.length ) return Token.INVALID_TOKEN_TYPE;
+		int ttype = getShort(code, prevAcceptAddr+1);
+		System.out.println("done at index "+input.index());
+		System.out.println("accept marker="+prevAcceptInputMarker);
+		input.rewind(prevAcceptInputMarker); // does nothing if we accept'd at input.index() but might need to rewind
+		input.release(firstAcceptInputMarker); // kill any other markers in stream we made
+		System.out.println("leaving with index "+input.index());
+		return ttype;
+	}
+
+	void addToClosure(List<ThreadState> closure, int ip, int alt, NFAStack context) {
+		ThreadState t = new ThreadState(ip, alt, context);
+		//System.out.println("add to closure "+ip+" "+closure);
+		if ( closure.contains(t) ) return; // TODO: VERY INEFFICIENT! use int[num-states] as set test
+		closure.add(t);
+		short opcode = code[ip];
+		ip++; // move to next instruction or first byte of operand
+		switch (opcode) {
+			case Bytecode.JMP :
+				addToClosure(closure, getShort(code, ip), alt, context);
+				break;
+			case Bytecode.SAVE :
+				int labelIndex = getShort(code, ip);
+				ip += 2;
+				addToClosure(closure, ip, alt, context); // do closure pass SAVE
+				// TODO: impl
+				break;
+			case Bytecode.SPLIT :
+				int nopnds = getShort(code, ip);
+				ip += 2;
+				// add every split target address to the closure, in operand order
+				for (int i=0; i<nopnds; i++) {
+					addToClosure(closure, getShort(code, ip+i*2), alt, context);
+				}
+				break;
+		}
+	}
+
+	List<ThreadState> computeStartState(int ip) { // assume SPLIT at ip
+		List<ThreadState> closure = new ArrayList<ThreadState>();
+		ip++;
+		int nalts = getShort(code, ip);
+		ip += 2;
+		// add each alternative's start address to the closure, in order; alts are numbered 1..nalts
+		for (int i=1; i<=nalts; i++) {
+			addToClosure(closure, getShort(code, ip), i, NFAStack.EMPTY);
+			ip += Bytecode.ADDR_SIZE;
+		}
+		return closure;
+	}
+
+	public int execThompson_no_stack(CharStream input, int ip) {
 		int c = input.LA(1);
 		if ( c==Token.EOF ) return Token.EOF;

@ -195,7 +347,7 @@ workLoop:
 		int prevAcceptLastCharIndex = -1;
 		int prevAcceptInputMarker = -1;
 		int firstAcceptInputMarker = -1;
-		addToClosure(closure, ip);
+		addToClosure_no_stack(closure, ip);
 		do { // while more work
 			c = input.LA(1);
 			int i = 0;
@ -210,26 +362,26 @@ processOneChar:
 				switch (opcode) {
 					case Bytecode.MATCH8 :
 						if ( c == code[ip] ) {
-							addToClosure(reach, ip+1);
+							addToClosure_no_stack(reach, ip+1);
 						}
 						break;
 					case Bytecode.MATCH16 :
 						if ( c == getShort(code, ip) ) {
-							addToClosure(reach, ip+2);
+							addToClosure_no_stack(reach, ip+2);
 						}
 						break;
 					case Bytecode.RANGE8 :
 						if ( c>=code[ip] && c<=code[ip+1] ) {
-							addToClosure(reach, ip+2);
+							addToClosure_no_stack(reach, ip+2);
 						}
 						break;
 					case Bytecode.RANGE16 :
 						if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) {
-							addToClosure(reach, ip+4);
+							addToClosure_no_stack(reach, ip+4);
 						}
 						break;
 					case Bytecode.WILDCARD :
-						if ( c!=Token.EOF ) addToClosure(reach, ip);
+						if ( c!=Token.EOF ) addToClosure_no_stack(reach, ip);
 						break;
 					case Bytecode.ACCEPT :
 						int tokenLastCharIndex = input.index() - 1;
@ -298,7 +450,7 @@ processOneChar:
 		return ttype;
 	}

-	void addToClosure(List<Integer> closure, int ip) {
+	void addToClosure_no_stack(List<Integer> closure, int ip) {
 		//System.out.println("add to closure "+ip+" "+closure);
 		if ( closure.contains(ip) ) return; // TODO: VERY INEFFICIENT! use int[num-states] as set test
 		closure.add(ip);
@ -306,12 +458,12 @@ processOneChar:
 		ip++; // move to next instruction or first byte of operand
 		switch (opcode) {
 			case Bytecode.JMP :
-				addToClosure(closure, getShort(code, ip));
+				addToClosure_no_stack(closure, getShort(code, ip));
 				break;
 			case Bytecode.SAVE :
 				int labelIndex = getShort(code, ip);
 				ip += 2;
-				addToClosure(closure, ip); // do closure pass SAVE
+				addToClosure_no_stack(closure, ip); // do closure pass SAVE
 				// TODO: impl
 				break;
 			case Bytecode.SPLIT :
@ -319,7 +471,7 @@ processOneChar:
 				ip += 2;
 				// add split addresses to work queue in reverse order ('cept first one)
 				for (int i=0; i<nopnds; i++) {
-					addToClosure(closure, getShort(code, ip+i*2));
+					addToClosure_no_stack(closure, getShort(code, ip+i*2));
 				}
 				break;
 		}
--- a/runtime/Java/src/org/antlr/v4/runtime/nfa/NFAStack.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/NFAStack.java
@ -0,0 +1,75 @@
+package org.antlr.v4.runtime.nfa;
+
+/** Identical to ANTLR's static grammar analysis NFAContext object */
+public class NFAStack {
+	public static final NFAStack EMPTY = new NFAStack(null, -1);
+
+	public NFAStack parent;
+
+    /** The NFA state following state that invoked another rule's start state
+	 *  is recorded on the rule invocation context stack.
+     */
+    public int returnAddr;
+
+    /** Computing the hashCode is very expensive and NFA.addToClosure()
+     *  uses it to track when it's seen a state|ctx before to avoid
+     *  infinite loops.  As we add new contexts, record the hash code
+     *  as returnAddr + parent.cachedHashCode.  Avoids walking
+     *  up the tree for every hashCode().  Note that this caching works
+     *  because a context is a monotonically growing tree of context nodes
+     *  and nothing on the stack is ever modified...ctx just grows
+     *  or shrinks.
+     */
+    protected int cachedHashCode;
+
+	public NFAStack(NFAStack parent, int returnAddr) {
+        this.parent = parent;
+        this.returnAddr = returnAddr;
+        if ( returnAddr >= 0 ) {
+            this.cachedHashCode = returnAddr;
+        }
+        if ( parent!=null ) {
+            this.cachedHashCode += parent.cachedHashCode;
+        }
+    }
+
+	public int hashCode() {	return cachedHashCode; }
+	
+	/** Two contexts are equals() if both have
+	 *  same call stack; walk upwards to the root.
+	 *  Recall that the root sentinel node has no parent.
+	 *  Note that you may be comparing contexts in different alt trees.
+	 */
+	public boolean equals(Object o) {
+		NFAStack other = ((NFAStack)o);
+		if ( this.cachedHashCode != other.cachedHashCode ) {
+			return false; // can't be same if hash is different
+		}
+		if ( this==other ) return true;
+
+		// System.out.println("comparing "+this+" with "+other);
+		NFAStack sp = this;
+		while ( sp.parent!=null && other.parent!=null ) {
+			if ( sp.returnAddr != other.returnAddr) return false;
+			sp = sp.parent;
+			other = other.parent;
+		}
+		if ( !(sp.parent==null && other.parent==null) ) {
+			return false; // both pointers must be at their roots after walk
+		}
+		return true;
+	}
+
+	public String toString() {
+		StringBuffer buf = new StringBuffer();
+		NFAStack sp = this;
+		buf.append("[");
+		while ( sp.parent!=null ) {
+			buf.append(sp.returnAddr);
+			buf.append(" ");
+			sp = sp.parent;
+		}
+		buf.append("$]");
+		return buf.toString();
+	}
+}
--- a/runtime/Java/src/org/antlr/v4/runtime/nfa/ThreadState.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/ThreadState.java
@ -0,0 +1,31 @@
+package org.antlr.v4.runtime.nfa;
+
+/** NFA simulation thread state */
+public class ThreadState {
+	public int addr;
+	public int alt; // or speculatively matched token type for lexers 
+	public NFAStack context;
+	public ThreadState(int addr, int alt, NFAStack context) {
+		this.addr = addr;
+		this.alt = alt;
+		this.context = context;
+	}
+	
+	public boolean equals(Object o) {
+		if ( o==null ) return false;
+		if ( this==o ) return true;
+		ThreadState other = (ThreadState)o;
+		return this.addr==other.addr &&
+			   this.alt==other.alt &&
+			   this.context.equals(other.context);
+	}
+
+	public int hashCode() {	return addr + context.hashCode(); }
+	
+	public String toString() {
+		if ( context.parent==null ) {
+			return "("+addr+","+alt+")";
+		}
+		return "("+addr+","+alt+","+context+")";
+	}
+}
--- a/tool/src/org/antlr/v4/analysis/NFAContext.java
+++ b/tool/src/org/antlr/v4/analysis/NFAContext.java
@ -178,13 +178,14 @@ public class NFAContext {
 	}

 	public int hashCode() {
-		int h = 0;
-		NFAContext sp = this;
-		while ( sp.parent!=null ) {
-			h += sp.returnState.stateNumber;
-			sp = sp.parent;
-		}
-		return h;
+		return cachedHashCode; // works with tests; don't recompute.
+//		int h = 0;
+//		NFAContext sp = this;
+//		while ( sp.parent!=null ) {
+//			h += sp.returnState.stateNumber;
+//			sp = sp.parent;
+//		}
+//		return h;
 	}

 	/** How many rule invocations in this context? I.e., how many
--- a/tool/src/org/antlr/v4/codegen/CodeGenPipeline.java
+++ b/tool/src/org/antlr/v4/codegen/CodeGenPipeline.java
@ -19,11 +19,12 @@ public class CodeGenPipeline {
 		LexerGrammar lg = (LexerGrammar)g;
 		for (String modeName : lg.modes.keySet()) { // for each mode
 			NFA nfa = NFABytecodeGenerator.getBytecode(lg, modeName);
-			ANTLRStringStream input = new ANTLRStringStream("/*x*/ab");
+			//ANTLRStringStream input = new ANTLRStringStream("32");
+			ANTLRStringStream input = new ANTLRStringStream("/*x*/!ab");
 			//ANTLRStringStream input = new ANTLRStringStream("abc32ab");
 			int ttype = 0;
 			while ( ttype!= Token.EOF ) {
-				ttype = nfa.execThompson(input, 0); System.out.println("ttype="+ttype);
+				ttype = nfa.execThompson(input); System.out.println("ttype="+ttype);
 			}
 		}
 	}
--- a/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java
+++ b/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java
@ -103,6 +103,11 @@ public class NFABytecodeGenerator extends TreeParser {
 		public String toString() { return addr+":AcceptInstr "+ruleIndex; }
 	}

+	public static class RetInstr extends Instr {
+		public short opcode() { return Bytecode.RET; }
+		public int nBytes() { return 1; }
+	}
+
 	public static class JumpInstr extends Instr {
 		int target;
 		public short opcode() { return Bytecode.JMP; };
@ -120,6 +125,25 @@ public class NFABytecodeGenerator extends TreeParser {
 		}
 	}

+	public static class CallInstr extends Instr {
+		Token token;
+		int target;
+		public CallInstr(Token token) { this.token = token; }
+		public short opcode() { return Bytecode.CALL; };
+		public int nBytes() { return 1+Bytecode.ADDR_SIZE; }
+		public void write(byte[] code) {
+			super.write(code);
+			writeShort(code, addr+1, (short)target);
+		}
+
+		@Override
+		public String toString() {
+			return addr+":CallInstr{" +
+				   "target=" + target +
+				   '}';
+		}
+	}
+
 	public static class SplitInstr extends Instr {
 		List<Integer> addrs = new ArrayList<Integer>();
 		int nAlts;
@ -212,21 +236,26 @@ public class NFABytecodeGenerator extends TreeParser {
 			CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
 			gen.setTreeNodeStream(nodes);
 			int ttype = lg.getTokenType(r.name);
-			tokenTypeToAddr[ttype] = gen.ip;
 			ruleToAddr.put(r.name, gen.ip);
-			if ( !r.isFragment() ) s0.addrs.add(gen.ip);
+			if ( !r.isFragment() ) {
+				s0.addrs.add(gen.ip);
+				tokenTypeToAddr[ttype] = gen.ip;				
+			}
 			try {
 				gen.block();
 				int ruleTokenType = lg.getTokenType(r.name);
-				gen.emit(new NFABytecodeGenerator.AcceptInstr(ruleTokenType));
+				if ( !r.isFragment() ) {
+					gen.emit(new NFABytecodeGenerator.AcceptInstr(ruleTokenType));
+				}
+				else {
+					gen.emit(new NFABytecodeGenerator.RetInstr());
+				}
 			}
 			catch (Exception e){
 				e.printStackTrace(System.err);
 			}
 		}
 		byte[] code = NFABytecodeGenerator.getByteCode(gen.instrs);
-		System.out.println("all:");
-		System.out.println(Bytecode.disassemble(code));
 		System.out.println("rule addrs="+ruleToAddr);

 		NFA nfa = new NFA(code, ruleToAddr);
--- a/tool/src/org/antlr/v4/codegen/NFABytecodeTriggers.g
+++ b/tool/src/org/antlr/v4/codegen/NFABytecodeTriggers.g
@ -168,8 +168,8 @@ range
 terminal
    :  ^(STRING_LITERAL .)			{emitString($STRING_LITERAL.token);}
    |	STRING_LITERAL				{emitString($STRING_LITERAL.token);}
-    |	^(TOKEN_REF ARG_ACTION .)	
-    |	^(TOKEN_REF .)				
+    |	^(TOKEN_REF ARG_ACTION .)	{emit(new CallInstr($TOKEN_REF.token));}
+    |	^(TOKEN_REF .)				{emit(new CallInstr($TOKEN_REF.token));}
    |	TOKEN_REF					
    |	^(ROOT terminal)			
    |	^(BANG terminal)			
--- a/tool/src/org/antlr/v4/codegen/NFABytecodeTriggers.java
+++ b/tool/src/org/antlr/v4/codegen/NFABytecodeTriggers.java
@ -1,4 +1,4 @@
-// $ANTLR ${project.version} ${buildNumber} NFABytecodeTriggers.g 2010-04-30 18:19:35
+// $ANTLR ${project.version} ${buildNumber} NFABytecodeTriggers.g 2010-05-01 11:23:08

 package org.antlr.v4.codegen;

@ -1319,6 +1319,8 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
    public final void terminal() throws RecognitionException {
        GrammarAST STRING_LITERAL3=null;
        GrammarAST STRING_LITERAL4=null;
+        GrammarAST TOKEN_REF5=null;
+        GrammarAST TOKEN_REF6=null;

        try {
            // NFABytecodeTriggers.g:169:5: ( ^( STRING_LITERAL . ) | STRING_LITERAL | ^( TOKEN_REF ARG_ACTION . ) | ^( TOKEN_REF . ) | TOKEN_REF | ^( ROOT terminal ) | ^( BANG terminal ) )
@ -1349,42 +1351,44 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
                case 3 :
                    // NFABytecodeTriggers.g:171:7: ^( TOKEN_REF ARG_ACTION . )
                    {
-                    match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal847); 
+                    TOKEN_REF5=(GrammarAST)match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal847); 

                    match(input, Token.DOWN, null); 
                    match(input,ARG_ACTION,FOLLOW_ARG_ACTION_in_terminal849); 
                    matchAny(input); 

                    match(input, Token.UP, null); 
+                    emit(new CallInstr(TOKEN_REF5.token));

                    }
                    break;
                case 4 :
                    // NFABytecodeTriggers.g:172:7: ^( TOKEN_REF . )
                    {
-                    match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal862); 
+                    TOKEN_REF6=(GrammarAST)match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal863); 

                    match(input, Token.DOWN, null); 
                    matchAny(input); 

                    match(input, Token.UP, null); 
+                    emit(new CallInstr(TOKEN_REF6.token));

                    }
                    break;
                case 5 :
                    // NFABytecodeTriggers.g:173:7: TOKEN_REF
                    {
-                    match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal877); 
+                    match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal879); 

                    }
                    break;
                case 6 :
                    // NFABytecodeTriggers.g:174:7: ^( ROOT terminal )
                    {
-                    match(input,ROOT,FOLLOW_ROOT_in_terminal891); 
+                    match(input,ROOT,FOLLOW_ROOT_in_terminal893); 

                    match(input, Token.DOWN, null); 
-                    pushFollow(FOLLOW_terminal_in_terminal893);
+                    pushFollow(FOLLOW_terminal_in_terminal895);
                    terminal();

                    state._fsp--;
@ -1397,10 +1401,10 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
                case 7 :
                    // NFABytecodeTriggers.g:175:7: ^( BANG terminal )
                    {
-                    match(input,BANG,FOLLOW_BANG_in_terminal906); 
+                    match(input,BANG,FOLLOW_BANG_in_terminal908); 

                    match(input, Token.DOWN, null); 
-                    pushFollow(FOLLOW_terminal_in_terminal908);
+                    pushFollow(FOLLOW_terminal_in_terminal910);
                    terminal();

                    state._fsp--;
@ -1785,11 +1789,11 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
    public static final BitSet FOLLOW_STRING_LITERAL_in_terminal833 = new BitSet(new long[]{0x0000000000000002L});
    public static final BitSet FOLLOW_TOKEN_REF_in_terminal847 = new BitSet(new long[]{0x0000000000000004L});
    public static final BitSet FOLLOW_ARG_ACTION_in_terminal849 = new BitSet(new long[]{0xFFFFFFFFFFFFFFF0L,0x0000007FFFFFFFFFL});
-    public static final BitSet FOLLOW_TOKEN_REF_in_terminal862 = new BitSet(new long[]{0x0000000000000004L});
-    public static final BitSet FOLLOW_TOKEN_REF_in_terminal877 = new BitSet(new long[]{0x0000000000000002L});
-    public static final BitSet FOLLOW_ROOT_in_terminal891 = new BitSet(new long[]{0x0000000000000004L});
-    public static final BitSet FOLLOW_terminal_in_terminal893 = new BitSet(new long[]{0x0000000000000008L});
-    public static final BitSet FOLLOW_BANG_in_terminal906 = new BitSet(new long[]{0x0000000000000004L});
-    public static final BitSet FOLLOW_terminal_in_terminal908 = new BitSet(new long[]{0x0000000000000008L});
+    public static final BitSet FOLLOW_TOKEN_REF_in_terminal863 = new BitSet(new long[]{0x0000000000000004L});
+    public static final BitSet FOLLOW_TOKEN_REF_in_terminal879 = new BitSet(new long[]{0x0000000000000002L});
+    public static final BitSet FOLLOW_ROOT_in_terminal893 = new BitSet(new long[]{0x0000000000000004L});
+    public static final BitSet FOLLOW_terminal_in_terminal895 = new BitSet(new long[]{0x0000000000000008L});
+    public static final BitSet FOLLOW_BANG_in_terminal908 = new BitSet(new long[]{0x0000000000000004L});
+    public static final BitSet FOLLOW_terminal_in_terminal910 = new BitSet(new long[]{0x0000000000000008L});

 }
--- a/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java
+++ b/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java
@ -0,0 +1,105 @@
+package org.antlr.v4.test;
+
+import org.antlr.v4.Tool;
+import org.antlr.v4.codegen.NFABytecodeGenerator;
+import org.antlr.v4.runtime.nfa.Bytecode;
+import org.antlr.v4.runtime.nfa.NFA;
+import org.antlr.v4.semantics.SemanticPipeline;
+import org.antlr.v4.tool.Grammar;
+import org.antlr.v4.tool.LexerGrammar;
+import org.junit.Test;
+
+public class TestNFABytecodeGeneration extends BaseTest {
+	@Test public void testString() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n"+
+			"A : 'ab' ;");
+		String expecting =
+			"0000:\tsplit         5\n" +
+			"0005:\tmatch8        'a'\n" +
+			"0007:\tmatch8        'b'\n" +
+			"0009:\taccept        4\n";
+		checkBytecode(g, expecting);
+	}
+
+	@Test public void testIDandIntandKeyword() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : 'ab';\n" +
+			"B : 'a'..'z'+ ;\n" +
+			"I : '0'..'9'+ ;\n");
+		String expecting =
+			"0000:\tsplit         9, 16, 29\n" +
+			"0009:\tmatch8        'a'\n" +
+			"0011:\tmatch8        'b'\n" +
+			"0013:\taccept        4\n" +
+			"0016:\trange8        'a', 'z'\n" +
+			"0019:\tsplit         16, 26\n" +
+			"0026:\taccept        5\n" +
+			"0029:\trange8        '0', '9'\n" +
+			"0032:\tsplit         29, 39\n" +
+			"0039:\taccept        6\n";
+		checkBytecode(g, expecting);
+	}
+
+	@Test public void testNonGreedy() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"\n" +
+			"CMT : '/*' (options {greedy=false;}:.)* '*/' ;\n" +
+			"ID  : 'ab' ;\n");
+		String expecting =
+			"0000:\tsplit         7, 29\n" +
+			"0007:\tmatch8        '/'\n" +
+			"0009:\tmatch8        '*'\n" +
+			"0011:\tsplit         22, 18\n" +
+			"0018:\twildcard        \n" +
+			"0019:\tjmp           11\n" +
+			"0022:\tmatch8        '*'\n" +
+			"0024:\tmatch8        '/'\n" +
+			"0026:\taccept        4\n" +
+			"0029:\tmatch8        'a'\n" +
+			"0031:\tmatch8        'b'\n" +
+			"0033:\taccept        5\n";
+		checkBytecode(g, expecting);
+	}
+
+	@Test public void testCallFragment() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"I : D+ ;\n" +
+			"fragment D : '0'..'9'+ ;\n");
+		String expecting =
+			"0000:\tsplit         5\n" +
+			"0005:\tsplit         5, 12\n" +
+			"0012:\taccept        4\n" +
+			"0015:\trange8        '0', '9'\n" +
+			"0018:\tsplit         15, 25\n" +
+			"0025:\tret             \n";
+		checkBytecode(g, expecting);
+	}
+
+	public void _template() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"\n");
+		String expecting =
+			"\n";
+		checkBytecode(g, expecting);
+	}
+
+	void checkBytecode(LexerGrammar g, String expecting) {
+		if ( g.ast!=null && !g.ast.hasErrors ) {
+			System.out.println(g.ast.toStringTree());
+			Tool antlr = new Tool();
+			SemanticPipeline sem = new SemanticPipeline(g);
+			sem.process();
+			if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
+				for (Grammar imp : g.getImportedGrammars()) {
+					antlr.process(imp);
+				}
+			}
+		}
+		NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
+		assertEquals(expecting, Bytecode.disassemble(nfa.code));
+	}
+}
--- a/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java
+++ b/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java
@ -0,0 +1,104 @@
+package org.antlr.v4.test;
+
+import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.Token;
+import org.antlr.v4.Tool;
+import org.antlr.v4.codegen.NFABytecodeGenerator;
+import org.antlr.v4.runtime.nfa.NFA;
+import org.antlr.v4.semantics.SemanticPipeline;
+import org.antlr.v4.tool.Grammar;
+import org.antlr.v4.tool.LexerGrammar;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
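+/** Runs the NFA bytecode interpreter over input strings and checks the sequence of token types it returns. */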
+public class TestNFABytecodeInterp extends BaseTest {
+	@Test public void testString() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n"+
+			"A : 'ab' ;");
+		String expecting = "A, A, EOF";
+		checkMatches(g, "abab", expecting);
+	}
+
+	@Test public void testIDandIntandKeyword() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : 'ab';\n" +
+			"B : 'a'..'z'+ ;\n" +
+			"I : '0'..'9'+ ;\n");
+		String expecting = "A, I, B, EOF";
+		checkMatches(g, "ab32abc", expecting);
+	}
+
+	@Test public void testNonGreedy() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"\n" +
+			"CMT : '/*' (options {greedy=false;}:.)* '*/' ;\n" +
+			"ID  : 'ab' ;\n");
+		String expecting = "ID, CMT, EOF";
+		checkMatches(g, "ab/* x */", expecting);
+	}
+
+	@Test public void testNonGreedyAndCommonLeftPrefix() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"\n" +
+			"CMT : '/*' (options {greedy=false;}:.)* '*/' ;\n" +
+			"CMT2: '/*' (options {greedy=false;}:.)* '*/' '!' ;\n" +
+			"ID  : 'ab' ;\n");
+		String expecting = "ID, CMT2, CMT, EOF";
+		checkMatches(g, "ab/* x */!/* foo */", expecting);
+	}
+
+	@Test public void testCallFragment() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"I : D+ ;\n" +
+			"fragment D : '0'..'9'+ ;\n");
+		String expecting = "";
+		checkMatches(g, "a", expecting);
+	}
+
+	public void _template() throws Exception {
+		LexerGrammar g = new LexerGrammar(
+			"\n");
+		String expecting = "";
+		checkMatches(g, "input", expecting);
+	}
+
+	void checkMatches(LexerGrammar g, String input, String expecting) {
+		if ( g.ast!=null && !g.ast.hasErrors ) {
+			System.out.println(g.ast.toStringTree());
+			Tool antlr = new Tool();
+			SemanticPipeline sem = new SemanticPipeline(g);
+			sem.process();
+			if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
+				for (Grammar imp : g.getImportedGrammars()) {
+					antlr.process(imp);
+				}
+			}
+		}
+
+		List<Integer> expectingTokens = new ArrayList<Integer>();
+		if ( expecting!=null && !expecting.trim().equals("") ) {
+			for (String tname : expecting.replace(" ", "").split(",")) {
+				int ttype = g.getTokenType(tname);
+				expectingTokens.add(ttype);
+			}
+		}
+
+		NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
+		ANTLRStringStream in = new ANTLRStringStream(input);
+		List<Integer> tokens = new ArrayList<Integer>();
+		int ttype = 0;
+		do {
+			ttype = nfa.execThompson(in);
+			tokens.add(ttype);
+		} while ( ttype!= Token.EOF );
+		assertEquals(expectingTokens, tokens);
+	}
+}