added NOT/SET ops, refactored PDA generation

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6900]
2010-05-27 16:01:55 -08:00 · 2010-05-27 16:01:55 -08:00 · 19aecd3163
parent acf962bc28
commit 19aecd3163
15 changed files with 701 additions and 554 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/pda/Bytecode.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/pda/Bytecode.java
@ -39,21 +39,22 @@ public class Bytecode {
 	// be an array of objects (Bytecode[]). We want it to be byte[].

 	// INSTRUCTION BYTECODES (byte is signed; use a short to keep 0..255)
-	public static final short ACCEPT	= 1;
-	public static final short JMP		= 2;
-	public static final short SPLIT		= 3;
-	public static final short MATCH8	= 4;
-	public static final short MATCH16	= 5;
-	public static final short RANGE8	= 6;
-	public static final short RANGE16	= 7;
-	public static final short WILDCARD	= 8;
-	//public static final short NOT	= 8; ???
-	public static final short CALL		= 9; // JMP with a push
-	public static final short RET		= 10; // an accept instr for fragment rules
-	public static final short LABEL		= 11;
-	public static final short SAVE		= 12;
-	public static final short SEMPRED	= 13;
-	public static final short ACTION	= 14;
+	public static final short ACCEPT	 = 1;
+	public static final short JMP		 = 2;
+	public static final short SPLIT		 = 3;
+	public static final short MATCH8	 = 4;
+	public static final short MATCH16	 = 5;
+	public static final short RANGE8	 = 6;
+	public static final short RANGE16	 = 7;
+	public static final short WILDCARD	 = 8;
+	public static final short SET	     = 9;
+	public static final short CALL		 = 10; // JMP with a push
+	public static final short RET		 = 11; // an accept instr for fragment rules
+	public static final short LABEL		 = 12;
+	public static final short SAVE		 = 13;
+	public static final short SEMPRED	 = 14;
+	public static final short ACTION	 = 15;
+	public static final short NOT	     = 16; // not next match instr

 	/** Used for disassembly; describes instruction set */
 	public static Instruction[] instructions = new Instruction[] {
@ -66,12 +67,14 @@ public class Bytecode {
 		new Instruction("range8", OperandType.BYTE, OperandType.BYTE),
 		new Instruction("range16", OperandType.CHAR, OperandType.CHAR),
 		new Instruction("wildcard"),
+		new Instruction("set", OperandType.SHORT),
 		new Instruction("call", OperandType.ADDR),
 		new Instruction("ret"),
 		new Instruction("label", OperandType.SHORT),
 		new Instruction("save", OperandType.SHORT),
 		new Instruction("sempred", OperandType.SHORT, OperandType.SHORT), // sempred ruleIndex, predIndex
 		new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex
+		new Instruction("not"),
 	};

 	public static String disassemble(byte[] code, int start, boolean operandsAreChars) {
--- a/runtime/Java/src/org/antlr/v4/runtime/pda/PDA.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/pda/PDA.java
@ -8,7 +8,6 @@ import org.antlr.v4.runtime.CommonToken;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import java.util.Map;

 /** A (nondeterministic) pushdown bytecode machine for lexing and LL prediction.
 *  Derived partially from Cox' description of Thompson's 1960s work:
@ -22,28 +21,20 @@ public class PDA {
 	public interface sempred_fptr { boolean eval(int predIndex); }

 	public byte[] code;
-	public Map<String, Integer> ruleToAddr;
-	public int[] tokenTypeToAddr;
+	//public Map<String, Integer> ruleToAddr;
+	public int[] altToAddr; // either token type (in lexer) or alt num for DFA in parser
 	public CommonToken[] labelValues;
 	public int nLabels;

 	/** If we hit an action, we'll have to rewind and do the winning rule again */
 	boolean bypassedAction;

-    public PDA() {;}
-	
-	public PDA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
-		this.code = code;
-		this.ruleToAddr = ruleToAddr;
-		this.tokenTypeToAddr = tokenTypeToAddr;
-		this.nLabels = nLabels;
-		labelValues = new CommonToken[nLabels];
-	}
+	boolean notNextMatch;

-	public PDA(byte[] code, int[] tokenTypeToAddr, int nLabels) {
+	public PDA(byte[] code, int[] altToAddr, int nLabels) {
 		System.out.println("code="+Arrays.toString(code));
 		this.code = code;
-		this.tokenTypeToAddr = tokenTypeToAddr;
+		this.altToAddr = altToAddr;
 		this.nLabels = nLabels;
 		labelValues = new CommonToken[nLabels];
 	}
@ -58,7 +49,7 @@ public class PDA {
 			System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling");
 			bypassedAction = false;
 			Arrays.fill(labelValues, null);
-			int ttype2 = execThompson(input, tokenTypeToAddr[ttype], true);
+			int ttype2 = execThompson(input, altToAddr[ttype], true);
 			if ( ttype!=ttype2 ) {
 				System.err.println("eh? token diff with action(s)");
 			}
@ -92,33 +83,48 @@ processOneChar:
 				//System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
 				trace(ip);
 				short opcode = code[ip];
+				boolean matched;
 				ip++; // move to next instruction or first byte of operand
 				switch (opcode) {
+					case Bytecode.NOT :
+						notNextMatch = true;
+						break;
 					case Bytecode.MATCH8 :
-						if ( c == code[ip] ) {
+						if ( c == code[ip] || (notNextMatch && c != code[ip]) ) {
 							addToClosure(reach, ip+1, alt, context);
 						}
+						notNextMatch = false;
 						break;
 					case Bytecode.MATCH16 :
-						if ( c == getShort(code, ip) ) {
+						matched = c == getShort(code, ip);
+						if ( matched || (notNextMatch && matched) ) {
 							addToClosure(reach, ip+2, alt, context);
 						}
+						notNextMatch = false;
 						break;
 					case Bytecode.RANGE8 :
-						if ( c>=code[ip] && c<=code[ip+1] ) {
+						matched = c >= code[ip] && c <= code[ip + 1];
+						if ( matched || (notNextMatch && matched) ) {
 							addToClosure(reach, ip+2, alt, context);
 						}
+						notNextMatch = false;
 						break;
 					case Bytecode.RANGE16 :
-						if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) {
+						matched = c < getShort(code, ip) || c > getShort(code, ip + 2);
+						if ( matched || (notNextMatch && matched) ) {
 							addToClosure(reach, ip+4, alt, context);
 						}
+						notNextMatch = false;
 						break;
 					case Bytecode.WILDCARD :
 						if ( c!=Token.EOF ) {
 							addToClosure(reach, ip, alt, context);
 						}
 						break;
+					case Bytecode.SET :
+						System.err.println("not impl");
+						notNextMatch = false;
+						break;
 					case Bytecode.LABEL : // lexers only
 						int labelIndex = getShort(code, ip);
 						labelValues[labelIndex] =
@ -217,6 +223,10 @@ processOneChar:
 		short opcode = code[ip];
 		ip++; // move to next instruction or first byte of operand
 		switch (opcode) {
+			case Bytecode.NOT : // see thru NOT but include in closure so we exec during reach
+				closure.add(t);	// add to closure; need to execute during reach
+				addToClosure(closure, ip, alt, context);				
+				break;
 			case Bytecode.JMP :
 				addToClosure(closure, getShort(code, ip), alt, context);
 				break;
@ -360,10 +370,10 @@ processOneChar:
 						}
 						// if we reach accept state, toss out any addresses in rest
 						// of work list associated with accept's rule; that rule is done
-						int ruleStart = tokenTypeToAddr[ttype];
+						int ruleStart = altToAddr[ttype];
 						int ruleStop = code.length;
-						if ( ttype+1 < tokenTypeToAddr.length ) {
-							ruleStop = tokenTypeToAddr[ttype+1]-1;
+						if ( ttype+1 < altToAddr.length ) {
+							ruleStop = altToAddr[ttype+1]-1;
 						}
 						System.out.println("kill range "+ruleStart+".."+ruleStop);
 						int j=i+1;
--- a/tool/src/org/antlr/v4/codegen/CompiledPDA.java
+++ b/tool/src/org/antlr/v4/codegen/CompiledPDA.java
@ -3,6 +3,7 @@ package org.antlr.v4.codegen;
 import org.antlr.runtime.Token;
 import org.antlr.v4.codegen.pda.Instr;
 import org.antlr.v4.misc.DoubleKeyMap;
+import org.antlr.v4.misc.IntervalSet;
 import org.antlr.v4.tool.Rule;

 import java.util.ArrayList;
@ -14,12 +15,18 @@ import java.util.Map;
 public class CompiledPDA {
 	public List<Instr> instrs = new ArrayList<Instr>();
 	public byte[] code; // instrs in bytecode form
-	public int ip = 0; // where to write next
+	public List<IntervalSet> set8table = new ArrayList<IntervalSet>();
+	public List<IntervalSet> set16table = new ArrayList<IntervalSet>();
 	public Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
-	public int[] tokenTypeToAddr;
+
+	public int[] altToAddr; // either token type (in lexer) or alt num for DFA in parser

 	public DoubleKeyMap<Rule, String, Integer> ruleLabels = new DoubleKeyMap<Rule, String, Integer>();
 	public DoubleKeyMap<Rule, Token, Integer> ruleActions = new DoubleKeyMap<Rule, Token, Integer>();
 	public DoubleKeyMap<Rule, Token, Integer> ruleSempreds = new DoubleKeyMap<Rule, Token, Integer>();
 	public int nLabels;
+
+	public CompiledPDA(int numAlts) { // numAlts: largest index that altToAddr must be able to hold
+		altToAddr = new int[numAlts+1]; // +1 so that indexes 0..numAlts are all valid
+	}
 }
--- a/tool/src/org/antlr/v4/codegen/DFACompiler.java
+++ b/tool/src/org/antlr/v4/codegen/DFACompiler.java
@ -0,0 +1,77 @@
+package org.antlr.v4.codegen;
+
+import org.antlr.v4.automata.DFA;
+import org.antlr.v4.automata.DFAState;
+import org.antlr.v4.automata.Edge;
+import org.antlr.v4.codegen.pda.*;
+import org.antlr.v4.runtime.pda.PDA;
+
+/** Compiles a DFA into PDA bytecode. Instr objects are emitted through a
+ *  PDABytecodeGenerator while walking the DFA from its start state; jump
+ *  targets are then translated from DFA state numbers to bytecode addresses.
+ */
+public class DFACompiler {
+	public DFA dfa; // the DFA being compiled
+	boolean[] marked; // marked[s] is set once walk(DFAState) has visited state s
+	int[] stateToAddr; // stateToAddr[s] is gen.ip at the moment state s was visited
+	PDABytecodeGenerator gen; // receives the emitted instructions
+
+	public DFACompiler(DFA dfa) {
+		this.dfa = dfa;
+		gen = new PDABytecodeGenerator(dfa.g.getMaxTokenType()); // max token type is passed as the generator's numAlts
+	}
+	/** Emit instructions for the whole DFA, convert them to bytecode via
+	 *  gen.compile(), and return the generator's CompiledPDA.
+	 */
+	public CompiledPDA compile() {
+		walk();
+		gen.compile();
+		return gen.obj;
+	}
+	/** Emit instructions for every state reachable from dfa.startState and
+	 *  then patch jump targets. Always returns null; results are left in gen.
+	 */
+	public PDA walk() {
+		marked = new boolean[dfa.stateSet.size()+1];
+		stateToAddr = new int[dfa.stateSet.size()+1];
+		walk(dfa.startState);
+
+		// Jumps were emitted with DFA state numbers as targets; replace each with that state's bytecode address.
+		for (Instr I : gen.obj.instrs) {
+			System.out.println("instr "+I);
+			if ( I instanceof JumpInstr) {
+				JumpInstr J = (JumpInstr)I;
+				J.target = stateToAddr[J.target];
+			}
+		}
+
+		return null;
+	}
+
+	// Recursive, depth-first walk so that chains of DFA states are followed,
+	// which is intended to lead to fewer jmp instructions. Each jump is emitted
+	// with the target's DFA state number as a placeholder; walk() translates
+	// those to bytecode addresses afterwards in one pass.
+	public void walk(DFAState d) {
+		if ( marked[d.stateNumber] ) return; // code for this state was already emitted
+		marked[d.stateNumber] = true;
+		stateToAddr[d.stateNumber] = gen.ip; // this state's code starts at the current write address
+		System.out.println("visit "+d.stateNumber+" @"+ gen.ip);
+		if ( d.isAcceptState ) {
+			AcceptInstr A = new AcceptInstr(d.predictsAlt); // accept states emit only an accept for the predicted alt
+			gen.emit(A);
+			return;
+		}
+		SplitInstr S = null;
+		if ( d.edges.size()>1 ) { // more than one edge: emit a split that gets one address per edge
+			S = new SplitInstr(d.edges.size());
+			gen.emit(S);
+		}
+		for (Edge e : d.edges) {
+			if ( S!=null ) S.addrs.add(gen.ip); // record where this edge's code begins
+			if ( e.label.getMinElement() == e.label.getMaxElement() ) { // min == max: label treated as a single value
+				MatchInstr M = new MatchInstr(e.label.getSingleElement());
+				gen.emit(M);
+			}
+			else {
+				gen.emit(new SetInstr(e.label)); // otherwise emit a set instruction for the whole label
+			}
+			JumpInstr J = new JumpInstr(e.target.stateNumber); // target is a state number until walk() patches it
+			gen.emit(J);
+			walk(e.target);
+		}
+	}
+}
--- a/tool/src/org/antlr/v4/codegen/LexerCompiler.java
+++ b/tool/src/org/antlr/v4/codegen/LexerCompiler.java
@ -0,0 +1,65 @@
+package org.antlr.v4.codegen;
+
+import org.antlr.runtime.tree.CommonTreeNodeStream;
+import org.antlr.v4.codegen.pda.AcceptInstr;
+import org.antlr.v4.codegen.pda.RetInstr;
+import org.antlr.v4.codegen.pda.SplitInstr;
+import org.antlr.v4.parse.ANTLRParser;
+import org.antlr.v4.parse.GrammarASTAdaptor;
+import org.antlr.v4.runtime.pda.Bytecode;
+import org.antlr.v4.tool.GrammarAST;
+import org.antlr.v4.tool.LexerGrammar;
+import org.antlr.v4.tool.Rule;
+
+/** Compiles the rules of one lexer mode into a CompiledPDA by running the
+ *  PDABytecodeTriggers tree grammar over each rule's BLOCK subtree.
+ */
+public class LexerCompiler {
+	LexerGrammar lg; // grammar whose modes are compiled
+	public LexerCompiler(LexerGrammar lg) {
+		this.lg = lg;
+	}
+	/** Generate instructions for every rule in mode modeName, convert them to
+	 *  bytecode, print a disassembly and the rule address map to stdout, and
+	 *  return the resulting CompiledPDA.
+	 */
+	public CompiledPDA compileMode(String modeName) {
+		GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
+		PDABytecodeGenerator gen = new PDABytecodeGenerator(lg.getMaxTokenType());
+		PDABytecodeTriggers trigger = new PDABytecodeTriggers(null, gen); // tree node stream is set per rule below
+
+		// add split for s0 to hook up rules (fill in operands as we gen rules)
+		int numRules = lg.modes.get(modeName).size();
+		int numFragmentRules = 0;
+		for (Rule r : lg.modes.get(modeName)) { if ( r.isFragment() ) numFragmentRules++; }
+		SplitInstr s0 = new SplitInstr(numRules - numFragmentRules); // one branch per non-fragment rule
+		gen.emit(s0);
+
+
+		for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
+			gen.currentRule = r;
+			GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
+			CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
+			trigger.setTreeNodeStream(nodes);
+			int ttype = lg.getTokenType(r.name);
+			gen.defineRuleAddr(r.name, gen.ip); // every rule, fragment or not, is recorded by name
+			if ( !r.isFragment() ) { // only non-fragment rules are reachable from s0 and mapped by token type
+				s0.addrs.add(gen.ip);
+				gen.defineTokenTypeToAddr(ttype, gen.ip);
+			}
+			try {
+				trigger.block(); // GEN Instr OBJECTS
+				int ruleTokenType = lg.getTokenType(r.name); // same lookup as ttype above
+				if ( !r.isFragment() ) {
+					gen.emit(new AcceptInstr(ruleTokenType)); // non-fragment rules end with accept of their token type
+				}
+				else {
+					gen.emit(new RetInstr()); // fragment rules end with ret
+				}
+			}
+			catch (Exception e){ // failure is only printed; the loop continues with the next rule
+				e.printStackTrace(System.err);
+			}
+		}
+		gen.compile();
+		gen.obj.nLabels = gen.labelIndex; // labelIndex is incremented for each new label the generator hands out
+		System.out.println(Bytecode.disassemble(gen.obj.code));
+		System.out.println("rule addrs="+ gen.obj.ruleToAddr);
+		return gen.obj;
+	}
+}
--- a/tool/src/org/antlr/v4/codegen/LexerFactory.java
+++ b/tool/src/org/antlr/v4/codegen/LexerFactory.java
@ -25,7 +25,8 @@ public class LexerFactory {
 		fileST.add("fileName", gen.getRecognizerFileName());
 		fileST.add("lexer", lexerST);
 		for (String modeName : lg.modes.keySet()) { // for each mode
-			CompiledPDA pda = PDABytecodeGenerator.compileLexerMode(lg, modeName);
+			LexerCompiler comp = new LexerCompiler(lg);
+			CompiledPDA pda = comp.compileMode(modeName);
 			ST pdaST = gen.templates.getInstanceOf("PDA");
 			for (Rule r : pda.ruleActions.keySet()) {
 				Set<Token> actionTokens = pda.ruleActions.keySet(r);
--- a/tool/src/org/antlr/v4/codegen/PDABytecodeGenerator.java
+++ b/tool/src/org/antlr/v4/codegen/PDABytecodeGenerator.java
@ -1,66 +1,61 @@
 package org.antlr.v4.codegen;

-import org.antlr.runtime.RecognizerSharedState;
 import org.antlr.runtime.Token;
-import org.antlr.runtime.tree.CommonTreeNodeStream;
-import org.antlr.runtime.tree.Tree;
-import org.antlr.runtime.tree.TreeNodeStream;
-import org.antlr.v4.automata.DFA;
-import org.antlr.v4.automata.DFAState;
-import org.antlr.v4.automata.Edge;
-import org.antlr.v4.codegen.pda.*;
+import org.antlr.v4.codegen.pda.CallInstr;
+import org.antlr.v4.codegen.pda.Instr;
+import org.antlr.v4.codegen.pda.MatchInstr;
+import org.antlr.v4.codegen.pda.NotInstr;
 import org.antlr.v4.misc.CharSupport;
 import org.antlr.v4.misc.IntervalSet;
-import org.antlr.v4.parse.ANTLRParser;
-import org.antlr.v4.parse.GrammarASTAdaptor;
-import org.antlr.v4.runtime.pda.Bytecode;
-import org.antlr.v4.runtime.pda.PDA;
-import org.antlr.v4.runtime.tree.TreeParser;
-import org.antlr.v4.tool.*;
+import org.antlr.v4.tool.Rule;

 import java.util.Map;

 /** http://swtch.com/~rsc/regexp/regexp2.html */
-public class PDABytecodeGenerator extends TreeParser {
-	public Grammar g;
-
+public class PDABytecodeGenerator {
 	public Rule currentRule;

-	CompiledPDA pda = new CompiledPDA();
+	public CompiledPDA obj;

-	public int labelIndex = 0; // first time we ask for labels we index
+	public int ip = 0; // where to write next

-	public PDABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
-		super(input, state);
+	int labelIndex = 0; // first time we ask for labels we index
+
+	public PDABytecodeGenerator(int numAlts) { // numAlts is forwarded to the CompiledPDA this generator fills in
+		obj = new CompiledPDA(numAlts);
+	}
+
+	public void compile() { // call after all instructions have been emitted
+		obj.code = convertInstrsToBytecode(); // store the bytecode form of obj.instrs in obj.code
 	}

 	public void emit(Instr I) {
-		I.addr = pda.ip;
+		I.addr = ip;
 		I.rule = currentRule;
 		I.gen = this;
-		pda.ip += I.nBytes();
-		pda.instrs.add(I);
+		ip += I.nBytes();
+		obj.instrs.add(I);
 	}

 	// indexed from 0 per rule
 	public int getActionIndex(Rule r, Token actionToken) {
-		Integer I = pda.ruleActions.get(r, actionToken);
+		Integer I = obj.ruleActions.get(r, actionToken);
 		if ( I!=null ) return I; // already got its label
-		Map<Token, Integer> labels = pda.ruleActions.get(r);
+		Map<Token, Integer> labels = obj.ruleActions.get(r);
 		int i = 0;
 		if ( labels!=null ) i = labels.size();
-		pda.ruleActions.put(r, actionToken, i);
+		obj.ruleActions.put(r, actionToken, i);
 		return i;
 	}

 	// indexed from 0 per rule
 	public int getSempredIndex(Rule r, Token actionToken) {
-		Integer I = pda.ruleSempreds.get(r, actionToken);
+		Integer I = obj.ruleSempreds.get(r, actionToken);
 		if ( I!=null ) return I; // already got its label
-		Map<Token, Integer> labels = pda.ruleSempreds.get(r);
+		Map<Token, Integer> labels = obj.ruleSempreds.get(r);
 		int i = 0;
 		if ( labels!=null ) i = labels.size();
-		pda.ruleSempreds.put(r, actionToken, i);
+		obj.ruleSempreds.put(r, actionToken, i);
 		return i;
 	}

@ -69,129 +64,55 @@ public class PDABytecodeGenerator extends TreeParser {
 	 *  to an index in an action.
 	 */
 	public int getLabelIndex(Rule r, String labelName) {
-		Integer I = pda.ruleLabels.get(r, labelName);
+		Integer I = obj.ruleLabels.get(r, labelName);
 		if ( I!=null ) return I; // already got its label
 		int i = labelIndex++;
-		pda.ruleLabels.put(r, labelName, i);
+		obj.ruleLabels.put(r, labelName, i);
 		return i;
 	}

+	public int getSetIndex(IntervalSet set) { // returns the position of set in obj.set8table
+		obj.set8table.add(set); // always appends; no lookup for a set added earlier
+		return obj.set8table.size()-1; // index of the entry just added
+	}
+
 	public void emitString(Token t, boolean not) {
 		String chars = CharSupport.getStringFromGrammarStringLiteral(t.getText());
-		if ( not && chars.length()==1 ) {
-			emitNotChar(t, chars);
-			return;
-		}
+		if ( not && chars.length()==1 ) emit(new NotInstr());
 		for (char c : chars.toCharArray()) {
 			emit(new MatchInstr(t, c));
 		}
 	}

-	public void emitNotChar(Token t, String chars) {
-		IntervalSet all = (IntervalSet)g.getTokenTypes();
-		int c = chars.charAt(0);
-		SplitInstr s = new SplitInstr(2);
-		RangeInstr left = new RangeInstr(t, t);
-		left.a = all.getMinElement();
-		left.b = c-1;
-		RangeInstr right = new RangeInstr(t, t);
-		right.a = c+1;
-		right.b = 127; // all.getMaxElement();
-		emit(s);
-		emit(left);
-		JumpInstr J = new JumpInstr();
-		emit(J);
-		emit(right);
-		s.addrs.add(left.addr);
-		s.addrs.add(right.addr);
-		int END = pda.ip;
-		J.target = END;
-		return;
-	}
-
 	public byte[] convertInstrsToBytecode() {
-		Instr last = pda.instrs.get(pda.instrs.size() - 1);
+		Instr last = obj.instrs.get(obj.instrs.size() - 1);
 		int size = last.addr + last.nBytes();
 		byte[] code = new byte[size];

 		// resolve CALL instruction targets before generating code
-		for (Instr I : pda.instrs) {
+		for (Instr I : obj.instrs) {
 			if ( I instanceof CallInstr ) {
 				CallInstr C = (CallInstr) I;
 				String ruleName = C.token.getText();
-				C.target = pda.ruleToAddr.get(ruleName);
+				C.target = obj.ruleToAddr.get(ruleName);
 			}
 		}
-		for (Instr I : pda.instrs) {
+		for (Instr I : obj.instrs) {
 			I.write(code);
 		}
 		return code;
 	}

-	public static CompiledPDA compileLexerMode(LexerGrammar lg, String modeName) {
-		GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
-		PDABytecodeTriggers gen = new PDABytecodeTriggers(null);
-		gen.g = lg;
-		gen.pda.tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
-
-		// add split for s0 to hook up rules (fill in operands as we gen rules)
-		int numRules = lg.modes.get(modeName).size();
-		int numFragmentRules = 0;
-		for (Rule r : lg.modes.get(modeName)) { if ( r.isFragment() ) numFragmentRules++; }
-		SplitInstr s0 = new SplitInstr(numRules - numFragmentRules);
-		gen.emit(s0);
-
-
-		for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
-			gen.currentRule = r;
-			GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
-			CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
-			gen.setTreeNodeStream(nodes);
-			int ttype = lg.getTokenType(r.name);
-			gen.pda.ruleToAddr.put(r.name, gen.pda.ip);
-			if ( !r.isFragment() ) {
-				s0.addrs.add(gen.pda.ip);
-				gen.pda.tokenTypeToAddr[ttype] = gen.pda.ip;
-			}
-			try {
-				gen.block(); // GEN Instr OBJECTS
-				int ruleTokenType = lg.getTokenType(r.name);
-				if ( !r.isFragment() ) {
-					gen.emit(new AcceptInstr(ruleTokenType));
-				}
-				else {
-					gen.emit(new RetInstr());
-				}
-			}
-			catch (Exception e){
-				e.printStackTrace(System.err);
-			}
-		}
-		gen.pda.code = gen.convertInstrsToBytecode();
-		gen.pda.nLabels = gen.labelIndex;
-		System.out.println(Bytecode.disassemble(gen.pda.code));
-		System.out.println("rule addrs="+gen.pda.ruleToAddr);
-		return gen.pda;
+	public void defineRuleAddr(String name, int ip) { // record bytecode address ip for the rule called name
+		obj.ruleToAddr.put(name, ip); // this map is read when CALL targets are resolved in convertInstrsToBytecode()
 	}

-	// (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .))
-	public boolean blockHasWildcardAlt(GrammarAST block) {
-		for (Object alt : block.getChildren()) {
-			AltAST altAST = (AltAST)alt;
-			if ( altAST.getChildCount()==1 ) {
-				Tree e = altAST.getChild(0);
-				if ( e.getType()==ANTLRParser.WILDCARD ) {
-					return true;
-				}
-			}
-		}
-		return false;
+	public void defineRuleIndexToAddr(int index, int ip) { // record bytecode address ip under index
+		obj.altToAddr[index] = ip; // no bounds check; altToAddr is allocated by the CompiledPDA constructor
 	}

-	// testing
-	public static PDA getPDA(LexerGrammar lg, String modeName) {
-		CompiledPDA info = compileLexerMode(lg, modeName);
-		return new PDA(info.code, info.ruleToAddr, info.tokenTypeToAddr, info.nLabels);
+	public void defineTokenTypeToAddr(int ttype, int ip) { // token types share the altToAddr table
+		defineRuleIndexToAddr(ttype, ip); // the token type is used directly as the index
 	}

 	/** Write value at index into a byte array highest to lowest byte,
@ -202,66 +123,4 @@ public class PDABytecodeGenerator extends TreeParser {
 		memory[index+1] = (byte)(value&0xFF);
 	}

-	// ----------
-
-	public static PDA getPDA(DFA dfa) {
-		PDABytecodeTriggers gen = new PDABytecodeTriggers(null);
-		gen.g = dfa.g;
-		gen.pda.tokenTypeToAddr = new int[gen.g.getMaxTokenType()+1];
-		gen.walk(dfa);
-		gen.pda.code = gen.convertInstrsToBytecode();
-		CompiledPDA c = gen.pda;
-		return new PDA(c.code, c.ruleToAddr, c.tokenTypeToAddr, c.nLabels);
-	}
-
-	boolean[] marked;
-	int[] stateToAddr;
-
-	public PDA walk(DFA dfa) {
-		marked = new boolean[dfa.stateSet.size()+1];
-		stateToAddr = new int[dfa.stateSet.size()+1];
-		walk(dfa.startState);
-
-		// walk code, update jump targets.
-		for (Instr I : pda.instrs) {
-			System.out.println("instr "+I);
-			if ( I instanceof JumpInstr ) {
-				JumpInstr J = (JumpInstr)I;
-				J.target = stateToAddr[J.target];
-			}
-		}
-
-		return null;
-	}
-
-	// recursive so we follow chains in DFA, leading to fewer
-	// jmp instructions.
-	// start by assuming state num is bytecode addr then translate after
-	// in one pass
-	public void walk(DFAState d) {
-		if ( marked[d.stateNumber] ) return;
-		marked[d.stateNumber] = true;
-		stateToAddr[d.stateNumber] = pda.ip;
-		System.out.println("visit "+d.stateNumber+" @"+pda.ip);
-		if ( d.isAcceptState ) {
-			AcceptInstr A = new AcceptInstr(d.predictsAlt);
-			emit(A);
-			return;
-		}
-		SplitInstr S = null;
-		if ( d.edges.size()>1 ) {
-			S = new SplitInstr(d.edges.size());
-			emit(S);
-		}
-		for (Edge e : d.edges) {
-			if ( S!=null ) S.addrs.add(pda.ip);
-			// TODO: assumes no sets yet!
-			MatchInstr M = new MatchInstr(e.label.getSingleElement());
-			JumpInstr J = new JumpInstr(e.target.stateNumber);
-			emit(M);
-			emit(J);
-			walk(e.target);
-		}
-
-	}
 }
--- a/tool/src/org/antlr/v4/codegen/PDABytecodeTriggers.g
+++ b/tool/src/org/antlr/v4/codegen/PDABytecodeTriggers.g
@ -3,13 +3,14 @@ options {
 	language     = Java;
 	tokenVocab   = ANTLRParser;
 	ASTLabelType = GrammarAST;
-	superClass   = PDABytecodeGenerator;
+//	superClass   = PDABytecodeGenerator;
 }

@header {
 package org.antlr.v4.codegen;
 import org.antlr.v4.codegen.pda.*;
 import org.antlr.v4.tool.GrammarAST;
+import org.antlr.v4.tool.AltAST;
 import org.antlr.v4.tool.GrammarASTWithOptions;
 import org.antlr.v4.tool.LexerGrammar;
 import java.util.Collections;
@ -17,6 +18,30 @@ import java.util.Map;
 import java.util.HashMap;
 }

+@members {
+	PDABytecodeGenerator gen; // generator that the rule actions emit instructions into
+	
+	public PDABytecodeTriggers(TreeNodeStream input, PDABytecodeGenerator gen) { // input may be replaced later via setTreeNodeStream
+		this(input);
+		this.gen = gen;
+	}
+
+	// true if some alt of block consists of just a wildcard, e.g. (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .))
+	public boolean blockHasWildcardAlt(GrammarAST block) {
+		for (Object alt : block.getChildren()) {
+			if ( !(alt instanceof AltAST) ) continue; // ignore children that are not alternatives
+			AltAST altAST = (AltAST)alt;
+			if ( altAST.getChildCount()==1 ) { // only single-element alts can qualify
+				Tree e = altAST.getChild(0);
+				if ( e.getType()==WILDCARD ) {
+					return true;
+				}
+			}
+		}
+		return false;
+	}
+}
+
 block
    :	^(	BLOCK (^(OPTIONS .+))?
    		{
@ -28,8 +53,8 @@ block
    		SplitInstr S = null;
    		if ( nAlts>1 ) {
 	    		S = new SplitInstr(nAlts);
-	    		emit(S);
-	    		S.addrs.add(pda.ip);
+	    		gen.emit(S);
+	    		S.addrs.add(gen.ip);
    		}
    		int alt = 1;
    		}
@ -38,14 +63,14 @@ block
    			if ( alt < nAlts ) {
 	    			JumpInstr J = new JumpInstr();
 	    			jumps.add(J);
-	    			emit(J);
-	    			S.addrs.add(pda.ip);
+	    			gen.emit(J);
+	    			S.addrs.add(gen.ip);
    			}
    			alt++;
    			}
    		)+
    		{
-    		int END = pda.ip;
+    		int END = gen.ip;
    		for (JumpInstr J : jumps) J.target = END;
    		}
    	)
@ -61,14 +86,14 @@ element
 	:	labeledElement				
 	|	atom						
 	|	ebnf						
-	|   ACTION			{emit(new ActionInstr($ACTION.token));}			
-	|   SEMPRED			{emit(new SemPredInstr($SEMPRED.token));}		
-	|	GATED_SEMPRED	{emit(new SemPredInstr($GATED_SEMPRED.token));}
+	|   ACTION			{gen.emit(new ActionInstr($ACTION.token));}			
+	|   SEMPRED			{gen.emit(new SemPredInstr($SEMPRED.token));}		
+	|	GATED_SEMPRED	{gen.emit(new SemPredInstr($GATED_SEMPRED.token));}
 	|	treeSpec					
 	;
 	
 labeledElement
-	:	^(ASSIGN ID {emit(new LabelInstr($ID.token));} atom {emit(new SaveInstr($ID.token));} )
+	:	^(ASSIGN ID {gen.emit(new LabelInstr($ID.token));} atom {gen.emit(new SaveInstr($ID.token));} )
 	|	^(ASSIGN ID block)			
 	|	^(PLUS_ASSIGN ID atom)		
 	|	^(PLUS_ASSIGN ID block)		
@ -87,33 +112,33 @@ ebnf
 	:	^(astBlockSuffix block)		
 	|	{
 	   	SplitInstr S = new SplitInstr(2);
-		emit(S);
-   		S.addrs.add(pda.ip);
+		gen.emit(S);
+   		S.addrs.add(gen.ip);
 		}
 		^(OPTIONAL block)			
 		{
-   		S.addrs.add(pda.ip);
+   		S.addrs.add(gen.ip);
 		}
 	|	{
-		int start=pda.ip;
+		int start=gen.ip;
 	   	SplitInstr S = new SplitInstr(2);
-		emit(S);
-		int blkStart = pda.ip;
+		gen.emit(S);
+		int blkStart = gen.ip;
 		}
 		^(CLOSURE block)			
 		{
 	    JumpInstr J = new JumpInstr();
-	    emit(J);
+	    gen.emit(J);
 	    J.target = start;
   		S.addrs.add(blkStart);
-	    S.addrs.add(pda.ip);
+	    S.addrs.add(gen.ip);
 	    if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs);
 		}
-	|	{int start=pda.ip;} ^(POSITIVE_CLOSURE block)
+	|	{int start=gen.ip;} ^(POSITIVE_CLOSURE block)
 		{
   		SplitInstr S = new SplitInstr(2);
-		emit(S);
-		int stop = pda.ip;
+		gen.emit(S);
+		int stop = gen.ip;
   		S.addrs.add(start);
   		S.addrs.add(stop);
 	    if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs);
@ -136,8 +161,8 @@ atom
 	|	range					
 	|	^(DOT ID terminal[false])		
 	|	^(DOT ID ruleref)		
-    |	^(WILDCARD .)		{emit(new WildcardInstr($WILDCARD.token));}		
-    |	WILDCARD			{emit(new WildcardInstr($WILDCARD.token));}	
+    |	^(WILDCARD .)		{gen.emit(new WildcardInstr($WILDCARD.token));}		
+    |	WILDCARD			{gen.emit(new WildcardInstr($WILDCARD.token));}	
    |   terminal[false]				
    |   ruleref					
    ;
@ -155,15 +180,15 @@ ruleref

 range
    :	^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
-    	{emit(new RangeInstr($a.token, $b.token));}
+    	{gen.emit(new RangeInstr($a.token, $b.token));}
    ;

 terminal[boolean not]
-    :  ^(STRING_LITERAL .)			{emitString($STRING_LITERAL.token, $not);}
-    |	STRING_LITERAL				{emitString($STRING_LITERAL.token, $not);}
-    |	^(TOKEN_REF ARG_ACTION .)	{emit(new CallInstr($TOKEN_REF.token));}
-    |	^(TOKEN_REF .)				{emit(new CallInstr($TOKEN_REF.token));}
-    |	TOKEN_REF					{emit(new CallInstr($TOKEN_REF.token));}
+    :  ^(STRING_LITERAL .)			{gen.emitString($STRING_LITERAL.token, $not);}
+    |	STRING_LITERAL				{gen.emitString($STRING_LITERAL.token, $not);}
+    |	^(TOKEN_REF ARG_ACTION .)	{gen.emit(new CallInstr($TOKEN_REF.token));}
+    |	^(TOKEN_REF .)				{gen.emit(new CallInstr($TOKEN_REF.token));}
+    |	TOKEN_REF					{gen.emit(new CallInstr($TOKEN_REF.token));}
    |	^(ROOT terminal[false])			
    |	^(BANG terminal[false])			
    ;
--- a/tool/src/org/antlr/v4/codegen/PDABytecodeTriggers.java
+++ b/tool/src/org/antlr/v4/codegen/PDABytecodeTriggers.java
--- a/tool/src/org/antlr/v4/codegen/SourceGenTriggers.java
+++ b/tool/src/org/antlr/v4/codegen/SourceGenTriggers.java
@ -1,4 +1,4 @@
-// $ANTLR 3.2.1-SNAPSHOT May 24, 2010 15:02:05 SourceGenTriggers.g 2010-05-26 14:22:40
+// $ANTLR 3.2.1-SNAPSHOT May 24, 2010 15:02:05 SourceGenTriggers.g 2010-05-27 16:58:15

 package org.antlr.v4.codegen;

--- a/tool/src/org/antlr/v4/codegen/pda/NotInstr.java
+++ b/tool/src/org/antlr/v4/codegen/pda/NotInstr.java
@ -0,0 +1,9 @@
+package org.antlr.v4.codegen.pda;
+
+import org.antlr.v4.runtime.pda.Bytecode;
+
+/** Instruction object for the NOT opcode. It has no operands, so it
+ *  occupies a single byte.
+ */
+public class NotInstr extends Instr {
+	public short opcode() { return Bytecode.NOT; }
+	public int nBytes() { return 1; }	
+}
--- a/tool/src/org/antlr/v4/codegen/pda/SetInstr.java
+++ b/tool/src/org/antlr/v4/codegen/pda/SetInstr.java
@ -0,0 +1,20 @@
+package org.antlr.v4.codegen.pda;
+
+import org.antlr.v4.codegen.PDABytecodeGenerator;
+import org.antlr.v4.misc.IntervalSet;
+import org.antlr.v4.runtime.pda.Bytecode;
+
+/** Instruction object for the SET opcode: one opcode byte followed by a
+ *  two-byte index obtained from the generator for this instruction's set.
+ */
+public class SetInstr extends Instr {
+	public IntervalSet set; // set supplied at construction
+	public int setIndex; // value returned by gen.getSetIndex(set); only assigned inside write()
+
+	public SetInstr(IntervalSet set) { this.set = set; }
+	public short opcode() { return Bytecode.SET; }
+	public int nBytes() { return 1+2; } // opcode byte + two-byte set index
+	public void write(byte[] code) {
+		super.write(code); // presumably writes the opcode at addr -- Instr.write is not visible here, confirm
+		setIndex = gen.getSetIndex(set); // NOTE(review): the index is requested on every write() call
+		PDABytecodeGenerator.writeShort(code, addr+1, (short)setIndex); // operand goes right after the opcode byte
+	}
+}
--- a/tool/test/org/antlr/v4/test/TestDFAtoPDABytecodeGeneration.java
+++ b/tool/test/org/antlr/v4/test/TestDFAtoPDABytecodeGeneration.java
@ -3,7 +3,8 @@ package org.antlr.v4.test;
 import org.antlr.v4.automata.DFA;
 import org.antlr.v4.automata.DecisionState;
 import org.antlr.v4.automata.NFA;
-import org.antlr.v4.codegen.PDABytecodeGenerator;
+import org.antlr.v4.codegen.CompiledPDA;
+import org.antlr.v4.codegen.DFACompiler;
 import org.antlr.v4.runtime.pda.Bytecode;
 import org.antlr.v4.runtime.pda.PDA;
 import org.antlr.v4.tool.Grammar;
@ -11,6 +12,21 @@ import org.junit.Test;

 /** */
 public class TestDFAtoPDABytecodeGeneration extends BaseTest {
+	@Test public void testNotAisSet() throws Exception { // ~A in decision 0 is expected to compile to a single set instruction
+		Grammar g = new Grammar(
+			"parser grammar T;\n"+
+			"a : ~A B C | A ;");
+		String expecting =
+			"0000:\tsplit         7, 16\n" +
+			"0007:\tset           0\n" +
+			"0010:\tjmp           13\n" +
+			"0013:\taccept        1\n" +
+			"0016:\tmatch8        5\n" +
+			"0018:\tjmp           21\n" +
+			"0021:\taccept        2\n";
+		checkBytecode(g, 0, expecting);
+	}
+
 	@Test public void testAorB() throws Exception {
 		Grammar g = new Grammar(
 			"parser grammar T;\n"+
@ -62,8 +78,6 @@ public class TestDFAtoPDABytecodeGeneration extends BaseTest {
 		checkBytecode(g, 2, expecting);
 	}

-	// TODO: ORDER OF TESTS MATTERS? DFA edge orders get changed. ack!
-
 	void checkBytecode(Grammar g, int decision, String expecting) {
 		NFA nfa = createNFA(g);
 		DecisionState blk = nfa.decisionToNFAState.get(decision);
@ -71,8 +85,10 @@ public class TestDFAtoPDABytecodeGeneration extends BaseTest {
 //		Edge e0 = dfa.states.get(1).edge(0);
 //		Edge e1 = dfa.states.get(1).edge(1);
 //		e0.target = e1.target;
-//		System.out.print("altered DFA="+dfa);		
-		PDA PDA = PDABytecodeGenerator.getPDA(dfa);
-		assertEquals(expecting, Bytecode.disassemble(PDA.code, false));
+//		System.out.print("altered DFA="+dfa);
+		DFACompiler comp = new DFACompiler(dfa);
+		CompiledPDA obj = comp.compile();
+		PDA pda = new PDA(obj.code, obj.altToAddr, obj.nLabels);
+		assertEquals(expecting, Bytecode.disassemble(pda.code, false));
 	}	
 }
--- a/tool/test/org/antlr/v4/test/TestPDABytecodeGeneration.java
+++ b/tool/test/org/antlr/v4/test/TestPDABytecodeGeneration.java
@ -1,7 +1,8 @@
 package org.antlr.v4.test;

 import org.antlr.v4.Tool;
-import org.antlr.v4.codegen.PDABytecodeGenerator;
+import org.antlr.v4.codegen.CompiledPDA;
+import org.antlr.v4.codegen.LexerCompiler;
 import org.antlr.v4.runtime.pda.Bytecode;
 import org.antlr.v4.runtime.pda.PDA;
 import org.antlr.v4.semantics.SemanticPipeline;
@ -22,6 +23,18 @@ public class TestPDABytecodeGeneration extends BaseTest {
 		checkBytecode(g, expecting);
 	}

+	@Test public void testNotChar() throws Exception { // ~'a' is expected to compile to not followed by match8 'a'
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n"+
+			"A : ~'a' ;");
+		String expecting =
+			"0000:\tsplit         5\n" +
+			"0005:\tnot             \n" +
+			"0006:\tmatch8        'a'\n" +
+			"0008:\taccept        4\n";
+		checkBytecode(g, expecting);
+	}
+
 	@Test public void testIDandIntandKeyword() throws Exception {
 		LexerGrammar g = new LexerGrammar(
 			"lexer grammar L;\n" +
@ -215,7 +228,9 @@ public class TestPDABytecodeGeneration extends BaseTest {
 				}
 			}
 		}
-		PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME);
+		LexerCompiler comp = new LexerCompiler(g);
+		CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
+		PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
 		assertEquals(expecting, Bytecode.disassemble(PDA.code));
 	}
 }
--- a/tool/test/org/antlr/v4/test/TestPDABytecodeInterp.java
+++ b/tool/test/org/antlr/v4/test/TestPDABytecodeInterp.java
@ -3,7 +3,8 @@ package org.antlr.v4.test;
 import org.antlr.runtime.ANTLRStringStream;
 import org.antlr.runtime.Token;
 import org.antlr.v4.Tool;
-import org.antlr.v4.codegen.PDABytecodeGenerator;
+import org.antlr.v4.codegen.CompiledPDA;
+import org.antlr.v4.codegen.LexerCompiler;
 import org.antlr.v4.runtime.pda.PDA;
 import org.antlr.v4.semantics.SemanticPipeline;
 import org.antlr.v4.tool.Grammar;
@ -24,6 +25,14 @@ public class TestPDABytecodeInterp extends BaseTest {
 		checkMatches(g, "abab", expecting);
 	}

+	@Test public void testNotChar() throws Exception { // input "b" should be tokenized as A by the rule ~'a'
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n"+
+			"A : ~'a' ;");
+		String expecting = "A, EOF";
+		checkMatches(g, "b", expecting); // only a non-'a' input is exercised; no case checks that "a" is rejected
+	}
+
 	@Test public void testIDandIntandKeyword() throws Exception {
 		LexerGrammar g = new LexerGrammar(
 			"lexer grammar L;\n" +
@ -202,7 +211,10 @@ public class TestPDABytecodeInterp extends BaseTest {
 			}
 		}

-		PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME);
+		LexerCompiler comp = new LexerCompiler(g);
+		CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
+		PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
+
 		ANTLRStringStream in = new ANTLRStringStream(input);
 		List<Integer> tokenTypes = new ArrayList<Integer>();
 		int ttype = 0;
@ -236,7 +248,9 @@ public class TestPDABytecodeInterp extends BaseTest {
 			}
 		}

-		PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME);
+		LexerCompiler comp = new LexerCompiler(g);
+		CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
+		PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
 		ANTLRStringStream in = new ANTLRStringStream(input);
 		List<Integer> tokenTypes = new ArrayList<Integer>();
 		int ttype = PDA.execThompson(in);