Lexer NFA now rewinds and re-runs the winning rule "with feeling" (executing actions) when a rule contains actions

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6837]
2010-05-03 17:02:47 -08:00 · 2010-05-03 17:02:47 -08:00 · e6d65be27b
parent cf7a730b31
commit e6d65be27b
9 changed files with 108 additions and 19 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java
@ -71,7 +71,7 @@ public class Bytecode {
 		new Instruction("label", OperandType.SHORT),
 		new Instruction("save", OperandType.SHORT),
 		new Instruction("sempred", OperandType.SHORT),
-		new Instruction("action", OperandType.SHORT),
+		new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex
 	};

 	public static String disassemble(byte[] code, int start) {
--- a/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java
@ -15,6 +15,10 @@ public class NFA {
 	public int[] tokenTypeToAddr;
 	public CommonToken[] labelValues;

+	/** If we hit an action, we'll have to rewind and do the winning rule again */
+	boolean bypassedAction;
+
+
 	public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
 		this.code = code;
 		this.ruleToAddr = ruleToAddr;
@ -23,7 +27,21 @@ public class NFA {
 	}

 	public int execThompson(CharStream input) {
-		return execThompson(input, 0, false);
+		bypassedAction = false; // ACTION sets this on every pass, so clear what the previous token's action pass left behind
+		int m = input.mark();
+		int ttype = execThompson(input, 0, false); // first pass: find the winning token type without running actions
+		System.out.println("ttype="+ttype);
+		if ( bypassedAction && ttype>=0 && ttype<tokenTypeToAddr.length ) { // only re-run when ttype is a valid index into tokenTypeToAddr
+			input.rewind(m);
+			System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling");
+			int ttype2 = execThompson(input, tokenTypeToAddr[ttype], true); // second pass: winning rule only, actions enabled
+			if ( ttype!=ttype2 ) {
+				System.err.println("eh? token diff with action(s)");
+			}
+			else System.out.println("types are same");
+		}
+		else input.release(m);
+		return ttype;
 	}

 	public int execThompson(CharStream input, int ip, boolean doActions) {
@ -47,7 +65,7 @@ processOneChar:
 				ip = t.addr;
 				NFAStack context = t.context;
 				int alt = t.alt;
-				//System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
+				System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
 				trace(ip);
 				short opcode = code[ip];
 				ip++; // move to next instruction or first byte of operand
@ -91,9 +109,11 @@ processOneChar:
 						}
 						break;
 					case Bytecode.ACTION :
+						bypassedAction = true;
 						if ( doActions ) {
-							int actionIndex = getShort(code, ip);
-							System.out.println("action "+ actionIndex);
+							int ruleIndex = getShort(code, ip);
+							int actionIndex = getShort(code, ip+2);
+							System.out.println("action "+ ruleIndex+", "+actionIndex);
 						}
 						break;
 					case Bytecode.ACCEPT :
@ -180,11 +200,11 @@ processOneChar:
 			case Bytecode.JMP :
 				addToClosure(closure, getShort(code, ip), alt, context);
 				break;
+			case Bytecode.ACTION :
+				ip += 2; // has 2 more bytes than LABEL/SAVE
 			case Bytecode.LABEL :
 			case Bytecode.SAVE :
-			case Bytecode.ACTION :
 				// see through them for closure ops
-				int labelIndex = getShort(code, ip);
 				ip += 2;
 				addToClosure(closure, ip, alt, context); // do closure past SAVE
 				break;
--- a/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java
+++ b/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java
@ -27,15 +27,13 @@ public class NFABytecodeGenerator extends TreeParser {
 	Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
 	int[] tokenTypeToAddr;

-	Map<Rule, Map<String, Integer>> labels = new HashMap<Rule, Map<String, Integer>>();
+	Map<Rule, Map<String, Integer>> ruleLabels = new HashMap<Rule, Map<String, Integer>>();
+
+	Map<Rule, Map<Token, Integer>> ruleActions = new HashMap<Rule, Map<Token, Integer>>();

 	public Rule currentRule;

-	/** labels in all rules share single label space
-	 *  but we still track labels per rule so we can translate $label
-	 *  to an index in an action.
-	 */
-	public int numLabels = 0;
+	public int labelIndex = 0;

 	public NFABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
 		super(input, state);
@ -44,10 +42,47 @@ public class NFABytecodeGenerator extends TreeParser {
 	public void emit(Instr I) {
 		I.addr = ip;
 		I.rule = currentRule;
+		I.gen = this;
 		ip += I.nBytes();
 		instrs.add(I);
 	}

+	/** 0-based index of actionToken among rule r's actions; an unseen token gets the next free index */
+	public int getActionIndex(Rule r, Token actionToken) {
+		Map<Token, Integer> perRule = ruleActions.get(r);
+		if ( perRule==null ) {
+			// first action requested for this rule: start its table
+			perRule = new HashMap<Token, Integer>();
+			ruleActions.put(r, perRule);
+		}
+		Integer known = perRule.get(actionToken);
+		if ( known==null ) {
+			// table size before insertion is the next dense index
+			known = perRule.size();
+			perRule.put(actionToken, known);
+		}
+		return known;
+	}
+
+	/** Map labelName in rule r to its slot. Slots come from one counter
+	 *  (labelIndex) shared by every rule, yet the name-to-slot table is
+	 *  kept per rule so $label in an action can be translated to an index.
+	 */
+	public int getLabelIndex(Rule r, String labelName) {
+		Map<String, Integer> perRule = ruleLabels.get(r);
+		if ( perRule==null ) {
+			perRule = new HashMap<String, Integer>();
+			ruleLabels.put(r, perRule);
+		}
+		Integer slot = perRule.get(labelName);
+		if ( slot!=null ) {
+			return slot;
+		}
+		int fresh = labelIndex++; // unseen name: claim the next shared slot and advance the counter
+		perRule.put(labelName, fresh);
+		return fresh;
+	}
+
 	public void emitString(Token t) {
 		String chars = Target.getStringFromGrammarStringLiteral(t.getText());
 		for (char c : chars.toCharArray()) {
@ -61,12 +96,14 @@ public class NFABytecodeGenerator extends TreeParser {
 		byte[] code = new byte[size];

 		// resolve CALL instruction targets and index labels before generating code
+		// TODO: move this code to Instr objects? Need code gen pointer then.
 		for (Instr I : instrs) {
 			if ( I instanceof CallInstr ) {
 				CallInstr C = (CallInstr) I;
 				String ruleName = C.token.getText();
 				C.target = ruleToAddr.get(ruleName);
 			}
+/*
 			else if ( I instanceof LabelInstr ) {
 				LabelInstr L = (LabelInstr)I;
 				Map<String, Integer> ruleLabels = labels.get(I.rule);
@ -79,8 +116,8 @@ public class NFABytecodeGenerator extends TreeParser {
 					L.labelIndex = ruleLabels.get(labelName);
 				}
 				else {
-					ruleLabels.put(labelName, numLabels);
-					L.labelIndex = numLabels++;
+					ruleLabels.put(labelName, labelIndex);
+					L.labelIndex = labelIndex++;
 				}
 			}
 			else if ( I instanceof SaveInstr ) {
@ -88,6 +125,7 @@ public class NFABytecodeGenerator extends TreeParser {
 				Map<String, Integer> ruleLabels = labels.get(I.rule);
 				S.labelIndex = ruleLabels.get(S.token.getText());
 			}
+			 */
 		}
 		for (Instr I : instrs) {
 			I.write(code);
@ -138,7 +176,7 @@ public class NFABytecodeGenerator extends TreeParser {
 		System.out.println(Bytecode.disassemble(code));
 		System.out.println("rule addrs="+gen.ruleToAddr);

-		return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.numLabels);
+		return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.labelIndex);
 	}

 	/** Write value at index into a byte array highest to lowest byte,
--- a/tool/src/org/antlr/v4/codegen/nfa/ActionInstr.java
+++ b/tool/src/org/antlr/v4/codegen/nfa/ActionInstr.java
@ -12,10 +12,11 @@ public class ActionInstr extends Instr {
 		this.token = token;
 	}
 	public short opcode() { return Bytecode.ACTION; };
-	public int nBytes() { return 1+2; }
+	public int nBytes() { return 1+2*2; }
 	public void write(byte[] code) {
 		super.write(code);
-		NFABytecodeGenerator.writeShort(code, addr+1, (short)actionIndex);
+		NFABytecodeGenerator.writeShort(code, addr+1, (short)rule.index);
+		NFABytecodeGenerator.writeShort(code, addr+1+2, (short)gen.getActionIndex(rule, token));
 	}
 	public String toString() { return addr+":ActionInstr "+actionIndex; }
 }
--- a/tool/src/org/antlr/v4/codegen/nfa/Instr.java
+++ b/tool/src/org/antlr/v4/codegen/nfa/Instr.java
@ -1,11 +1,14 @@
 package org.antlr.v4.codegen.nfa;

+import org.antlr.v4.codegen.NFABytecodeGenerator;
 import org.antlr.v4.tool.Rule;

 /** */
 public abstract class Instr {
 	public int addr;
 	public Rule rule;
+	public NFABytecodeGenerator gen;
+	
 	public abstract short opcode();
 	public abstract int nBytes();
 	public int charSize(int a, int b) { return Math.max(charSize(a), charSize(b)); }
--- a/tool/src/org/antlr/v4/codegen/nfa/LabelInstr.java
+++ b/tool/src/org/antlr/v4/codegen/nfa/LabelInstr.java
@ -15,6 +15,7 @@ public class LabelInstr extends Instr {
 	public int nBytes() { return 1+2; }
 	public void write(byte[] code) {
 		super.write(code);
+		labelIndex = gen.getLabelIndex(rule, token.getText());
 		NFABytecodeGenerator.writeShort(code, addr+1, (short)labelIndex);
 	}
 	public String toString() { return addr+":LabelInstr "+ labelIndex; }
--- a/tool/src/org/antlr/v4/codegen/nfa/SaveInstr.java
+++ b/tool/src/org/antlr/v4/codegen/nfa/SaveInstr.java
@ -15,6 +15,7 @@ public class SaveInstr extends Instr {
 	public int nBytes() { return 1+2; }
 	public void write(byte[] code) {
 		super.write(code);
+		labelIndex = gen.getLabelIndex(rule, token.getText());		
 		NFABytecodeGenerator.writeShort(code, addr+1, (short) labelIndex);
 	}
 	public String toString() { return addr+":SaveInstr "+ labelIndex; }
--- a/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java
+++ b/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java
@ -163,6 +163,22 @@ public class TestNFABytecodeGeneration extends BaseTest {
 		checkBytecode(g, expecting);
 	}

+	@Test public void testAction() throws Exception { // each action operand pair is ruleIndex, actionIndex; {foo} is action 0 and {bar} is action 1 of rule A
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : {foo} 'a' | 'b' {bar} ;\n");
+		String expecting =
+			"0000:\tsplit         5\n" +
+			"0005:\tsplit         12, 22\n" +
+			"0012:\taction        1, 0\n" +
+			"0017:\tmatch8        'a'\n" +
+			"0019:\tjmp           29\n" +
+			"0022:\tmatch8        'b'\n" +
+			"0024:\taction        1, 1\n" +
+			"0029:\taccept        4\n";
+		checkBytecode(g, expecting);
+	}
+
 	public void _template() throws Exception {
 		LexerGrammar g = new LexerGrammar(
 			"\n");
--- a/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java
+++ b/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java
@ -157,6 +157,15 @@ public class TestNFABytecodeInterp extends BaseTest {
 		checkMatches(g, "bc", "B, EOF", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]");
 	}

+	@Test public void testAction() throws Exception { // actions before, after and inside a labeled fragment call; "901" must still lex as one I token, then EOF
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"I : {a1} d=D {a2} ;\n" +
+			"fragment D : ('0'..'9' {a3})+ ;\n");
+		checkMatches(g, "901", "I, EOF", "[[@-1,0:2='901',<0>,1:0]]");
+	}
+
+
 	public void _template() throws Exception {
 		LexerGrammar g = new LexerGrammar(
 			"\n");
@ -196,7 +205,7 @@ public class TestNFABytecodeInterp extends BaseTest {
 		List<Integer> tokenTypes = new ArrayList<Integer>();
 		int ttype = 0;
 		do {
-			ttype = nfa.execThompson(in, 0, true);
+			ttype = nfa.execThompson(in);
 			tokenTypes.add(ttype);
 		} while ( ttype!= Token.EOF );
 		assertEquals(expectingTokenTypes, tokenTypes);