From b0ca11f46b97a92e2ba51e69bdbe313162b73a28 Mon Sep 17 00:00:00 2001
From: parrt <parrt@antlr.org>
Date: Tue, 4 May 2010 11:17:29 -0800
Subject: [PATCH] got sempreds in

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6838]
---
 .../org/antlr/v4/runtime/nfa/Bytecode.java    |  2 +-
 .../src/org/antlr/v4/runtime/nfa/NFA.java     | 43 +++++++++-----
 .../v4/codegen/NFABytecodeGenerator.java      | 45 +++++++--------
 .../antlr/v4/codegen/nfa/SemPredInstr.java    |  5 +-
 .../v4/test/TestNFABytecodeGeneration.java    | 16 ++++++
 .../antlr/v4/test/TestNFABytecodeInterp.java  | 57 +++++++++++++++----
 6 files changed, 114 insertions(+), 54 deletions(-)

diff --git a/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java b/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java
index cb3580850..a6326d338 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java
@@ -70,7 +70,7 @@ public class Bytecode {
 		new Instruction("ret"),
 		new Instruction("label", OperandType.SHORT),
 		new Instruction("save", OperandType.SHORT),
-		new Instruction("sempred", OperandType.SHORT),
+		new Instruction("sempred", OperandType.SHORT, OperandType.SHORT), // sempred ruleIndex, predIndex
 		new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex
 	};
 
diff --git a/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java b/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java
index 45e7ff218..81928f381 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java
@@ -5,6 +5,7 @@ import org.antlr.runtime.Token;
 import org.antlr.v4.runtime.CommonToken;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 
@@ -14,6 +15,7 @@ public class NFA {
 	public Map<String, Integer> ruleToAddr;
 	public int[] tokenTypeToAddr;
 	public CommonToken[] labelValues;
+	public int nLabels;
 
 	/** If we hit an action, we'll have to rewind and do the winning rule again */
 	boolean bypassedAction;
@@ -23,17 +25,20 @@ public class NFA {
 		this.code = code;
 		this.ruleToAddr = ruleToAddr;
 		this.tokenTypeToAddr = tokenTypeToAddr;
+		this.nLabels = nLabels;
 		labelValues = new CommonToken[nLabels];
 	}
 
 	public int execThompson(CharStream input) {
 		int m = input.mark();
+		Arrays.fill(labelValues, null);
 		int ttype = execThompson(input, 0, false);
-		System.out.println("ttype="+ttype);
+		System.out.println("first attempt ttype="+ttype);
 		if ( bypassedAction ) {
 			input.rewind(m);
 			System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling");
 			bypassedAction = false;
+			Arrays.fill(labelValues, null);
 			int ttype2 = execThompson(input, tokenTypeToAddr[ttype], true);
 			if ( ttype!=ttype2 ) {
 				System.err.println("eh? token diff with action(s)");
@@ -96,24 +101,20 @@ processOneChar:
 						}
 						break;
 					case Bytecode.LABEL :
-						if ( doActions ) {
-							int labelIndex = getShort(code, ip);
-							labelValues[labelIndex] =
-								new CommonToken(input, 0, 0, input.index(), -1);
-						}
+						int labelIndex = getShort(code, ip);
+						labelValues[labelIndex] =
+							new CommonToken(input, 0, 0, input.index(), -1);
 						break;
 					case Bytecode.SAVE :
-						if ( doActions ) {
-							int labelIndex = getShort(code, ip);
-							labelValues[labelIndex].setStopIndex(input.index()-1);
-						}
+						labelIndex = getShort(code, ip);
+						labelValues[labelIndex].setStopIndex(input.index()-1);
 						break;
 					case Bytecode.ACTION :
 						bypassedAction = true;
 						if ( doActions ) {
 							int ruleIndex = getShort(code, ip);
 							int actionIndex = getShort(code, ip+2);
-							System.out.println("action "+ ruleIndex+", "+actionIndex);
+							action(ruleIndex, actionIndex);
 						}
 						break;
 					case Bytecode.ACCEPT :
@@ -154,6 +155,7 @@ processOneChar:
 					case Bytecode.SPLIT :
 					case Bytecode.CALL :
 					case Bytecode.RET :
+					case Bytecode.SEMPRED :
 						break;
 					default :
 						throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
@@ -231,7 +233,13 @@ processOneChar:
 				}
 				break;
 			case Bytecode.SEMPRED :
-				// TODO: add next instruction only if sempred succeeds
+				// add next instruction only if sempred succeeds
+				int ruleIndex = getShort(code, ip);
+				int actionIndex = getShort(code, ip+2);
+				System.out.println("eval sempred "+ ruleIndex+", "+actionIndex);
+				if ( sempred(ruleIndex, actionIndex) ) {
+					addToClosure(closure, ip+4, alt, context);
+				}
 				break;
 		}
 	}
@@ -257,7 +265,7 @@ processOneChar:
 
 	// ---------------------------------------------------------------------
 
-	// this stuff below can't do SAVE nor CALL/RET but faster.
+	// The code below is faster but cannot handle SAVE, CALL/RET, or sempreds.
 	
 	public int execThompson_no_stack(CharStream input, int ip) {
 		int c = input.LA(1);
@@ -408,6 +416,15 @@ processOneChar:
 		return (memory[index]&0xFF) <<(8*1) | (memory[index+1]&0xFF); // prevent sign extension with mask
 	}
 
+	// Hooks for embedded code: a subclass needs to override these if lexer rules contain sempreds or actions.
+
+	public boolean sempred(int ruleIndex, int actionIndex) { // actionIndex is the predicate's index within the rule
+		return true; // default: every predicate succeeds
+	}
+
+	public void action(int ruleIndex, int actionIndex) { // default: the action is ignored
+	}
+
 /*
 	public int exec(CharStream input, String ruleName) {
 		return exec(input, ruleToAddr.get(ruleName));
diff --git a/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java b/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java
index 1c20abec3..b36320309 100644
--- a/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java
+++ b/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java
@@ -28,8 +28,8 @@ public class NFABytecodeGenerator extends TreeParser {
 	int[] tokenTypeToAddr;
 
 	Map<Rule, Map<String, Integer>> ruleLabels = new HashMap<Rule, Map<String, Integer>>();
-
 	Map<Rule, Map<Token, Integer>> ruleActions = new HashMap<Rule, Map<Token, Integer>>();
+	Map<Rule, Map<Token, Integer>> ruleSempreds = new HashMap<Rule, Map<Token, Integer>>();
 
 	public Rule currentRule;
 
@@ -64,6 +64,23 @@ public class NFABytecodeGenerator extends TreeParser {
 		}
 	}
 
+	/** Return the index of predicate {@code actionToken} within rule {@code r}.
+	 *  Indexes start at 0 for each rule; a predicate seen for the first time
+	 *  is assigned the next unused index of its rule. */
+	public int getSempredIndex(Rule r, Token actionToken) {
+		Map<Token, Integer> preds = ruleSempreds.get(r);
+		if ( preds==null ) { // first predicate recorded for this rule
+			preds = new HashMap<Token, Integer>();
+			ruleSempreds.put(r, preds);
+		}
+		Integer index = preds.get(actionToken);
+		if ( index==null ) {
+			index = preds.size();
+			preds.put(actionToken, index);
+		}
+		return index;
+	}
+
 	/** labels in all rules share single label space
 	 *  but we still track labels per rule so we can translate $label
 	 *  to an index in an action.
@@ -95,37 +112,13 @@ public class NFABytecodeGenerator extends TreeParser {
 		int size = last.addr + last.nBytes();
 		byte[] code = new byte[size];
 
-		// resolve CALL instruction targets and index labels before generating code
-		// TODO: move this code to Instr objects? Need code gen pointer then.
+		// resolve CALL instruction targets before generating code
 		for (Instr I : instrs) {
 			if ( I instanceof CallInstr ) {
 				CallInstr C = (CallInstr) I;
 				String ruleName = C.token.getText();
 				C.target = ruleToAddr.get(ruleName);
 			}
-/*
-			else if ( I instanceof LabelInstr ) {
-				LabelInstr L = (LabelInstr)I;
-				Map<String, Integer> ruleLabels = labels.get(I.rule);
-				if ( ruleLabels==null ) {
-					ruleLabels = new HashMap<String, Integer>();
-					labels.put(I.rule, ruleLabels);
-				}
-				String labelName = L.token.getText();
-				if ( ruleLabels.get(labelName)!=null ) {
-					L.labelIndex = ruleLabels.get(labelName);
-				}
-				else {
-					ruleLabels.put(labelName, labelIndex);
-					L.labelIndex = labelIndex++;
-				}
-			}
-			else if ( I instanceof SaveInstr ) {
-				SaveInstr S = (SaveInstr)I;
-				Map<String, Integer> ruleLabels = labels.get(I.rule);
-				S.labelIndex = ruleLabels.get(S.token.getText());
-			}
-			 */
 		}
 		for (Instr I : instrs) {
 			I.write(code);
diff --git a/tool/src/org/antlr/v4/codegen/nfa/SemPredInstr.java b/tool/src/org/antlr/v4/codegen/nfa/SemPredInstr.java
index dcafbad8d..d8fef4265 100644
--- a/tool/src/org/antlr/v4/codegen/nfa/SemPredInstr.java
+++ b/tool/src/org/antlr/v4/codegen/nfa/SemPredInstr.java
@@ -12,10 +12,11 @@ public class SemPredInstr extends Instr {
 		this.token = token;
 	}
 	public short opcode() { return Bytecode.SEMPRED; };
-	public int nBytes() { return 1+2; }
+	public int nBytes() { return 1+2*2; }
 	public void write(byte[] code) {
 		super.write(code);
-		NFABytecodeGenerator.writeShort(code, addr+1, (short) predIndex);
+		NFABytecodeGenerator.writeShort(code, addr+1, (short)rule.index);
+		NFABytecodeGenerator.writeShort(code, addr+1+2, (short)gen.getSempredIndex(rule, token));
 	}
 	public String toString() { return addr+":SemPredInstr "+ predIndex; }
 }
diff --git a/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java b/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java
index ca9dde0c1..cb60b18c2 100644
--- a/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java
+++ b/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java
@@ -179,6 +179,22 @@ public class TestNFABytecodeGeneration extends BaseTest {
 		checkBytecode(g, expecting);
 	}
 
+	@Test public void testSempred() throws Exception { // sempred must be emitted with two operands: ruleIndex, predIndex
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : {foo}? 'a' | 'b' {bar}? ;\n");
+		String expecting = // each sempred occupies 5 bytes (12->17, 24->29); pred indexes count up from 0 within the rule
+			"0000:\tsplit         5\n" +
+			"0005:\tsplit         12, 22\n" +
+			"0012:\tsempred       1, 0\n" +
+			"0017:\tmatch8        'a'\n" +
+			"0019:\tjmp           29\n" +
+			"0022:\tmatch8        'b'\n" +
+			"0024:\tsempred       1, 1\n" +
+			"0029:\taccept        4\n";
+		checkBytecode(g, expecting);
+	}
+
 	public void _template() throws Exception {
 		LexerGrammar g = new LexerGrammar(
 			"\n");
diff --git a/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java b/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java
index cb1bc077a..996757887 100644
--- a/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java
+++ b/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java
@@ -123,14 +123,14 @@ public class TestNFABytecodeInterp extends BaseTest {
 		LexerGrammar g = new LexerGrammar(
 			"lexer grammar L;\n" +
 			"A : a='a' ;\n");
-		checkMatches(g, "a", "A, EOF", "[[@-1,0:0='a',<0>,1:0]]");
+		checkLabels(g, "a", "A", "[[@-1,0:0='a',<0>,1:0]]");
 	}
 
 	@Test public void testLabeledString() throws Exception {
 		LexerGrammar g = new LexerGrammar(
 			"lexer grammar L;\n" +
 			"A : a='abc' ;\n");
-		checkMatches(g, "abc", "A, EOF", "[[@-1,0:2='abc',<0>,1:0]]");
+		checkLabels(g, "abc", "A", "[[@-1,0:2='abc',<0>,1:0]]");
 	}
 
 	@Test public void testLabeledToken() throws Exception {
@@ -138,7 +138,7 @@ public class TestNFABytecodeInterp extends BaseTest {
 			"lexer grammar L;\n" +
 			"I : d=D ;\n" +
 			"fragment D : '0'..'9'+ ;\n");
-		checkMatches(g, "901", "I, EOF", "[[@-1,0:2='901',<0>,1:0]]");
+		checkLabels(g, "901", "I", "[[@-1,0:2='901',<0>,1:0]]");
 	}
 
 	@Test public void testLabelInLoopIsLastElement() throws Exception {
@@ -146,7 +146,7 @@ public class TestNFABytecodeInterp extends BaseTest {
 			"lexer grammar L;\n" +
 			"I : d=D+ ;\n" +
 			"fragment D : '0'..'9' ;\n");
-		checkMatches(g, "901", "I, EOF", "[[@-1,2:2='1',<0>,1:2]]");
+		checkLabels(g, "901", "I", "[[@-1,2:2='1',<0>,1:2]]");
 	}
 
 	@Test public void testLabelIndexes() throws Exception {
@@ -154,7 +154,7 @@ public class TestNFABytecodeInterp extends BaseTest {
 			"lexer grammar L;\n" +
 			"A : a='a' ;\n" +
 			"B : a='b' b='c' ;\n");
-		checkMatches(g, "bc", "B, EOF", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]");
+		checkLabels(g, "bc", "B", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]");
 	}
 
 	@Test public void testAction() throws Exception {
@@ -162,7 +162,15 @@ public class TestNFABytecodeInterp extends BaseTest {
 			"lexer grammar L;\n" +
 			"I : {a1} d=D {a2} ;\n" +
 			"fragment D : ('0'..'9' {a3})+ ;\n");
-		checkMatches(g, "901", "I, EOF", "[[@-1,0:2='901',<0>,1:0]]");
+		checkLabels(g, "901", "I", "[[@-1,0:2='901',<0>,1:0]]");
+	}
+
+	@Test public void testSempred() throws Exception {
+		// The interpreter does not evaluate predicate code; the default NFA.sempred() returns true.
+		LexerGrammar g = new LexerGrammar(
+			"lexer grammar L;\n" +
+			"A : {true}? 'a' | 'b' {true}? ;\n");
+		checkMatches(g, "ab", "A, A, EOF");
 	}
 
 
@@ -174,12 +182,6 @@ public class TestNFABytecodeInterp extends BaseTest {
 	}
 
 	void checkMatches(LexerGrammar g, String input, String expecting) {
-		checkMatches(g, input, expecting, null);
-	}
-	
-	void checkMatches(LexerGrammar g, String input, String expecting,
-					  String expectingTokens)
-	{
 		if ( g.ast!=null && !g.ast.hasErrors ) {
 			System.out.println(g.ast.toStringTree());
 			Tool antlr = new Tool();
@@ -209,6 +211,37 @@ public class TestNFABytecodeInterp extends BaseTest {
 			tokenTypes.add(ttype);
 		} while ( ttype!= Token.EOF );
 		assertEquals(expectingTokenTypes, tokenTypes);
+	}
+
+	void checkLabels(LexerGrammar g, String input, String expecting,
+					  String expectingTokens)
+	{
+		if ( g.ast!=null && !g.ast.hasErrors ) {
+			System.out.println(g.ast.toStringTree());
+			Tool antlr = new Tool();
+			SemanticPipeline sem = new SemanticPipeline(g);
+			sem.process();
+			if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
+				for (Grammar imp : g.getImportedGrammars()) {
+					antlr.process(imp);
+				}
+			}
+		}
+
+		List<Integer> expectingTokenTypes = new ArrayList<Integer>();
+		if ( expecting!=null && !expecting.trim().equals("") ) {
+			for (String tname : expecting.replace(" ", "").split(",")) {
+				int ttype = g.getTokenType(tname);
+				expectingTokenTypes.add(ttype);
+			}
+		}
+
+		NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
+		ANTLRStringStream in = new ANTLRStringStream(input);
+		List<Integer> tokenTypes = new ArrayList<Integer>();
+		int ttype = nfa.execThompson(in);
+		tokenTypes.add(ttype);
+		assertEquals(expectingTokenTypes, tokenTypes);
 
 		if ( expectingTokens!=null ) {
 			assertEquals(expectingTokens, Arrays.toString(nfa.labelValues));