From b0ca11f46b97a92e2ba51e69bdbe313162b73a28 Mon Sep 17 00:00:00 2001 From: parrt Date: Tue, 4 May 2010 11:17:29 -0800 Subject: [PATCH] got sempreds in [git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6838] --- .../org/antlr/v4/runtime/nfa/Bytecode.java | 2 +- .../src/org/antlr/v4/runtime/nfa/NFA.java | 43 +++++++++----- .../v4/codegen/NFABytecodeGenerator.java | 45 +++++++-------- .../antlr/v4/codegen/nfa/SemPredInstr.java | 5 +- .../v4/test/TestNFABytecodeGeneration.java | 16 ++++++ .../antlr/v4/test/TestNFABytecodeInterp.java | 57 +++++++++++++++---- 6 files changed, 114 insertions(+), 54 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java b/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java index cb3580850..a6326d338 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java +++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java @@ -70,7 +70,7 @@ public class Bytecode { new Instruction("ret"), new Instruction("label", OperandType.SHORT), new Instruction("save", OperandType.SHORT), - new Instruction("sempred", OperandType.SHORT), + new Instruction("sempred", OperandType.SHORT, OperandType.SHORT), // sempred ruleIndex, predIndex new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex }; diff --git a/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java b/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java index 45e7ff218..81928f381 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java +++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java @@ -5,6 +5,7 @@ import org.antlr.runtime.Token; import org.antlr.v4.runtime.CommonToken; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; @@ -14,6 +15,7 @@ public class NFA { public Map ruleToAddr; public int[] tokenTypeToAddr; public CommonToken[] labelValues; + public int nLabels; /** If we hit an action, we'll have to rewind and do the winning rule again */ boolean bypassedAction; @@ -23,17 +25,20 @@ public class NFA { this.code = code; this.ruleToAddr = ruleToAddr; this.tokenTypeToAddr = tokenTypeToAddr; + this.nLabels = nLabels; labelValues = new CommonToken[nLabels]; } public int execThompson(CharStream input) { int m = input.mark(); + Arrays.fill(labelValues, null); int ttype = execThompson(input, 0, false); - System.out.println("ttype="+ttype); + System.out.println("first attempt ttype="+ttype); if ( bypassedAction ) { input.rewind(m); System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling"); bypassedAction = false; + Arrays.fill(labelValues, null); int ttype2 = execThompson(input, tokenTypeToAddr[ttype], true); if ( ttype!=ttype2 ) { System.err.println("eh? token diff with action(s)"); @@ -96,24 +101,20 @@ processOneChar: } break; case Bytecode.LABEL : - if ( doActions ) { - int labelIndex = getShort(code, ip); - labelValues[labelIndex] = - new CommonToken(input, 0, 0, input.index(), -1); - } + int labelIndex = getShort(code, ip); + labelValues[labelIndex] = + new CommonToken(input, 0, 0, input.index(), -1); break; case Bytecode.SAVE : - if ( doActions ) { - int labelIndex = getShort(code, ip); - labelValues[labelIndex].setStopIndex(input.index()-1); - } + labelIndex = getShort(code, ip); + labelValues[labelIndex].setStopIndex(input.index()-1); break; case Bytecode.ACTION : bypassedAction = true; if ( doActions ) { int ruleIndex = getShort(code, ip); int actionIndex = getShort(code, ip+2); - System.out.println("action "+ ruleIndex+", "+actionIndex); + action(ruleIndex, actionIndex); } break; case Bytecode.ACCEPT : @@ -154,6 +155,7 @@ processOneChar: case Bytecode.SPLIT : case Bytecode.CALL : case Bytecode.RET : + case Bytecode.SEMPRED : break; default : throw new RuntimeException("invalid instruction @ "+ip+": "+opcode); @@ -231,7 +233,13 @@ processOneChar: } break; case Bytecode.SEMPRED : - // TODO: add next instruction only if sempred succeeds + // add next instruction only if sempred succeeds + int ruleIndex = getShort(code, ip); + int actionIndex = getShort(code, ip+2); + System.out.println("eval sempred "+ ruleIndex+", "+actionIndex); + if ( sempred(ruleIndex, actionIndex) ) { + addToClosure(closure, ip+4, alt, context); + } break; } } @@ -257,7 +265,7 @@ processOneChar: // --------------------------------------------------------------------- - // this stuff below can't do SAVE nor CALL/RET but faster. + // this stuff below can't do SAVE nor CALL/RET but faster. (nor preds) public int execThompson_no_stack(CharStream input, int ip) { int c = input.LA(1); @@ -408,6 +416,15 @@ processOneChar: return (memory[index]&0xFF) <<(8*1) | (memory[index+1]&0xFF); // prevent sign extension with mask } + // subclass needs to override these if there are sempreds or actions in lexer rules + + public boolean sempred(int ruleIndex, int actionIndex) { + return true; + } + + public void action(int ruleIndex, int actionIndex) { + } + /* public int exec(CharStream input, String ruleName) { return exec(input, ruleToAddr.get(ruleName)); diff --git a/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java b/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java index 1c20abec3..b36320309 100644 --- a/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java +++ b/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java @@ -28,8 +28,8 @@ public class NFABytecodeGenerator extends TreeParser { int[] tokenTypeToAddr; Map> ruleLabels = new HashMap>(); - Map> ruleActions = new HashMap>(); + Map> ruleSempreds = new HashMap>(); public Rule currentRule; @@ -64,6 +64,23 @@ public class NFABytecodeGenerator extends TreeParser { } } + // indexed from 0 per rule + public int getSempredIndex(Rule r, Token actionToken) { + Map actions = ruleSempreds.get(r); + if ( actions==null ) { + actions = new HashMap(); + ruleSempreds.put(r, actions); + } + if ( actions.get(actionToken)!=null ) { + return actions.get(actionToken); + } + else { + int i = actions.size(); + actions.put(actionToken, i); + return i; + } + } + /** labels in all rules share single label space * but we still track labels per rule so we can translate $label * to an index in an action. @@ -95,37 +112,13 @@ public class NFABytecodeGenerator extends TreeParser { int size = last.addr + last.nBytes(); byte[] code = new byte[size]; - // resolve CALL instruction targets and index labels before generating code - // TODO: move this code to Instr objects? Need code gen pointer then. + // resolve CALL instruction targets before generating code for (Instr I : instrs) { if ( I instanceof CallInstr ) { CallInstr C = (CallInstr) I; String ruleName = C.token.getText(); C.target = ruleToAddr.get(ruleName); } -/* - else if ( I instanceof LabelInstr ) { - LabelInstr L = (LabelInstr)I; - Map ruleLabels = labels.get(I.rule); - if ( ruleLabels==null ) { - ruleLabels = new HashMap(); - labels.put(I.rule, ruleLabels); - } - String labelName = L.token.getText(); - if ( ruleLabels.get(labelName)!=null ) { - L.labelIndex = ruleLabels.get(labelName); - } - else { - ruleLabels.put(labelName, labelIndex); - L.labelIndex = labelIndex++; - } - } - else if ( I instanceof SaveInstr ) { - SaveInstr S = (SaveInstr)I; - Map ruleLabels = labels.get(I.rule); - S.labelIndex = ruleLabels.get(S.token.getText()); - } - */ } for (Instr I : instrs) { I.write(code); diff --git a/tool/src/org/antlr/v4/codegen/nfa/SemPredInstr.java b/tool/src/org/antlr/v4/codegen/nfa/SemPredInstr.java index dcafbad8d..d8fef4265 100644 --- a/tool/src/org/antlr/v4/codegen/nfa/SemPredInstr.java +++ b/tool/src/org/antlr/v4/codegen/nfa/SemPredInstr.java @@ -12,10 +12,11 @@ public class SemPredInstr extends Instr { this.token = token; } public short opcode() { return Bytecode.SEMPRED; }; - public int nBytes() { return 1+2; } + public int nBytes() { return 1+2*2; } public void write(byte[] code) { super.write(code); - NFABytecodeGenerator.writeShort(code, addr+1, (short) predIndex); + NFABytecodeGenerator.writeShort(code, addr+1, (short)rule.index); + NFABytecodeGenerator.writeShort(code, addr+1+2, (short)gen.getSempredIndex(rule, token)); } public String toString() { return addr+":SemPredInstr "+ predIndex; } } diff --git a/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java b/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java index ca9dde0c1..cb60b18c2 100644 --- a/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java +++ b/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java @@ -179,6 +179,22 @@ public class TestNFABytecodeGeneration extends BaseTest { checkBytecode(g, expecting); } + @Test public void testSempred() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar L;\n" + + "A : {foo}? 'a' | 'b' {bar}? ;\n"); + String expecting = + "0000:\tsplit 5\n" + + "0005:\tsplit 12, 22\n" + + "0012:\tsempred 1, 0\n" + + "0017:\tmatch8 'a'\n" + + "0019:\tjmp 29\n" + + "0022:\tmatch8 'b'\n" + + "0024:\tsempred 1, 1\n" + + "0029:\taccept 4\n"; + checkBytecode(g, expecting); + } + public void _template() throws Exception { LexerGrammar g = new LexerGrammar( "\n"); diff --git a/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java b/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java index cb1bc077a..996757887 100644 --- a/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java +++ b/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java @@ -123,14 +123,14 @@ public class TestNFABytecodeInterp extends BaseTest { LexerGrammar g = new LexerGrammar( "lexer grammar L;\n" + "A : a='a' ;\n"); - checkMatches(g, "a", "A, EOF", "[[@-1,0:0='a',<0>,1:0]]"); + checkLabels(g, "a", "A", "[[@-1,0:0='a',<0>,1:0]]"); } @Test public void testLabeledString() throws Exception { LexerGrammar g = new LexerGrammar( "lexer grammar L;\n" + "A : a='abc' ;\n"); - checkMatches(g, "abc", "A, EOF", "[[@-1,0:2='abc',<0>,1:0]]"); + checkLabels(g, "abc", "A", "[[@-1,0:2='abc',<0>,1:0]]"); } @Test public void testLabeledToken() throws Exception { @@ -138,7 +138,7 @@ public class TestNFABytecodeInterp extends BaseTest { "lexer grammar L;\n" + "I : d=D ;\n" + "fragment D : '0'..'9'+ ;\n"); - checkMatches(g, "901", "I, EOF", "[[@-1,0:2='901',<0>,1:0]]"); + checkLabels(g, "901", "I", "[[@-1,0:2='901',<0>,1:0]]"); } @Test public void testLabelInLoopIsLastElement() throws Exception { @@ -146,7 +146,7 @@ public class TestNFABytecodeInterp extends BaseTest { "lexer grammar L;\n" + "I : d=D+ ;\n" + "fragment D : '0'..'9' ;\n"); - checkMatches(g, "901", "I, EOF", "[[@-1,2:2='1',<0>,1:2]]"); + checkLabels(g, "901", "I", "[[@-1,2:2='1',<0>,1:2]]"); } @Test public void testLabelIndexes() throws Exception { @@ -154,7 +154,7 @@ public class TestNFABytecodeInterp extends BaseTest { "lexer grammar L;\n" + "A : a='a' ;\n" + "B : a='b' b='c' ;\n"); - checkMatches(g, "bc", "B, EOF", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]"); + checkLabels(g, "bc", "B", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]"); } @Test public void testAction() throws Exception { @@ -162,7 +162,15 @@ public class TestNFABytecodeInterp extends BaseTest { "lexer grammar L;\n" + "I : {a1} d=D {a2} ;\n" + "fragment D : ('0'..'9' {a3})+ ;\n"); - checkMatches(g, "901", "I, EOF", "[[@-1,0:2='901',<0>,1:0]]"); + checkLabels(g, "901", "I", "[[@-1,0:2='901',<0>,1:0]]"); + } + + @Test public void testSempred() throws Exception { + // not actually evaluating preds since we're interpreting; assumes true. + LexerGrammar g = new LexerGrammar( + "lexer grammar L;\n" + + "A : {true}? 'a' | 'b' {true}? ;\n"); + checkMatches(g, "ab", "A, A, EOF"); } @@ -174,12 +182,6 @@ public class TestNFABytecodeInterp extends BaseTest { } void checkMatches(LexerGrammar g, String input, String expecting) { - checkMatches(g, input, expecting, null); - } - - void checkMatches(LexerGrammar g, String input, String expecting, - String expectingTokens) - { if ( g.ast!=null && !g.ast.hasErrors ) { System.out.println(g.ast.toStringTree()); Tool antlr = new Tool(); @@ -209,6 +211,37 @@ public class TestNFABytecodeInterp extends BaseTest { tokenTypes.add(ttype); } while ( ttype!= Token.EOF ); assertEquals(expectingTokenTypes, tokenTypes); + } + + void checkLabels(LexerGrammar g, String input, String expecting, + String expectingTokens) + { + if ( g.ast!=null && !g.ast.hasErrors ) { + System.out.println(g.ast.toStringTree()); + Tool antlr = new Tool(); + SemanticPipeline sem = new SemanticPipeline(g); + sem.process(); + if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any) + for (Grammar imp : g.getImportedGrammars()) { + antlr.process(imp); + } + } + } + + List expectingTokenTypes = new ArrayList(); + if ( expecting!=null && !expecting.trim().equals("") ) { + for (String tname : expecting.replace(" ", "").split(",")) { + int ttype = g.getTokenType(tname); + expectingTokenTypes.add(ttype); + } + } + + NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME); + ANTLRStringStream in = new ANTLRStringStream(input); + List tokenTypes = new ArrayList(); + int ttype = nfa.execThompson(in); + tokenTypes.add(ttype); + assertEquals(expectingTokenTypes, tokenTypes); if ( expectingTokens!=null ) { assertEquals(expectingTokens, Arrays.toString(nfa.labelValues));