From e6d65be27b7507786d3df46debf215367b0a757d Mon Sep 17 00:00:00 2001 From: parrt Date: Mon, 3 May 2010 17:02:47 -0800 Subject: [PATCH] rewinds now and does with feeling for actions in rules [git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6837] --- .../org/antlr/v4/runtime/nfa/Bytecode.java | 2 +- .../src/org/antlr/v4/runtime/nfa/NFA.java | 32 +++++++++-- .../v4/codegen/NFABytecodeGenerator.java | 56 ++++++++++++++++--- .../org/antlr/v4/codegen/nfa/ActionInstr.java | 5 +- tool/src/org/antlr/v4/codegen/nfa/Instr.java | 3 + .../org/antlr/v4/codegen/nfa/LabelInstr.java | 1 + .../org/antlr/v4/codegen/nfa/SaveInstr.java | 1 + .../v4/test/TestNFABytecodeGeneration.java | 16 ++++++ .../antlr/v4/test/TestNFABytecodeInterp.java | 11 +++- 9 files changed, 108 insertions(+), 19 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java b/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java index a81a8f865..cb3580850 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java +++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/Bytecode.java @@ -71,7 +71,7 @@ public class Bytecode { new Instruction("label", OperandType.SHORT), new Instruction("save", OperandType.SHORT), new Instruction("sempred", OperandType.SHORT), - new Instruction("action", OperandType.SHORT), + new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex }; public static String disassemble(byte[] code, int start) { diff --git a/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java b/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java index 57bed1fc5..45e7ff218 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java +++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java @@ -15,6 +15,10 @@ public class NFA { public int[] tokenTypeToAddr; public CommonToken[] labelValues; + /** If we hit an action, we'll have to rewind and do the winning rule again */ + boolean bypassedAction; + + public NFA(byte[] code, Map ruleToAddr, int[] tokenTypeToAddr, int nLabels) { this.code = code; this.ruleToAddr = ruleToAddr; @@ -23,7 +27,21 @@ public class NFA { } public int execThompson(CharStream input) { - return execThompson(input, 0, false); + int m = input.mark(); + int ttype = execThompson(input, 0, false); + System.out.println("ttype="+ttype); + if ( bypassedAction ) { + input.rewind(m); + System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling"); + bypassedAction = false; + int ttype2 = execThompson(input, tokenTypeToAddr[ttype], true); + if ( ttype!=ttype2 ) { + System.err.println("eh? token diff with action(s)"); + } + else System.out.println("types are same"); + } + else input.release(m); + return ttype; } public int execThompson(CharStream input, int ip, boolean doActions) { @@ -47,7 +65,7 @@ processOneChar: ip = t.addr; NFAStack context = t.context; int alt = t.alt; - //System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach); + System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach); trace(ip); short opcode = code[ip]; ip++; // move to next instruction or first byte of operand @@ -91,9 +109,11 @@ processOneChar: } break; case Bytecode.ACTION : + bypassedAction = true; if ( doActions ) { - int actionIndex = getShort(code, ip); - System.out.println("action "+ actionIndex); + int ruleIndex = getShort(code, ip); + int actionIndex = getShort(code, ip+2); + System.out.println("action "+ ruleIndex+", "+actionIndex); } break; case Bytecode.ACCEPT : @@ -180,11 +200,11 @@ processOneChar: case Bytecode.JMP : addToClosure(closure, getShort(code, ip), alt, context); break; + case Bytecode.ACTION : + ip += 2; // has 2 more bytes than LABEL/SAVE case Bytecode.LABEL : case Bytecode.SAVE : - case Bytecode.ACTION : // see through them for closure ops - int labelIndex = getShort(code, ip); ip += 2; addToClosure(closure, ip, alt, context); // do closure past SAVE break; diff --git a/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java b/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java index 3db3258ed..1c20abec3 100644 --- a/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java +++ b/tool/src/org/antlr/v4/codegen/NFABytecodeGenerator.java @@ -27,15 +27,13 @@ public class NFABytecodeGenerator extends TreeParser { Map ruleToAddr = new HashMap(); int[] tokenTypeToAddr; - Map> labels = new HashMap>(); + Map> ruleLabels = new HashMap>(); + + Map> ruleActions = new HashMap>(); public Rule currentRule; - /** labels in all rules share single label space - * but we still track labels per rule so we can translate $label - * to an index in an action. - */ - public int numLabels = 0; + public int labelIndex = 0; public NFABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) { super(input, state); @@ -44,10 +42,47 @@ public class NFABytecodeGenerator extends TreeParser { public void emit(Instr I) { I.addr = ip; I.rule = currentRule; + I.gen = this; ip += I.nBytes(); instrs.add(I); } + // indexed from 0 per rule + public int getActionIndex(Rule r, Token actionToken) { + Map actions = ruleActions.get(r); + if ( actions==null ) { + actions = new HashMap(); + ruleActions.put(r, actions); + } + if ( actions.get(actionToken)!=null ) { + return actions.get(actionToken); + } + else { + int i = actions.size(); + actions.put(actionToken, i); + return i; + } + } + + /** labels in all rules share single label space + * but we still track labels per rule so we can translate $label + * to an index in an action. + */ + public int getLabelIndex(Rule r, String labelName) { + Map labels = ruleLabels.get(r); + if ( labels==null ) { + labels = new HashMap(); + ruleLabels.put(r, labels); + } + if ( labels.get(labelName)!=null ) { + return labels.get(labelName); + } + else { + labels.put(labelName, labelIndex); + return labelIndex++; + } + } + public void emitString(Token t) { String chars = Target.getStringFromGrammarStringLiteral(t.getText()); for (char c : chars.toCharArray()) { @@ -61,12 +96,14 @@ public class NFABytecodeGenerator extends TreeParser { byte[] code = new byte[size]; // resolve CALL instruction targets and index labels before generating code + // TODO: move this code to Instr objects? Need code gen pointer then. for (Instr I : instrs) { if ( I instanceof CallInstr ) { CallInstr C = (CallInstr) I; String ruleName = C.token.getText(); C.target = ruleToAddr.get(ruleName); } +/* else if ( I instanceof LabelInstr ) { LabelInstr L = (LabelInstr)I; Map ruleLabels = labels.get(I.rule); @@ -79,8 +116,8 @@ public class NFABytecodeGenerator extends TreeParser { L.labelIndex = ruleLabels.get(labelName); } else { - ruleLabels.put(labelName, numLabels); - L.labelIndex = numLabels++; + ruleLabels.put(labelName, labelIndex); + L.labelIndex = labelIndex++; } } else if ( I instanceof SaveInstr ) { @@ -88,6 +125,7 @@ public class NFABytecodeGenerator extends TreeParser { Map ruleLabels = labels.get(I.rule); S.labelIndex = ruleLabels.get(S.token.getText()); } + */ } for (Instr I : instrs) { I.write(code); @@ -138,7 +176,7 @@ public class NFABytecodeGenerator extends TreeParser { System.out.println(Bytecode.disassemble(code)); System.out.println("rule addrs="+gen.ruleToAddr); - return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.numLabels); + return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.labelIndex); } /** Write value at index into a byte array highest to lowest byte, diff --git a/tool/src/org/antlr/v4/codegen/nfa/ActionInstr.java b/tool/src/org/antlr/v4/codegen/nfa/ActionInstr.java index f7c8a6a8c..5c28dd94f 100644 --- a/tool/src/org/antlr/v4/codegen/nfa/ActionInstr.java +++ b/tool/src/org/antlr/v4/codegen/nfa/ActionInstr.java @@ -12,10 +12,11 @@ public class ActionInstr extends Instr { this.token = token; } public short opcode() { return Bytecode.ACTION; }; - public int nBytes() { return 1+2; } + public int nBytes() { return 1+2*2; } public void write(byte[] code) { super.write(code); - NFABytecodeGenerator.writeShort(code, addr+1, (short)actionIndex); + NFABytecodeGenerator.writeShort(code, addr+1, (short)rule.index); + NFABytecodeGenerator.writeShort(code, addr+1+2, (short)gen.getActionIndex(rule, token)); } public String toString() { return addr+":ActionInstr "+actionIndex; } } diff --git a/tool/src/org/antlr/v4/codegen/nfa/Instr.java b/tool/src/org/antlr/v4/codegen/nfa/Instr.java index d914f4ba2..2707375d0 100644 --- a/tool/src/org/antlr/v4/codegen/nfa/Instr.java +++ b/tool/src/org/antlr/v4/codegen/nfa/Instr.java @@ -1,11 +1,14 @@ package org.antlr.v4.codegen.nfa; +import org.antlr.v4.codegen.NFABytecodeGenerator; import org.antlr.v4.tool.Rule; /** */ public abstract class Instr { public int addr; public Rule rule; + public NFABytecodeGenerator gen; + public abstract short opcode(); public abstract int nBytes(); public int charSize(int a, int b) { return Math.max(charSize(a), charSize(b)); } diff --git a/tool/src/org/antlr/v4/codegen/nfa/LabelInstr.java b/tool/src/org/antlr/v4/codegen/nfa/LabelInstr.java index 0aa68c5b5..316a4e41c 100644 --- a/tool/src/org/antlr/v4/codegen/nfa/LabelInstr.java +++ b/tool/src/org/antlr/v4/codegen/nfa/LabelInstr.java @@ -15,6 +15,7 @@ public class LabelInstr extends Instr { public int nBytes() { return 1+2; } public void write(byte[] code) { super.write(code); + labelIndex = gen.getLabelIndex(rule, token.getText()); NFABytecodeGenerator.writeShort(code, addr+1, (short)labelIndex); } public String toString() { return addr+":LabelInstr "+ labelIndex; } diff --git a/tool/src/org/antlr/v4/codegen/nfa/SaveInstr.java b/tool/src/org/antlr/v4/codegen/nfa/SaveInstr.java index 78f50190c..75c865481 100644 --- a/tool/src/org/antlr/v4/codegen/nfa/SaveInstr.java +++ b/tool/src/org/antlr/v4/codegen/nfa/SaveInstr.java @@ -15,6 +15,7 @@ public class SaveInstr extends Instr { public int nBytes() { return 1+2; } public void write(byte[] code) { super.write(code); + labelIndex = gen.getLabelIndex(rule, token.getText()); NFABytecodeGenerator.writeShort(code, addr+1, (short) labelIndex); } public String toString() { return addr+":SaveInstr "+ labelIndex; } diff --git a/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java b/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java index fa3ab5b07..ca9dde0c1 100644 --- a/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java +++ b/tool/test/org/antlr/v4/test/TestNFABytecodeGeneration.java @@ -163,6 +163,22 @@ public class TestNFABytecodeGeneration extends BaseTest { checkBytecode(g, expecting); } + @Test public void testAction() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar L;\n" + + "A : {foo} 'a' | 'b' {bar} ;\n"); + String expecting = + "0000:\tsplit 5\n" + + "0005:\tsplit 12, 22\n" + + "0012:\taction 1, 0\n" + + "0017:\tmatch8 'a'\n" + + "0019:\tjmp 29\n" + + "0022:\tmatch8 'b'\n" + + "0024:\taction 1, 1\n" + + "0029:\taccept 4\n"; + checkBytecode(g, expecting); + } + public void _template() throws Exception { LexerGrammar g = new LexerGrammar( "\n"); diff --git a/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java b/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java index 55c795fb6..cb1bc077a 100644 --- a/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java +++ b/tool/test/org/antlr/v4/test/TestNFABytecodeInterp.java @@ -157,6 +157,15 @@ public class TestNFABytecodeInterp extends BaseTest { checkMatches(g, "bc", "B, EOF", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]"); } + @Test public void testAction() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar L;\n" + + "I : {a1} d=D {a2} ;\n" + + "fragment D : ('0'..'9' {a3})+ ;\n"); + checkMatches(g, "901", "I, EOF", "[[@-1,0:2='901',<0>,1:0]]"); + } + + public void _template() throws Exception { LexerGrammar g = new LexerGrammar( "\n"); @@ -196,7 +205,7 @@ public class TestNFABytecodeInterp extends BaseTest { List tokenTypes = new ArrayList(); int ttype = 0; do { - ttype = nfa.execThompson(in, 0, true); + ttype = nfa.execThompson(in); tokenTypes.add(ttype); } while ( ttype!= Token.EOF ); assertEquals(expectingTokenTypes, tokenTypes);