rewinds now and does with feeling for actions in rules
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6837]
This commit is contained in:
parent
cf7a730b31
commit
e6d65be27b
|
@ -71,7 +71,7 @@ public class Bytecode {
|
||||||
new Instruction("label", OperandType.SHORT),
|
new Instruction("label", OperandType.SHORT),
|
||||||
new Instruction("save", OperandType.SHORT),
|
new Instruction("save", OperandType.SHORT),
|
||||||
new Instruction("sempred", OperandType.SHORT),
|
new Instruction("sempred", OperandType.SHORT),
|
||||||
new Instruction("action", OperandType.SHORT),
|
new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex
|
||||||
};
|
};
|
||||||
|
|
||||||
public static String disassemble(byte[] code, int start) {
|
public static String disassemble(byte[] code, int start) {
|
||||||
|
|
|
@ -15,6 +15,10 @@ public class NFA {
|
||||||
public int[] tokenTypeToAddr;
|
public int[] tokenTypeToAddr;
|
||||||
public CommonToken[] labelValues;
|
public CommonToken[] labelValues;
|
||||||
|
|
||||||
|
/** If we hit an action, we'll have to rewind and do the winning rule again */
|
||||||
|
boolean bypassedAction;
|
||||||
|
|
||||||
|
|
||||||
public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
|
public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
|
||||||
this.code = code;
|
this.code = code;
|
||||||
this.ruleToAddr = ruleToAddr;
|
this.ruleToAddr = ruleToAddr;
|
||||||
|
@ -23,7 +27,21 @@ public class NFA {
|
||||||
}
|
}
|
||||||
|
|
||||||
public int execThompson(CharStream input) {
|
public int execThompson(CharStream input) {
|
||||||
return execThompson(input, 0, false);
|
int m = input.mark();
|
||||||
|
int ttype = execThompson(input, 0, false);
|
||||||
|
System.out.println("ttype="+ttype);
|
||||||
|
if ( bypassedAction ) {
|
||||||
|
input.rewind(m);
|
||||||
|
System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling");
|
||||||
|
bypassedAction = false;
|
||||||
|
int ttype2 = execThompson(input, tokenTypeToAddr[ttype], true);
|
||||||
|
if ( ttype!=ttype2 ) {
|
||||||
|
System.err.println("eh? token diff with action(s)");
|
||||||
|
}
|
||||||
|
else System.out.println("types are same");
|
||||||
|
}
|
||||||
|
else input.release(m);
|
||||||
|
return ttype;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int execThompson(CharStream input, int ip, boolean doActions) {
|
public int execThompson(CharStream input, int ip, boolean doActions) {
|
||||||
|
@ -47,7 +65,7 @@ processOneChar:
|
||||||
ip = t.addr;
|
ip = t.addr;
|
||||||
NFAStack context = t.context;
|
NFAStack context = t.context;
|
||||||
int alt = t.alt;
|
int alt = t.alt;
|
||||||
//System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
|
System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
|
||||||
trace(ip);
|
trace(ip);
|
||||||
short opcode = code[ip];
|
short opcode = code[ip];
|
||||||
ip++; // move to next instruction or first byte of operand
|
ip++; // move to next instruction or first byte of operand
|
||||||
|
@ -91,9 +109,11 @@ processOneChar:
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Bytecode.ACTION :
|
case Bytecode.ACTION :
|
||||||
|
bypassedAction = true;
|
||||||
if ( doActions ) {
|
if ( doActions ) {
|
||||||
int actionIndex = getShort(code, ip);
|
int ruleIndex = getShort(code, ip);
|
||||||
System.out.println("action "+ actionIndex);
|
int actionIndex = getShort(code, ip+2);
|
||||||
|
System.out.println("action "+ ruleIndex+", "+actionIndex);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Bytecode.ACCEPT :
|
case Bytecode.ACCEPT :
|
||||||
|
@ -180,11 +200,11 @@ processOneChar:
|
||||||
case Bytecode.JMP :
|
case Bytecode.JMP :
|
||||||
addToClosure(closure, getShort(code, ip), alt, context);
|
addToClosure(closure, getShort(code, ip), alt, context);
|
||||||
break;
|
break;
|
||||||
|
case Bytecode.ACTION :
|
||||||
|
ip += 2; // has 2 more bytes than LABEL/SAVE
|
||||||
case Bytecode.LABEL :
|
case Bytecode.LABEL :
|
||||||
case Bytecode.SAVE :
|
case Bytecode.SAVE :
|
||||||
case Bytecode.ACTION :
|
|
||||||
// see through them for closure ops
|
// see through them for closure ops
|
||||||
int labelIndex = getShort(code, ip);
|
|
||||||
ip += 2;
|
ip += 2;
|
||||||
addToClosure(closure, ip, alt, context); // do closure past SAVE
|
addToClosure(closure, ip, alt, context); // do closure past SAVE
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -27,15 +27,13 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
|
Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
|
||||||
int[] tokenTypeToAddr;
|
int[] tokenTypeToAddr;
|
||||||
|
|
||||||
Map<Rule, Map<String, Integer>> labels = new HashMap<Rule, Map<String, Integer>>();
|
Map<Rule, Map<String, Integer>> ruleLabels = new HashMap<Rule, Map<String, Integer>>();
|
||||||
|
|
||||||
|
Map<Rule, Map<Token, Integer>> ruleActions = new HashMap<Rule, Map<Token, Integer>>();
|
||||||
|
|
||||||
public Rule currentRule;
|
public Rule currentRule;
|
||||||
|
|
||||||
/** labels in all rules share single label space
|
public int labelIndex = 0;
|
||||||
* but we still track labels per rule so we can translate $label
|
|
||||||
* to an index in an action.
|
|
||||||
*/
|
|
||||||
public int numLabels = 0;
|
|
||||||
|
|
||||||
public NFABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
|
public NFABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
|
||||||
super(input, state);
|
super(input, state);
|
||||||
|
@ -44,10 +42,47 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
public void emit(Instr I) {
|
public void emit(Instr I) {
|
||||||
I.addr = ip;
|
I.addr = ip;
|
||||||
I.rule = currentRule;
|
I.rule = currentRule;
|
||||||
|
I.gen = this;
|
||||||
ip += I.nBytes();
|
ip += I.nBytes();
|
||||||
instrs.add(I);
|
instrs.add(I);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// indexed from 0 per rule
|
||||||
|
public int getActionIndex(Rule r, Token actionToken) {
|
||||||
|
Map<Token, Integer> actions = ruleActions.get(r);
|
||||||
|
if ( actions==null ) {
|
||||||
|
actions = new HashMap<Token, Integer>();
|
||||||
|
ruleActions.put(r, actions);
|
||||||
|
}
|
||||||
|
if ( actions.get(actionToken)!=null ) {
|
||||||
|
return actions.get(actionToken);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int i = actions.size();
|
||||||
|
actions.put(actionToken, i);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** labels in all rules share single label space
|
||||||
|
* but we still track labels per rule so we can translate $label
|
||||||
|
* to an index in an action.
|
||||||
|
*/
|
||||||
|
public int getLabelIndex(Rule r, String labelName) {
|
||||||
|
Map<String, Integer> labels = ruleLabels.get(r);
|
||||||
|
if ( labels==null ) {
|
||||||
|
labels = new HashMap<String, Integer>();
|
||||||
|
ruleLabels.put(r, labels);
|
||||||
|
}
|
||||||
|
if ( labels.get(labelName)!=null ) {
|
||||||
|
return labels.get(labelName);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
labels.put(labelName, labelIndex);
|
||||||
|
return labelIndex++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void emitString(Token t) {
|
public void emitString(Token t) {
|
||||||
String chars = Target.getStringFromGrammarStringLiteral(t.getText());
|
String chars = Target.getStringFromGrammarStringLiteral(t.getText());
|
||||||
for (char c : chars.toCharArray()) {
|
for (char c : chars.toCharArray()) {
|
||||||
|
@ -61,12 +96,14 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
byte[] code = new byte[size];
|
byte[] code = new byte[size];
|
||||||
|
|
||||||
// resolve CALL instruction targets and index labels before generating code
|
// resolve CALL instruction targets and index labels before generating code
|
||||||
|
// TODO: move this code to Instr objects? Need code gen pointer then.
|
||||||
for (Instr I : instrs) {
|
for (Instr I : instrs) {
|
||||||
if ( I instanceof CallInstr ) {
|
if ( I instanceof CallInstr ) {
|
||||||
CallInstr C = (CallInstr) I;
|
CallInstr C = (CallInstr) I;
|
||||||
String ruleName = C.token.getText();
|
String ruleName = C.token.getText();
|
||||||
C.target = ruleToAddr.get(ruleName);
|
C.target = ruleToAddr.get(ruleName);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
else if ( I instanceof LabelInstr ) {
|
else if ( I instanceof LabelInstr ) {
|
||||||
LabelInstr L = (LabelInstr)I;
|
LabelInstr L = (LabelInstr)I;
|
||||||
Map<String, Integer> ruleLabels = labels.get(I.rule);
|
Map<String, Integer> ruleLabels = labels.get(I.rule);
|
||||||
|
@ -79,8 +116,8 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
L.labelIndex = ruleLabels.get(labelName);
|
L.labelIndex = ruleLabels.get(labelName);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ruleLabels.put(labelName, numLabels);
|
ruleLabels.put(labelName, labelIndex);
|
||||||
L.labelIndex = numLabels++;
|
L.labelIndex = labelIndex++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if ( I instanceof SaveInstr ) {
|
else if ( I instanceof SaveInstr ) {
|
||||||
|
@ -88,6 +125,7 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
Map<String, Integer> ruleLabels = labels.get(I.rule);
|
Map<String, Integer> ruleLabels = labels.get(I.rule);
|
||||||
S.labelIndex = ruleLabels.get(S.token.getText());
|
S.labelIndex = ruleLabels.get(S.token.getText());
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
for (Instr I : instrs) {
|
for (Instr I : instrs) {
|
||||||
I.write(code);
|
I.write(code);
|
||||||
|
@ -138,7 +176,7 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
System.out.println(Bytecode.disassemble(code));
|
System.out.println(Bytecode.disassemble(code));
|
||||||
System.out.println("rule addrs="+gen.ruleToAddr);
|
System.out.println("rule addrs="+gen.ruleToAddr);
|
||||||
|
|
||||||
return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.numLabels);
|
return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.labelIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Write value at index into a byte array highest to lowest byte,
|
/** Write value at index into a byte array highest to lowest byte,
|
||||||
|
|
|
@ -12,10 +12,11 @@ public class ActionInstr extends Instr {
|
||||||
this.token = token;
|
this.token = token;
|
||||||
}
|
}
|
||||||
public short opcode() { return Bytecode.ACTION; };
|
public short opcode() { return Bytecode.ACTION; };
|
||||||
public int nBytes() { return 1+2; }
|
public int nBytes() { return 1+2*2; }
|
||||||
public void write(byte[] code) {
|
public void write(byte[] code) {
|
||||||
super.write(code);
|
super.write(code);
|
||||||
NFABytecodeGenerator.writeShort(code, addr+1, (short)actionIndex);
|
NFABytecodeGenerator.writeShort(code, addr+1, (short)rule.index);
|
||||||
|
NFABytecodeGenerator.writeShort(code, addr+1+2, (short)gen.getActionIndex(rule, token));
|
||||||
}
|
}
|
||||||
public String toString() { return addr+":ActionInstr "+actionIndex; }
|
public String toString() { return addr+":ActionInstr "+actionIndex; }
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,14 @@
|
||||||
package org.antlr.v4.codegen.nfa;
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
import org.antlr.v4.tool.Rule;
|
import org.antlr.v4.tool.Rule;
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
public abstract class Instr {
|
public abstract class Instr {
|
||||||
public int addr;
|
public int addr;
|
||||||
public Rule rule;
|
public Rule rule;
|
||||||
|
public NFABytecodeGenerator gen;
|
||||||
|
|
||||||
public abstract short opcode();
|
public abstract short opcode();
|
||||||
public abstract int nBytes();
|
public abstract int nBytes();
|
||||||
public int charSize(int a, int b) { return Math.max(charSize(a), charSize(b)); }
|
public int charSize(int a, int b) { return Math.max(charSize(a), charSize(b)); }
|
||||||
|
|
|
@ -15,6 +15,7 @@ public class LabelInstr extends Instr {
|
||||||
public int nBytes() { return 1+2; }
|
public int nBytes() { return 1+2; }
|
||||||
public void write(byte[] code) {
|
public void write(byte[] code) {
|
||||||
super.write(code);
|
super.write(code);
|
||||||
|
labelIndex = gen.getLabelIndex(rule, token.getText());
|
||||||
NFABytecodeGenerator.writeShort(code, addr+1, (short)labelIndex);
|
NFABytecodeGenerator.writeShort(code, addr+1, (short)labelIndex);
|
||||||
}
|
}
|
||||||
public String toString() { return addr+":LabelInstr "+ labelIndex; }
|
public String toString() { return addr+":LabelInstr "+ labelIndex; }
|
||||||
|
|
|
@ -15,6 +15,7 @@ public class SaveInstr extends Instr {
|
||||||
public int nBytes() { return 1+2; }
|
public int nBytes() { return 1+2; }
|
||||||
public void write(byte[] code) {
|
public void write(byte[] code) {
|
||||||
super.write(code);
|
super.write(code);
|
||||||
|
labelIndex = gen.getLabelIndex(rule, token.getText());
|
||||||
NFABytecodeGenerator.writeShort(code, addr+1, (short) labelIndex);
|
NFABytecodeGenerator.writeShort(code, addr+1, (short) labelIndex);
|
||||||
}
|
}
|
||||||
public String toString() { return addr+":SaveInstr "+ labelIndex; }
|
public String toString() { return addr+":SaveInstr "+ labelIndex; }
|
||||||
|
|
|
@ -163,6 +163,22 @@ public class TestNFABytecodeGeneration extends BaseTest {
|
||||||
checkBytecode(g, expecting);
|
checkBytecode(g, expecting);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test public void testAction() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"A : {foo} 'a' | 'b' {bar} ;\n");
|
||||||
|
String expecting =
|
||||||
|
"0000:\tsplit 5\n" +
|
||||||
|
"0005:\tsplit 12, 22\n" +
|
||||||
|
"0012:\taction 1, 0\n" +
|
||||||
|
"0017:\tmatch8 'a'\n" +
|
||||||
|
"0019:\tjmp 29\n" +
|
||||||
|
"0022:\tmatch8 'b'\n" +
|
||||||
|
"0024:\taction 1, 1\n" +
|
||||||
|
"0029:\taccept 4\n";
|
||||||
|
checkBytecode(g, expecting);
|
||||||
|
}
|
||||||
|
|
||||||
public void _template() throws Exception {
|
public void _template() throws Exception {
|
||||||
LexerGrammar g = new LexerGrammar(
|
LexerGrammar g = new LexerGrammar(
|
||||||
"\n");
|
"\n");
|
||||||
|
|
|
@ -157,6 +157,15 @@ public class TestNFABytecodeInterp extends BaseTest {
|
||||||
checkMatches(g, "bc", "B, EOF", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]");
|
checkMatches(g, "bc", "B, EOF", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test public void testAction() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"I : {a1} d=D {a2} ;\n" +
|
||||||
|
"fragment D : ('0'..'9' {a3})+ ;\n");
|
||||||
|
checkMatches(g, "901", "I, EOF", "[[@-1,0:2='901',<0>,1:0]]");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public void _template() throws Exception {
|
public void _template() throws Exception {
|
||||||
LexerGrammar g = new LexerGrammar(
|
LexerGrammar g = new LexerGrammar(
|
||||||
"\n");
|
"\n");
|
||||||
|
@ -196,7 +205,7 @@ public class TestNFABytecodeInterp extends BaseTest {
|
||||||
List<Integer> tokenTypes = new ArrayList<Integer>();
|
List<Integer> tokenTypes = new ArrayList<Integer>();
|
||||||
int ttype = 0;
|
int ttype = 0;
|
||||||
do {
|
do {
|
||||||
ttype = nfa.execThompson(in, 0, true);
|
ttype = nfa.execThompson(in);
|
||||||
tokenTypes.add(ttype);
|
tokenTypes.add(ttype);
|
||||||
} while ( ttype!= Token.EOF );
|
} while ( ttype!= Token.EOF );
|
||||||
assertEquals(expectingTokenTypes, tokenTypes);
|
assertEquals(expectingTokenTypes, tokenTypes);
|
||||||
|
|
Loading…
Reference in New Issue