got sempreds in
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6838]
parent e6d65be27b
commit b0ca11f46b
@@ -70,7 +70,7 @@ public class Bytecode {
         new Instruction("ret"),
         new Instruction("label", OperandType.SHORT),
         new Instruction("save", OperandType.SHORT),
-        new Instruction("sempred", OperandType.SHORT),
+        new Instruction("sempred", OperandType.SHORT, OperandType.SHORT), // sempred ruleIndex, predIndex
         new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex
     };
 
@@ -5,6 +5,7 @@ import org.antlr.runtime.Token;
 import org.antlr.v4.runtime.CommonToken;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 
@@ -14,6 +15,7 @@ public class NFA {
     public Map<String, Integer> ruleToAddr;
     public int[] tokenTypeToAddr;
     public CommonToken[] labelValues;
+    public int nLabels;
 
     /** If we hit an action, we'll have to rewind and do the winning rule again */
     boolean bypassedAction;
@@ -23,17 +25,20 @@ public class NFA {
         this.code = code;
         this.ruleToAddr = ruleToAddr;
         this.tokenTypeToAddr = tokenTypeToAddr;
+        this.nLabels = nLabels;
         labelValues = new CommonToken[nLabels];
     }
 
     public int execThompson(CharStream input) {
         int m = input.mark();
+        Arrays.fill(labelValues, null);
         int ttype = execThompson(input, 0, false);
-        System.out.println("ttype="+ttype);
+        System.out.println("first attempt ttype="+ttype);
         if ( bypassedAction ) {
             input.rewind(m);
             System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling");
             bypassedAction = false;
+            Arrays.fill(labelValues, null);
             int ttype2 = execThompson(input, tokenTypeToAddr[ttype], true);
             if ( ttype!=ttype2 ) {
                 System.err.println("eh? token diff with action(s)");
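The two-pass scheme above (match once without side effects, then rewind and re-run the winning rule with actions enabled) is what the tests later in this diff exercise. A minimal usage sketch, assuming the getBytecode/execThompson entry points shown in those tests:

    NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
    ANTLRStringStream in = new ANTLRStringStream("901");
    int ttype = nfa.execThompson(in);  // pass 1 skips actions; if any were bypassed, rewind and re-run with doActions=true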
@@ -96,24 +101,20 @@ processOneChar:
                     }
                     break;
                 case Bytecode.LABEL :
-                    if ( doActions ) {
-                        int labelIndex = getShort(code, ip);
-                        labelValues[labelIndex] =
-                            new CommonToken(input, 0, 0, input.index(), -1);
-                    }
+                    int labelIndex = getShort(code, ip);
+                    labelValues[labelIndex] =
+                        new CommonToken(input, 0, 0, input.index(), -1);
                     break;
                 case Bytecode.SAVE :
-                    if ( doActions ) {
-                        int labelIndex = getShort(code, ip);
-                        labelValues[labelIndex].setStopIndex(input.index()-1);
-                    }
+                    labelIndex = getShort(code, ip);
+                    labelValues[labelIndex].setStopIndex(input.index()-1);
                     break;
                 case Bytecode.ACTION :
                     bypassedAction = true;
                     if ( doActions ) {
                         int ruleIndex = getShort(code, ip);
                         int actionIndex = getShort(code, ip+2);
-                        System.out.println("action "+ ruleIndex+", "+actionIndex);
+                        action(ruleIndex, actionIndex);
                     }
                     break;
                 case Bytecode.ACCEPT :
@@ -154,6 +155,7 @@ processOneChar:
                 case Bytecode.SPLIT :
                 case Bytecode.CALL :
                 case Bytecode.RET :
+                case Bytecode.SEMPRED :
                     break;
                 default :
                     throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
@@ -231,7 +233,13 @@ processOneChar:
                 }
                 break;
             case Bytecode.SEMPRED :
-                // TODO: add next instruction only if sempred succeeds
+                // add next instruction only if sempred succeeds
+                int ruleIndex = getShort(code, ip);
+                int actionIndex = getShort(code, ip+2);
+                System.out.println("eval sempred "+ ruleIndex+", "+actionIndex);
+                if ( sempred(ruleIndex, actionIndex) ) {
+                    addToClosure(closure, ip+4, alt, context);
+                }
                 break;
         }
     }
@@ -257,7 +265,7 @@ processOneChar:
 
     // ---------------------------------------------------------------------
 
-    // this stuff below can't do SAVE nor CALL/RET but faster.
+    // this stuff below can't do SAVE nor CALL/RET but faster. (nor preds)
 
     public int execThompson_no_stack(CharStream input, int ip) {
         int c = input.LA(1);
@@ -408,6 +416,15 @@ processOneChar:
         return (memory[index]&0xFF) <<(8*1) | (memory[index+1]&0xFF); // prevent sign extension with mask
     }
 
+    // subclass needs to override these if there are sempreds or actions in lexer rules
+
+    public boolean sempred(int ruleIndex, int actionIndex) {
+        return true;
+    }
+
+    public void action(int ruleIndex, int actionIndex) {
+    }
+
 /*
     public int exec(CharStream input, String ruleName) {
         return exec(input, ruleToAddr.get(ruleName));
 
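The hunk above adds default sempred()/action() hooks to NFA (predicates default to true, actions to no-ops). Below is a sketch of a subclass that supplies real behavior; it is illustrative only and not part of this commit, and the constructor signature is assumed from the field assignments in the @@ -23,17 hunk:

    // hypothetical subclass, for illustration only
    class PredLexerNFA extends NFA {
        public PredLexerNFA(byte[] code, Map<String, Integer> ruleToAddr,
                            int[] tokenTypeToAddr, int nLabels)
        {
            super(code, ruleToAddr, tokenTypeToAddr, nLabels);
        }
        @Override
        public boolean sempred(int ruleIndex, int predIndex) {
            // evaluate the predIndex-th predicate of rule ruleIndex; false prunes that closure path
            return true;
        }
        @Override
        public void action(int ruleIndex, int actionIndex) {
            // run the actionIndex-th embedded action of rule ruleIndex (second, doActions pass only)
        }
    }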
@@ -28,8 +28,8 @@ public class NFABytecodeGenerator extends TreeParser {
     int[] tokenTypeToAddr;
 
     Map<Rule, Map<String, Integer>> ruleLabels = new HashMap<Rule, Map<String, Integer>>();
 
     Map<Rule, Map<Token, Integer>> ruleActions = new HashMap<Rule, Map<Token, Integer>>();
+    Map<Rule, Map<Token, Integer>> ruleSempreds = new HashMap<Rule, Map<Token, Integer>>();
 
     public Rule currentRule;
 
@@ -64,6 +64,23 @@ public class NFABytecodeGenerator extends TreeParser {
         }
     }
 
+    // indexed from 0 per rule
+    public int getSempredIndex(Rule r, Token actionToken) {
+        Map<Token, Integer> actions = ruleSempreds.get(r);
+        if ( actions==null ) {
+            actions = new HashMap<Token, Integer>();
+            ruleSempreds.put(r, actions);
+        }
+        if ( actions.get(actionToken)!=null ) {
+            return actions.get(actionToken);
+        }
+        else {
+            int i = actions.size();
+            actions.put(actionToken, i);
+            return i;
+        }
+    }
+
     /** labels in all rules share single label space
      * but we still track labels per rule so we can translate $label
      * to an index in an action.
@@ -95,37 +112,13 @@ public class NFABytecodeGenerator extends TreeParser {
         int size = last.addr + last.nBytes();
         byte[] code = new byte[size];
 
-        // resolve CALL instruction targets and index labels before generating code
-        // TODO: move this code to Instr objects? Need code gen pointer then.
+        // resolve CALL instruction targets before generating code
         for (Instr I : instrs) {
             if ( I instanceof CallInstr ) {
                 CallInstr C = (CallInstr) I;
                 String ruleName = C.token.getText();
                 C.target = ruleToAddr.get(ruleName);
             }
-            /*
-            else if ( I instanceof LabelInstr ) {
-                LabelInstr L = (LabelInstr)I;
-                Map<String, Integer> ruleLabels = labels.get(I.rule);
-                if ( ruleLabels==null ) {
-                    ruleLabels = new HashMap<String, Integer>();
-                    labels.put(I.rule, ruleLabels);
-                }
-                String labelName = L.token.getText();
-                if ( ruleLabels.get(labelName)!=null ) {
-                    L.labelIndex = ruleLabels.get(labelName);
-                }
-                else {
-                    ruleLabels.put(labelName, labelIndex);
-                    L.labelIndex = labelIndex++;
-                }
-            }
-            else if ( I instanceof SaveInstr ) {
-                SaveInstr S = (SaveInstr)I;
-                Map<String, Integer> ruleLabels = labels.get(I.rule);
-                S.labelIndex = ruleLabels.get(S.token.getText());
-            }
-            */
         }
         for (Instr I : instrs) {
             I.write(code);
 
@@ -12,10 +12,11 @@ public class SemPredInstr extends Instr {
         this.token = token;
     }
     public short opcode() { return Bytecode.SEMPRED; };
-    public int nBytes() { return 1+2; }
+    public int nBytes() { return 1+2*2; }
     public void write(byte[] code) {
         super.write(code);
-        NFABytecodeGenerator.writeShort(code, addr+1, (short) predIndex);
+        NFABytecodeGenerator.writeShort(code, addr+1, (short)rule.index);
+        NFABytecodeGenerator.writeShort(code, addr+1+2, (short)gen.getSempredIndex(rule, token));
     }
     public String toString() { return addr+":SemPredInstr "+ predIndex; }
 }
 
@@ -179,6 +179,22 @@ public class TestNFABytecodeGeneration extends BaseTest {
         checkBytecode(g, expecting);
     }
 
+    @Test public void testSempred() throws Exception {
+        LexerGrammar g = new LexerGrammar(
+            "lexer grammar L;\n" +
+            "A : {foo}? 'a' | 'b' {bar}? ;\n");
+        String expecting =
+            "0000:\tsplit 5\n" +
+            "0005:\tsplit 12, 22\n" +
+            "0012:\tsempred 1, 0\n" +
+            "0017:\tmatch8 'a'\n" +
+            "0019:\tjmp 29\n" +
+            "0022:\tmatch8 'b'\n" +
+            "0024:\tsempred 1, 1\n" +
+            "0029:\taccept 4\n";
+        checkBytecode(g, expecting);
+    }
+
     public void _template() throws Exception {
         LexerGrammar g = new LexerGrammar(
             "\n");
 
@@ -123,14 +123,14 @@ public class TestNFABytecodeInterp extends BaseTest {
         LexerGrammar g = new LexerGrammar(
             "lexer grammar L;\n" +
             "A : a='a' ;\n");
-        checkMatches(g, "a", "A, EOF", "[[@-1,0:0='a',<0>,1:0]]");
+        checkLabels(g, "a", "A", "[[@-1,0:0='a',<0>,1:0]]");
     }
 
     @Test public void testLabeledString() throws Exception {
         LexerGrammar g = new LexerGrammar(
             "lexer grammar L;\n" +
             "A : a='abc' ;\n");
-        checkMatches(g, "abc", "A, EOF", "[[@-1,0:2='abc',<0>,1:0]]");
+        checkLabels(g, "abc", "A", "[[@-1,0:2='abc',<0>,1:0]]");
     }
 
     @Test public void testLabeledToken() throws Exception {
@@ -138,7 +138,7 @@ public class TestNFABytecodeInterp extends BaseTest {
             "lexer grammar L;\n" +
             "I : d=D ;\n" +
             "fragment D : '0'..'9'+ ;\n");
-        checkMatches(g, "901", "I, EOF", "[[@-1,0:2='901',<0>,1:0]]");
+        checkLabels(g, "901", "I", "[[@-1,0:2='901',<0>,1:0]]");
     }
 
     @Test public void testLabelInLoopIsLastElement() throws Exception {
@@ -146,7 +146,7 @@ public class TestNFABytecodeInterp extends BaseTest {
             "lexer grammar L;\n" +
             "I : d=D+ ;\n" +
             "fragment D : '0'..'9' ;\n");
-        checkMatches(g, "901", "I, EOF", "[[@-1,2:2='1',<0>,1:2]]");
+        checkLabels(g, "901", "I", "[[@-1,2:2='1',<0>,1:2]]");
     }
 
     @Test public void testLabelIndexes() throws Exception {
@@ -154,7 +154,7 @@ public class TestNFABytecodeInterp extends BaseTest {
             "lexer grammar L;\n" +
             "A : a='a' ;\n" +
             "B : a='b' b='c' ;\n");
-        checkMatches(g, "bc", "B, EOF", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]");
+        checkLabels(g, "bc", "B", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]");
     }
 
     @Test public void testAction() throws Exception {
@@ -162,7 +162,15 @@ public class TestNFABytecodeInterp extends BaseTest {
             "lexer grammar L;\n" +
             "I : {a1} d=D {a2} ;\n" +
             "fragment D : ('0'..'9' {a3})+ ;\n");
-        checkMatches(g, "901", "I, EOF", "[[@-1,0:2='901',<0>,1:0]]");
+        checkLabels(g, "901", "I", "[[@-1,0:2='901',<0>,1:0]]");
+    }
+
+    @Test public void testSempred() throws Exception {
+        // not actually evaluating preds since we're interpreting; assumes true.
+        LexerGrammar g = new LexerGrammar(
+            "lexer grammar L;\n" +
+            "A : {true}? 'a' | 'b' {true}? ;\n");
+        checkMatches(g, "ab", "A, A, EOF");
     }
 
@@ -174,12 +182,6 @@ public class TestNFABytecodeInterp extends BaseTest {
     }
 
     void checkMatches(LexerGrammar g, String input, String expecting) {
-        checkMatches(g, input, expecting, null);
-    }
-
-    void checkMatches(LexerGrammar g, String input, String expecting,
-                      String expectingTokens)
-    {
         if ( g.ast!=null && !g.ast.hasErrors ) {
             System.out.println(g.ast.toStringTree());
             Tool antlr = new Tool();
@@ -209,6 +211,37 @@ public class TestNFABytecodeInterp extends BaseTest {
             tokenTypes.add(ttype);
         } while ( ttype!= Token.EOF );
         assertEquals(expectingTokenTypes, tokenTypes);
+    }
+
+    void checkLabels(LexerGrammar g, String input, String expecting,
+                     String expectingTokens)
+    {
+        if ( g.ast!=null && !g.ast.hasErrors ) {
+            System.out.println(g.ast.toStringTree());
+            Tool antlr = new Tool();
+            SemanticPipeline sem = new SemanticPipeline(g);
+            sem.process();
+            if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
+                for (Grammar imp : g.getImportedGrammars()) {
+                    antlr.process(imp);
+                }
+            }
+        }
+
+        List<Integer> expectingTokenTypes = new ArrayList<Integer>();
+        if ( expecting!=null && !expecting.trim().equals("") ) {
+            for (String tname : expecting.replace(" ", "").split(",")) {
+                int ttype = g.getTokenType(tname);
+                expectingTokenTypes.add(ttype);
+            }
+        }
+
+        NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
+        ANTLRStringStream in = new ANTLRStringStream(input);
+        List<Integer> tokenTypes = new ArrayList<Integer>();
+        int ttype = nfa.execThompson(in);
+        tokenTypes.add(ttype);
+        assertEquals(expectingTokenTypes, tokenTypes);
 
         if ( expectingTokens!=null ) {
             assertEquals(expectingTokens, Arrays.toString(nfa.labelValues));
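One consistency check across these hunks: SemPredInstr.nBytes() is now 1+2*2 = 5 (an opcode byte plus two 16-bit operands), which matches the addresses expected by testSempred above (sempred at 0012 followed by 0017, and at 0024 followed by 0029). A decoding sketch that mirrors where SemPredInstr.write() puts the operands; illustrative only:

    // operands written at addr+1 and addr+1+2 by SemPredInstr.write()
    int ruleIndex = getShort(code, addr+1);    // rule index
    int predIndex = getShort(code, addr+1+2);  // predicate index within that rule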