added NOT/SET ops, refactored PDA generation
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6900]
This commit is contained in:
parent
acf962bc28
commit
19aecd3163
|
@ -39,21 +39,22 @@ public class Bytecode {
|
||||||
// be an array of objects (Bytecode[]). We want it to be byte[].
|
// be an array of objects (Bytecode[]). We want it to be byte[].
|
||||||
|
|
||||||
// INSTRUCTION BYTECODES (byte is signed; use a short to keep 0..255)
|
// INSTRUCTION BYTECODES (byte is signed; use a short to keep 0..255)
|
||||||
public static final short ACCEPT = 1;
|
public static final short ACCEPT = 1;
|
||||||
public static final short JMP = 2;
|
public static final short JMP = 2;
|
||||||
public static final short SPLIT = 3;
|
public static final short SPLIT = 3;
|
||||||
public static final short MATCH8 = 4;
|
public static final short MATCH8 = 4;
|
||||||
public static final short MATCH16 = 5;
|
public static final short MATCH16 = 5;
|
||||||
public static final short RANGE8 = 6;
|
public static final short RANGE8 = 6;
|
||||||
public static final short RANGE16 = 7;
|
public static final short RANGE16 = 7;
|
||||||
public static final short WILDCARD = 8;
|
public static final short WILDCARD = 8;
|
||||||
//public static final short NOT = 8; ???
|
public static final short SET = 9;
|
||||||
public static final short CALL = 9; // JMP with a push
|
public static final short CALL = 10; // JMP with a push
|
||||||
public static final short RET = 10; // an accept instr for fragment rules
|
public static final short RET = 11; // an accept instr for fragment rules
|
||||||
public static final short LABEL = 11;
|
public static final short LABEL = 12;
|
||||||
public static final short SAVE = 12;
|
public static final short SAVE = 13;
|
||||||
public static final short SEMPRED = 13;
|
public static final short SEMPRED = 14;
|
||||||
public static final short ACTION = 14;
|
public static final short ACTION = 15;
|
||||||
|
public static final short NOT = 16; // not next match instr
|
||||||
|
|
||||||
/** Used for disassembly; describes instruction set */
|
/** Used for disassembly; describes instruction set */
|
||||||
public static Instruction[] instructions = new Instruction[] {
|
public static Instruction[] instructions = new Instruction[] {
|
||||||
|
@ -66,12 +67,14 @@ public class Bytecode {
|
||||||
new Instruction("range8", OperandType.BYTE, OperandType.BYTE),
|
new Instruction("range8", OperandType.BYTE, OperandType.BYTE),
|
||||||
new Instruction("range16", OperandType.CHAR, OperandType.CHAR),
|
new Instruction("range16", OperandType.CHAR, OperandType.CHAR),
|
||||||
new Instruction("wildcard"),
|
new Instruction("wildcard"),
|
||||||
|
new Instruction("set", OperandType.SHORT),
|
||||||
new Instruction("call", OperandType.ADDR),
|
new Instruction("call", OperandType.ADDR),
|
||||||
new Instruction("ret"),
|
new Instruction("ret"),
|
||||||
new Instruction("label", OperandType.SHORT),
|
new Instruction("label", OperandType.SHORT),
|
||||||
new Instruction("save", OperandType.SHORT),
|
new Instruction("save", OperandType.SHORT),
|
||||||
new Instruction("sempred", OperandType.SHORT, OperandType.SHORT), // sempred ruleIndex, predIndex
|
new Instruction("sempred", OperandType.SHORT, OperandType.SHORT), // sempred ruleIndex, predIndex
|
||||||
new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex
|
new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex
|
||||||
|
new Instruction("not"),
|
||||||
};
|
};
|
||||||
|
|
||||||
public static String disassemble(byte[] code, int start, boolean operandsAreChars) {
|
public static String disassemble(byte[] code, int start, boolean operandsAreChars) {
|
||||||
|
|
|
@ -8,7 +8,6 @@ import org.antlr.v4.runtime.CommonToken;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/** A (nondeterministic) pushdown bytecode machine for lexing and LL prediction.
|
/** A (nondeterministic) pushdown bytecode machine for lexing and LL prediction.
|
||||||
* Derived partially from Cox' description of Thompson's 1960s work:
|
* Derived partially from Cox' description of Thompson's 1960s work:
|
||||||
|
@ -22,28 +21,20 @@ public class PDA {
|
||||||
public interface sempred_fptr { boolean eval(int predIndex); }
|
public interface sempred_fptr { boolean eval(int predIndex); }
|
||||||
|
|
||||||
public byte[] code;
|
public byte[] code;
|
||||||
public Map<String, Integer> ruleToAddr;
|
//public Map<String, Integer> ruleToAddr;
|
||||||
public int[] tokenTypeToAddr;
|
public int[] altToAddr; // either token type (in lexer) or alt num for DFA in parser
|
||||||
public CommonToken[] labelValues;
|
public CommonToken[] labelValues;
|
||||||
public int nLabels;
|
public int nLabels;
|
||||||
|
|
||||||
/** If we hit an action, we'll have to rewind and do the winning rule again */
|
/** If we hit an action, we'll have to rewind and do the winning rule again */
|
||||||
boolean bypassedAction;
|
boolean bypassedAction;
|
||||||
|
|
||||||
public PDA() {;}
|
boolean notNextMatch;
|
||||||
|
|
||||||
public PDA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
|
|
||||||
this.code = code;
|
|
||||||
this.ruleToAddr = ruleToAddr;
|
|
||||||
this.tokenTypeToAddr = tokenTypeToAddr;
|
|
||||||
this.nLabels = nLabels;
|
|
||||||
labelValues = new CommonToken[nLabels];
|
|
||||||
}
|
|
||||||
|
|
||||||
public PDA(byte[] code, int[] tokenTypeToAddr, int nLabels) {
|
public PDA(byte[] code, int[] altToAddr, int nLabels) {
|
||||||
System.out.println("code="+Arrays.toString(code));
|
System.out.println("code="+Arrays.toString(code));
|
||||||
this.code = code;
|
this.code = code;
|
||||||
this.tokenTypeToAddr = tokenTypeToAddr;
|
this.altToAddr = altToAddr;
|
||||||
this.nLabels = nLabels;
|
this.nLabels = nLabels;
|
||||||
labelValues = new CommonToken[nLabels];
|
labelValues = new CommonToken[nLabels];
|
||||||
}
|
}
|
||||||
|
@ -58,7 +49,7 @@ public class PDA {
|
||||||
System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling");
|
System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling");
|
||||||
bypassedAction = false;
|
bypassedAction = false;
|
||||||
Arrays.fill(labelValues, null);
|
Arrays.fill(labelValues, null);
|
||||||
int ttype2 = execThompson(input, tokenTypeToAddr[ttype], true);
|
int ttype2 = execThompson(input, altToAddr[ttype], true);
|
||||||
if ( ttype!=ttype2 ) {
|
if ( ttype!=ttype2 ) {
|
||||||
System.err.println("eh? token diff with action(s)");
|
System.err.println("eh? token diff with action(s)");
|
||||||
}
|
}
|
||||||
|
@ -92,33 +83,48 @@ processOneChar:
|
||||||
//System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
|
//System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
|
||||||
trace(ip);
|
trace(ip);
|
||||||
short opcode = code[ip];
|
short opcode = code[ip];
|
||||||
|
boolean matched;
|
||||||
ip++; // move to next instruction or first byte of operand
|
ip++; // move to next instruction or first byte of operand
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
|
case Bytecode.NOT :
|
||||||
|
notNextMatch = true;
|
||||||
|
break;
|
||||||
case Bytecode.MATCH8 :
|
case Bytecode.MATCH8 :
|
||||||
if ( c == code[ip] ) {
|
if ( c == code[ip] || (notNextMatch && c != code[ip]) ) {
|
||||||
addToClosure(reach, ip+1, alt, context);
|
addToClosure(reach, ip+1, alt, context);
|
||||||
}
|
}
|
||||||
|
notNextMatch = false;
|
||||||
break;
|
break;
|
||||||
case Bytecode.MATCH16 :
|
case Bytecode.MATCH16 :
|
||||||
if ( c == getShort(code, ip) ) {
|
matched = c == getShort(code, ip);
|
||||||
|
if ( matched || (notNextMatch && matched) ) {
|
||||||
addToClosure(reach, ip+2, alt, context);
|
addToClosure(reach, ip+2, alt, context);
|
||||||
}
|
}
|
||||||
|
notNextMatch = false;
|
||||||
break;
|
break;
|
||||||
case Bytecode.RANGE8 :
|
case Bytecode.RANGE8 :
|
||||||
if ( c>=code[ip] && c<=code[ip+1] ) {
|
matched = c >= code[ip] && c <= code[ip + 1];
|
||||||
|
if ( matched || (notNextMatch && matched) ) {
|
||||||
addToClosure(reach, ip+2, alt, context);
|
addToClosure(reach, ip+2, alt, context);
|
||||||
}
|
}
|
||||||
|
notNextMatch = false;
|
||||||
break;
|
break;
|
||||||
case Bytecode.RANGE16 :
|
case Bytecode.RANGE16 :
|
||||||
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) {
|
matched = c < getShort(code, ip) || c > getShort(code, ip + 2);
|
||||||
|
if ( matched || (notNextMatch && matched) ) {
|
||||||
addToClosure(reach, ip+4, alt, context);
|
addToClosure(reach, ip+4, alt, context);
|
||||||
}
|
}
|
||||||
|
notNextMatch = false;
|
||||||
break;
|
break;
|
||||||
case Bytecode.WILDCARD :
|
case Bytecode.WILDCARD :
|
||||||
if ( c!=Token.EOF ) {
|
if ( c!=Token.EOF ) {
|
||||||
addToClosure(reach, ip, alt, context);
|
addToClosure(reach, ip, alt, context);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Bytecode.SET :
|
||||||
|
System.err.println("not impl");
|
||||||
|
notNextMatch = false;
|
||||||
|
break;
|
||||||
case Bytecode.LABEL : // lexers only
|
case Bytecode.LABEL : // lexers only
|
||||||
int labelIndex = getShort(code, ip);
|
int labelIndex = getShort(code, ip);
|
||||||
labelValues[labelIndex] =
|
labelValues[labelIndex] =
|
||||||
|
@ -217,6 +223,10 @@ processOneChar:
|
||||||
short opcode = code[ip];
|
short opcode = code[ip];
|
||||||
ip++; // move to next instruction or first byte of operand
|
ip++; // move to next instruction or first byte of operand
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
|
case Bytecode.NOT : // see thru NOT but include in closure so we exec during reach
|
||||||
|
closure.add(t); // add to closure; need to execute during reach
|
||||||
|
addToClosure(closure, ip, alt, context);
|
||||||
|
break;
|
||||||
case Bytecode.JMP :
|
case Bytecode.JMP :
|
||||||
addToClosure(closure, getShort(code, ip), alt, context);
|
addToClosure(closure, getShort(code, ip), alt, context);
|
||||||
break;
|
break;
|
||||||
|
@ -360,10 +370,10 @@ processOneChar:
|
||||||
}
|
}
|
||||||
// if we reach accept state, toss out any addresses in rest
|
// if we reach accept state, toss out any addresses in rest
|
||||||
// of work list associated with accept's rule; that rule is done
|
// of work list associated with accept's rule; that rule is done
|
||||||
int ruleStart = tokenTypeToAddr[ttype];
|
int ruleStart = altToAddr[ttype];
|
||||||
int ruleStop = code.length;
|
int ruleStop = code.length;
|
||||||
if ( ttype+1 < tokenTypeToAddr.length ) {
|
if ( ttype+1 < altToAddr.length ) {
|
||||||
ruleStop = tokenTypeToAddr[ttype+1]-1;
|
ruleStop = altToAddr[ttype+1]-1;
|
||||||
}
|
}
|
||||||
System.out.println("kill range "+ruleStart+".."+ruleStop);
|
System.out.println("kill range "+ruleStart+".."+ruleStop);
|
||||||
int j=i+1;
|
int j=i+1;
|
||||||
|
|
|
@ -3,6 +3,7 @@ package org.antlr.v4.codegen;
|
||||||
import org.antlr.runtime.Token;
|
import org.antlr.runtime.Token;
|
||||||
import org.antlr.v4.codegen.pda.Instr;
|
import org.antlr.v4.codegen.pda.Instr;
|
||||||
import org.antlr.v4.misc.DoubleKeyMap;
|
import org.antlr.v4.misc.DoubleKeyMap;
|
||||||
|
import org.antlr.v4.misc.IntervalSet;
|
||||||
import org.antlr.v4.tool.Rule;
|
import org.antlr.v4.tool.Rule;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -14,12 +15,18 @@ import java.util.Map;
|
||||||
public class CompiledPDA {
|
public class CompiledPDA {
|
||||||
public List<Instr> instrs = new ArrayList<Instr>();
|
public List<Instr> instrs = new ArrayList<Instr>();
|
||||||
public byte[] code; // instrs in bytecode form
|
public byte[] code; // instrs in bytecode form
|
||||||
public int ip = 0; // where to write next
|
public List<IntervalSet> set8table = new ArrayList<IntervalSet>();
|
||||||
|
public List<IntervalSet> set16table = new ArrayList<IntervalSet>();
|
||||||
public Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
|
public Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
|
||||||
public int[] tokenTypeToAddr;
|
|
||||||
|
public int[] altToAddr; // either token type (in lexer) or alt num for DFA in parser
|
||||||
|
|
||||||
public DoubleKeyMap<Rule, String, Integer> ruleLabels = new DoubleKeyMap<Rule, String, Integer>();
|
public DoubleKeyMap<Rule, String, Integer> ruleLabels = new DoubleKeyMap<Rule, String, Integer>();
|
||||||
public DoubleKeyMap<Rule, Token, Integer> ruleActions = new DoubleKeyMap<Rule, Token, Integer>();
|
public DoubleKeyMap<Rule, Token, Integer> ruleActions = new DoubleKeyMap<Rule, Token, Integer>();
|
||||||
public DoubleKeyMap<Rule, Token, Integer> ruleSempreds = new DoubleKeyMap<Rule, Token, Integer>();
|
public DoubleKeyMap<Rule, Token, Integer> ruleSempreds = new DoubleKeyMap<Rule, Token, Integer>();
|
||||||
public int nLabels;
|
public int nLabels;
|
||||||
|
|
||||||
|
public CompiledPDA(int numAlts) {
|
||||||
|
altToAddr = new int[numAlts+1];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
package org.antlr.v4.codegen;
|
||||||
|
|
||||||
|
import org.antlr.v4.automata.DFA;
|
||||||
|
import org.antlr.v4.automata.DFAState;
|
||||||
|
import org.antlr.v4.automata.Edge;
|
||||||
|
import org.antlr.v4.codegen.pda.*;
|
||||||
|
import org.antlr.v4.runtime.pda.PDA;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class DFACompiler {
|
||||||
|
public DFA dfa;
|
||||||
|
boolean[] marked;
|
||||||
|
int[] stateToAddr;
|
||||||
|
PDABytecodeGenerator gen;
|
||||||
|
|
||||||
|
public DFACompiler(DFA dfa) {
|
||||||
|
this.dfa = dfa;
|
||||||
|
gen = new PDABytecodeGenerator(dfa.g.getMaxTokenType());
|
||||||
|
}
|
||||||
|
|
||||||
|
public CompiledPDA compile() {
|
||||||
|
walk();
|
||||||
|
gen.compile();
|
||||||
|
return gen.obj;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PDA walk() {
|
||||||
|
marked = new boolean[dfa.stateSet.size()+1];
|
||||||
|
stateToAddr = new int[dfa.stateSet.size()+1];
|
||||||
|
walk(dfa.startState);
|
||||||
|
|
||||||
|
// walk code, update jump targets.
|
||||||
|
for (Instr I : gen.obj.instrs) {
|
||||||
|
System.out.println("instr "+I);
|
||||||
|
if ( I instanceof JumpInstr) {
|
||||||
|
JumpInstr J = (JumpInstr)I;
|
||||||
|
J.target = stateToAddr[J.target];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// recursive so we follow chains in DFA, leading to fewer
|
||||||
|
// jmp instructions.
|
||||||
|
// start by assuming state num is bytecode addr then translate after
|
||||||
|
// in one pass
|
||||||
|
public void walk(DFAState d) {
|
||||||
|
if ( marked[d.stateNumber] ) return;
|
||||||
|
marked[d.stateNumber] = true;
|
||||||
|
stateToAddr[d.stateNumber] = gen.ip;
|
||||||
|
System.out.println("visit "+d.stateNumber+" @"+ gen.ip);
|
||||||
|
if ( d.isAcceptState ) {
|
||||||
|
AcceptInstr A = new AcceptInstr(d.predictsAlt);
|
||||||
|
gen.emit(A);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
SplitInstr S = null;
|
||||||
|
if ( d.edges.size()>1 ) {
|
||||||
|
S = new SplitInstr(d.edges.size());
|
||||||
|
gen.emit(S);
|
||||||
|
}
|
||||||
|
for (Edge e : d.edges) {
|
||||||
|
if ( S!=null ) S.addrs.add(gen.ip);
|
||||||
|
if ( e.label.getMinElement() == e.label.getMaxElement() ) {
|
||||||
|
MatchInstr M = new MatchInstr(e.label.getSingleElement());
|
||||||
|
gen.emit(M);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
gen.emit(new SetInstr(e.label));
|
||||||
|
}
|
||||||
|
JumpInstr J = new JumpInstr(e.target.stateNumber);
|
||||||
|
gen.emit(J);
|
||||||
|
walk(e.target);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,65 @@
|
||||||
|
package org.antlr.v4.codegen;
|
||||||
|
|
||||||
|
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
||||||
|
import org.antlr.v4.codegen.pda.AcceptInstr;
|
||||||
|
import org.antlr.v4.codegen.pda.RetInstr;
|
||||||
|
import org.antlr.v4.codegen.pda.SplitInstr;
|
||||||
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
|
import org.antlr.v4.parse.GrammarASTAdaptor;
|
||||||
|
import org.antlr.v4.runtime.pda.Bytecode;
|
||||||
|
import org.antlr.v4.tool.GrammarAST;
|
||||||
|
import org.antlr.v4.tool.LexerGrammar;
|
||||||
|
import org.antlr.v4.tool.Rule;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class LexerCompiler {
|
||||||
|
LexerGrammar lg;
|
||||||
|
public LexerCompiler(LexerGrammar lg) {
|
||||||
|
this.lg = lg;
|
||||||
|
}
|
||||||
|
|
||||||
|
public CompiledPDA compileMode(String modeName) {
|
||||||
|
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
||||||
|
PDABytecodeGenerator gen = new PDABytecodeGenerator(lg.getMaxTokenType());
|
||||||
|
PDABytecodeTriggers trigger = new PDABytecodeTriggers(null, gen);
|
||||||
|
|
||||||
|
// add split for s0 to hook up rules (fill in operands as we gen rules)
|
||||||
|
int numRules = lg.modes.get(modeName).size();
|
||||||
|
int numFragmentRules = 0;
|
||||||
|
for (Rule r : lg.modes.get(modeName)) { if ( r.isFragment() ) numFragmentRules++; }
|
||||||
|
SplitInstr s0 = new SplitInstr(numRules - numFragmentRules);
|
||||||
|
gen.emit(s0);
|
||||||
|
|
||||||
|
|
||||||
|
for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
|
||||||
|
gen.currentRule = r;
|
||||||
|
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
|
||||||
|
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
|
||||||
|
trigger.setTreeNodeStream(nodes);
|
||||||
|
int ttype = lg.getTokenType(r.name);
|
||||||
|
gen.defineRuleAddr(r.name, gen.ip);
|
||||||
|
if ( !r.isFragment() ) {
|
||||||
|
s0.addrs.add(gen.ip);
|
||||||
|
gen.defineTokenTypeToAddr(ttype, gen.ip);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
trigger.block(); // GEN Instr OBJECTS
|
||||||
|
int ruleTokenType = lg.getTokenType(r.name);
|
||||||
|
if ( !r.isFragment() ) {
|
||||||
|
gen.emit(new AcceptInstr(ruleTokenType));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
gen.emit(new RetInstr());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception e){
|
||||||
|
e.printStackTrace(System.err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gen.compile();
|
||||||
|
gen.obj.nLabels = gen.labelIndex;
|
||||||
|
System.out.println(Bytecode.disassemble(gen.obj.code));
|
||||||
|
System.out.println("rule addrs="+ gen.obj.ruleToAddr);
|
||||||
|
return gen.obj;
|
||||||
|
}
|
||||||
|
}
|
|
@ -25,7 +25,8 @@ public class LexerFactory {
|
||||||
fileST.add("fileName", gen.getRecognizerFileName());
|
fileST.add("fileName", gen.getRecognizerFileName());
|
||||||
fileST.add("lexer", lexerST);
|
fileST.add("lexer", lexerST);
|
||||||
for (String modeName : lg.modes.keySet()) { // for each mode
|
for (String modeName : lg.modes.keySet()) { // for each mode
|
||||||
CompiledPDA pda = PDABytecodeGenerator.compileLexerMode(lg, modeName);
|
LexerCompiler comp = new LexerCompiler(lg);
|
||||||
|
CompiledPDA pda = comp.compileMode(modeName);
|
||||||
ST pdaST = gen.templates.getInstanceOf("PDA");
|
ST pdaST = gen.templates.getInstanceOf("PDA");
|
||||||
for (Rule r : pda.ruleActions.keySet()) {
|
for (Rule r : pda.ruleActions.keySet()) {
|
||||||
Set<Token> actionTokens = pda.ruleActions.keySet(r);
|
Set<Token> actionTokens = pda.ruleActions.keySet(r);
|
||||||
|
|
|
@ -1,66 +1,61 @@
|
||||||
package org.antlr.v4.codegen;
|
package org.antlr.v4.codegen;
|
||||||
|
|
||||||
import org.antlr.runtime.RecognizerSharedState;
|
|
||||||
import org.antlr.runtime.Token;
|
import org.antlr.runtime.Token;
|
||||||
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
import org.antlr.v4.codegen.pda.CallInstr;
|
||||||
import org.antlr.runtime.tree.Tree;
|
import org.antlr.v4.codegen.pda.Instr;
|
||||||
import org.antlr.runtime.tree.TreeNodeStream;
|
import org.antlr.v4.codegen.pda.MatchInstr;
|
||||||
import org.antlr.v4.automata.DFA;
|
import org.antlr.v4.codegen.pda.NotInstr;
|
||||||
import org.antlr.v4.automata.DFAState;
|
|
||||||
import org.antlr.v4.automata.Edge;
|
|
||||||
import org.antlr.v4.codegen.pda.*;
|
|
||||||
import org.antlr.v4.misc.CharSupport;
|
import org.antlr.v4.misc.CharSupport;
|
||||||
import org.antlr.v4.misc.IntervalSet;
|
import org.antlr.v4.misc.IntervalSet;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.tool.Rule;
|
||||||
import org.antlr.v4.parse.GrammarASTAdaptor;
|
|
||||||
import org.antlr.v4.runtime.pda.Bytecode;
|
|
||||||
import org.antlr.v4.runtime.pda.PDA;
|
|
||||||
import org.antlr.v4.runtime.tree.TreeParser;
|
|
||||||
import org.antlr.v4.tool.*;
|
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/** http://swtch.com/~rsc/regexp/regexp2.html */
|
/** http://swtch.com/~rsc/regexp/regexp2.html */
|
||||||
public class PDABytecodeGenerator extends TreeParser {
|
public class PDABytecodeGenerator {
|
||||||
public Grammar g;
|
|
||||||
|
|
||||||
public Rule currentRule;
|
public Rule currentRule;
|
||||||
|
|
||||||
CompiledPDA pda = new CompiledPDA();
|
public CompiledPDA obj;
|
||||||
|
|
||||||
public int labelIndex = 0; // first time we ask for labels we index
|
public int ip = 0; // where to write next
|
||||||
|
|
||||||
public PDABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
|
int labelIndex = 0; // first time we ask for labels we index
|
||||||
super(input, state);
|
|
||||||
|
public PDABytecodeGenerator(int numAlts) {
|
||||||
|
obj = new CompiledPDA(numAlts);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void compile() {
|
||||||
|
obj.code = convertInstrsToBytecode();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void emit(Instr I) {
|
public void emit(Instr I) {
|
||||||
I.addr = pda.ip;
|
I.addr = ip;
|
||||||
I.rule = currentRule;
|
I.rule = currentRule;
|
||||||
I.gen = this;
|
I.gen = this;
|
||||||
pda.ip += I.nBytes();
|
ip += I.nBytes();
|
||||||
pda.instrs.add(I);
|
obj.instrs.add(I);
|
||||||
}
|
}
|
||||||
|
|
||||||
// indexed from 0 per rule
|
// indexed from 0 per rule
|
||||||
public int getActionIndex(Rule r, Token actionToken) {
|
public int getActionIndex(Rule r, Token actionToken) {
|
||||||
Integer I = pda.ruleActions.get(r, actionToken);
|
Integer I = obj.ruleActions.get(r, actionToken);
|
||||||
if ( I!=null ) return I; // already got its label
|
if ( I!=null ) return I; // already got its label
|
||||||
Map<Token, Integer> labels = pda.ruleActions.get(r);
|
Map<Token, Integer> labels = obj.ruleActions.get(r);
|
||||||
int i = 0;
|
int i = 0;
|
||||||
if ( labels!=null ) i = labels.size();
|
if ( labels!=null ) i = labels.size();
|
||||||
pda.ruleActions.put(r, actionToken, i);
|
obj.ruleActions.put(r, actionToken, i);
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
// indexed from 0 per rule
|
// indexed from 0 per rule
|
||||||
public int getSempredIndex(Rule r, Token actionToken) {
|
public int getSempredIndex(Rule r, Token actionToken) {
|
||||||
Integer I = pda.ruleSempreds.get(r, actionToken);
|
Integer I = obj.ruleSempreds.get(r, actionToken);
|
||||||
if ( I!=null ) return I; // already got its label
|
if ( I!=null ) return I; // already got its label
|
||||||
Map<Token, Integer> labels = pda.ruleSempreds.get(r);
|
Map<Token, Integer> labels = obj.ruleSempreds.get(r);
|
||||||
int i = 0;
|
int i = 0;
|
||||||
if ( labels!=null ) i = labels.size();
|
if ( labels!=null ) i = labels.size();
|
||||||
pda.ruleSempreds.put(r, actionToken, i);
|
obj.ruleSempreds.put(r, actionToken, i);
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,129 +64,55 @@ public class PDABytecodeGenerator extends TreeParser {
|
||||||
* to an index in an action.
|
* to an index in an action.
|
||||||
*/
|
*/
|
||||||
public int getLabelIndex(Rule r, String labelName) {
|
public int getLabelIndex(Rule r, String labelName) {
|
||||||
Integer I = pda.ruleLabels.get(r, labelName);
|
Integer I = obj.ruleLabels.get(r, labelName);
|
||||||
if ( I!=null ) return I; // already got its label
|
if ( I!=null ) return I; // already got its label
|
||||||
int i = labelIndex++;
|
int i = labelIndex++;
|
||||||
pda.ruleLabels.put(r, labelName, i);
|
obj.ruleLabels.put(r, labelName, i);
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getSetIndex(IntervalSet set) {
|
||||||
|
obj.set8table.add(set);
|
||||||
|
return obj.set8table.size()-1;
|
||||||
|
}
|
||||||
|
|
||||||
public void emitString(Token t, boolean not) {
|
public void emitString(Token t, boolean not) {
|
||||||
String chars = CharSupport.getStringFromGrammarStringLiteral(t.getText());
|
String chars = CharSupport.getStringFromGrammarStringLiteral(t.getText());
|
||||||
if ( not && chars.length()==1 ) {
|
if ( not && chars.length()==1 ) emit(new NotInstr());
|
||||||
emitNotChar(t, chars);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
for (char c : chars.toCharArray()) {
|
for (char c : chars.toCharArray()) {
|
||||||
emit(new MatchInstr(t, c));
|
emit(new MatchInstr(t, c));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void emitNotChar(Token t, String chars) {
|
|
||||||
IntervalSet all = (IntervalSet)g.getTokenTypes();
|
|
||||||
int c = chars.charAt(0);
|
|
||||||
SplitInstr s = new SplitInstr(2);
|
|
||||||
RangeInstr left = new RangeInstr(t, t);
|
|
||||||
left.a = all.getMinElement();
|
|
||||||
left.b = c-1;
|
|
||||||
RangeInstr right = new RangeInstr(t, t);
|
|
||||||
right.a = c+1;
|
|
||||||
right.b = 127; // all.getMaxElement();
|
|
||||||
emit(s);
|
|
||||||
emit(left);
|
|
||||||
JumpInstr J = new JumpInstr();
|
|
||||||
emit(J);
|
|
||||||
emit(right);
|
|
||||||
s.addrs.add(left.addr);
|
|
||||||
s.addrs.add(right.addr);
|
|
||||||
int END = pda.ip;
|
|
||||||
J.target = END;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
public byte[] convertInstrsToBytecode() {
|
public byte[] convertInstrsToBytecode() {
|
||||||
Instr last = pda.instrs.get(pda.instrs.size() - 1);
|
Instr last = obj.instrs.get(obj.instrs.size() - 1);
|
||||||
int size = last.addr + last.nBytes();
|
int size = last.addr + last.nBytes();
|
||||||
byte[] code = new byte[size];
|
byte[] code = new byte[size];
|
||||||
|
|
||||||
// resolve CALL instruction targets before generating code
|
// resolve CALL instruction targets before generating code
|
||||||
for (Instr I : pda.instrs) {
|
for (Instr I : obj.instrs) {
|
||||||
if ( I instanceof CallInstr ) {
|
if ( I instanceof CallInstr ) {
|
||||||
CallInstr C = (CallInstr) I;
|
CallInstr C = (CallInstr) I;
|
||||||
String ruleName = C.token.getText();
|
String ruleName = C.token.getText();
|
||||||
C.target = pda.ruleToAddr.get(ruleName);
|
C.target = obj.ruleToAddr.get(ruleName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (Instr I : pda.instrs) {
|
for (Instr I : obj.instrs) {
|
||||||
I.write(code);
|
I.write(code);
|
||||||
}
|
}
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static CompiledPDA compileLexerMode(LexerGrammar lg, String modeName) {
|
public void defineRuleAddr(String name, int ip) {
|
||||||
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
obj.ruleToAddr.put(name, ip);
|
||||||
PDABytecodeTriggers gen = new PDABytecodeTriggers(null);
|
|
||||||
gen.g = lg;
|
|
||||||
gen.pda.tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
|
|
||||||
|
|
||||||
// add split for s0 to hook up rules (fill in operands as we gen rules)
|
|
||||||
int numRules = lg.modes.get(modeName).size();
|
|
||||||
int numFragmentRules = 0;
|
|
||||||
for (Rule r : lg.modes.get(modeName)) { if ( r.isFragment() ) numFragmentRules++; }
|
|
||||||
SplitInstr s0 = new SplitInstr(numRules - numFragmentRules);
|
|
||||||
gen.emit(s0);
|
|
||||||
|
|
||||||
|
|
||||||
for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
|
|
||||||
gen.currentRule = r;
|
|
||||||
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
|
|
||||||
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
|
|
||||||
gen.setTreeNodeStream(nodes);
|
|
||||||
int ttype = lg.getTokenType(r.name);
|
|
||||||
gen.pda.ruleToAddr.put(r.name, gen.pda.ip);
|
|
||||||
if ( !r.isFragment() ) {
|
|
||||||
s0.addrs.add(gen.pda.ip);
|
|
||||||
gen.pda.tokenTypeToAddr[ttype] = gen.pda.ip;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
gen.block(); // GEN Instr OBJECTS
|
|
||||||
int ruleTokenType = lg.getTokenType(r.name);
|
|
||||||
if ( !r.isFragment() ) {
|
|
||||||
gen.emit(new AcceptInstr(ruleTokenType));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
gen.emit(new RetInstr());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (Exception e){
|
|
||||||
e.printStackTrace(System.err);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
gen.pda.code = gen.convertInstrsToBytecode();
|
|
||||||
gen.pda.nLabels = gen.labelIndex;
|
|
||||||
System.out.println(Bytecode.disassemble(gen.pda.code));
|
|
||||||
System.out.println("rule addrs="+gen.pda.ruleToAddr);
|
|
||||||
return gen.pda;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .))
|
public void defineRuleIndexToAddr(int index, int ip) {
|
||||||
public boolean blockHasWildcardAlt(GrammarAST block) {
|
obj.altToAddr[index] = ip;
|
||||||
for (Object alt : block.getChildren()) {
|
|
||||||
AltAST altAST = (AltAST)alt;
|
|
||||||
if ( altAST.getChildCount()==1 ) {
|
|
||||||
Tree e = altAST.getChild(0);
|
|
||||||
if ( e.getType()==ANTLRParser.WILDCARD ) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// testing
|
public void defineTokenTypeToAddr(int ttype, int ip) {
|
||||||
public static PDA getPDA(LexerGrammar lg, String modeName) {
|
defineRuleIndexToAddr(ttype, ip);
|
||||||
CompiledPDA info = compileLexerMode(lg, modeName);
|
|
||||||
return new PDA(info.code, info.ruleToAddr, info.tokenTypeToAddr, info.nLabels);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Write value at index into a byte array highest to lowest byte,
|
/** Write value at index into a byte array highest to lowest byte,
|
||||||
|
@ -202,66 +123,4 @@ public class PDABytecodeGenerator extends TreeParser {
|
||||||
memory[index+1] = (byte)(value&0xFF);
|
memory[index+1] = (byte)(value&0xFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----------
|
|
||||||
|
|
||||||
public static PDA getPDA(DFA dfa) {
|
|
||||||
PDABytecodeTriggers gen = new PDABytecodeTriggers(null);
|
|
||||||
gen.g = dfa.g;
|
|
||||||
gen.pda.tokenTypeToAddr = new int[gen.g.getMaxTokenType()+1];
|
|
||||||
gen.walk(dfa);
|
|
||||||
gen.pda.code = gen.convertInstrsToBytecode();
|
|
||||||
CompiledPDA c = gen.pda;
|
|
||||||
return new PDA(c.code, c.ruleToAddr, c.tokenTypeToAddr, c.nLabels);
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean[] marked;
|
|
||||||
int[] stateToAddr;
|
|
||||||
|
|
||||||
public PDA walk(DFA dfa) {
|
|
||||||
marked = new boolean[dfa.stateSet.size()+1];
|
|
||||||
stateToAddr = new int[dfa.stateSet.size()+1];
|
|
||||||
walk(dfa.startState);
|
|
||||||
|
|
||||||
// walk code, update jump targets.
|
|
||||||
for (Instr I : pda.instrs) {
|
|
||||||
System.out.println("instr "+I);
|
|
||||||
if ( I instanceof JumpInstr ) {
|
|
||||||
JumpInstr J = (JumpInstr)I;
|
|
||||||
J.target = stateToAddr[J.target];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// recursive so we follow chains in DFA, leading to fewer
|
|
||||||
// jmp instructions.
|
|
||||||
// start by assuming state num is bytecode addr then translate after
|
|
||||||
// in one pass
|
|
||||||
public void walk(DFAState d) {
|
|
||||||
if ( marked[d.stateNumber] ) return;
|
|
||||||
marked[d.stateNumber] = true;
|
|
||||||
stateToAddr[d.stateNumber] = pda.ip;
|
|
||||||
System.out.println("visit "+d.stateNumber+" @"+pda.ip);
|
|
||||||
if ( d.isAcceptState ) {
|
|
||||||
AcceptInstr A = new AcceptInstr(d.predictsAlt);
|
|
||||||
emit(A);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
SplitInstr S = null;
|
|
||||||
if ( d.edges.size()>1 ) {
|
|
||||||
S = new SplitInstr(d.edges.size());
|
|
||||||
emit(S);
|
|
||||||
}
|
|
||||||
for (Edge e : d.edges) {
|
|
||||||
if ( S!=null ) S.addrs.add(pda.ip);
|
|
||||||
// TODO: assumes no sets yet!
|
|
||||||
MatchInstr M = new MatchInstr(e.label.getSingleElement());
|
|
||||||
JumpInstr J = new JumpInstr(e.target.stateNumber);
|
|
||||||
emit(M);
|
|
||||||
emit(J);
|
|
||||||
walk(e.target);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,13 +3,14 @@ options {
|
||||||
language = Java;
|
language = Java;
|
||||||
tokenVocab = ANTLRParser;
|
tokenVocab = ANTLRParser;
|
||||||
ASTLabelType = GrammarAST;
|
ASTLabelType = GrammarAST;
|
||||||
superClass = PDABytecodeGenerator;
|
// superClass = PDABytecodeGenerator;
|
||||||
}
|
}
|
||||||
|
|
||||||
@header {
|
@header {
|
||||||
package org.antlr.v4.codegen;
|
package org.antlr.v4.codegen;
|
||||||
import org.antlr.v4.codegen.pda.*;
|
import org.antlr.v4.codegen.pda.*;
|
||||||
import org.antlr.v4.tool.GrammarAST;
|
import org.antlr.v4.tool.GrammarAST;
|
||||||
|
import org.antlr.v4.tool.AltAST;
|
||||||
import org.antlr.v4.tool.GrammarASTWithOptions;
|
import org.antlr.v4.tool.GrammarASTWithOptions;
|
||||||
import org.antlr.v4.tool.LexerGrammar;
|
import org.antlr.v4.tool.LexerGrammar;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -17,6 +18,30 @@ import java.util.Map;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@members {
|
||||||
|
PDABytecodeGenerator gen;
|
||||||
|
|
||||||
|
public PDABytecodeTriggers(TreeNodeStream input, PDABytecodeGenerator gen) {
|
||||||
|
this(input);
|
||||||
|
this.gen = gen;
|
||||||
|
}
|
||||||
|
|
||||||
|
// (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .))
|
||||||
|
public boolean blockHasWildcardAlt(GrammarAST block) {
|
||||||
|
for (Object alt : block.getChildren()) {
|
||||||
|
if ( !(alt instanceof AltAST) ) continue;
|
||||||
|
AltAST altAST = (AltAST)alt;
|
||||||
|
if ( altAST.getChildCount()==1 ) {
|
||||||
|
Tree e = altAST.getChild(0);
|
||||||
|
if ( e.getType()==WILDCARD ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
block
|
block
|
||||||
: ^( BLOCK (^(OPTIONS .+))?
|
: ^( BLOCK (^(OPTIONS .+))?
|
||||||
{
|
{
|
||||||
|
@ -28,8 +53,8 @@ block
|
||||||
SplitInstr S = null;
|
SplitInstr S = null;
|
||||||
if ( nAlts>1 ) {
|
if ( nAlts>1 ) {
|
||||||
S = new SplitInstr(nAlts);
|
S = new SplitInstr(nAlts);
|
||||||
emit(S);
|
gen.emit(S);
|
||||||
S.addrs.add(pda.ip);
|
S.addrs.add(gen.ip);
|
||||||
}
|
}
|
||||||
int alt = 1;
|
int alt = 1;
|
||||||
}
|
}
|
||||||
|
@ -38,14 +63,14 @@ block
|
||||||
if ( alt < nAlts ) {
|
if ( alt < nAlts ) {
|
||||||
JumpInstr J = new JumpInstr();
|
JumpInstr J = new JumpInstr();
|
||||||
jumps.add(J);
|
jumps.add(J);
|
||||||
emit(J);
|
gen.emit(J);
|
||||||
S.addrs.add(pda.ip);
|
S.addrs.add(gen.ip);
|
||||||
}
|
}
|
||||||
alt++;
|
alt++;
|
||||||
}
|
}
|
||||||
)+
|
)+
|
||||||
{
|
{
|
||||||
int END = pda.ip;
|
int END = gen.ip;
|
||||||
for (JumpInstr J : jumps) J.target = END;
|
for (JumpInstr J : jumps) J.target = END;
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -61,14 +86,14 @@ element
|
||||||
: labeledElement
|
: labeledElement
|
||||||
| atom
|
| atom
|
||||||
| ebnf
|
| ebnf
|
||||||
| ACTION {emit(new ActionInstr($ACTION.token));}
|
| ACTION {gen.emit(new ActionInstr($ACTION.token));}
|
||||||
| SEMPRED {emit(new SemPredInstr($SEMPRED.token));}
|
| SEMPRED {gen.emit(new SemPredInstr($SEMPRED.token));}
|
||||||
| GATED_SEMPRED {emit(new SemPredInstr($GATED_SEMPRED.token));}
|
| GATED_SEMPRED {gen.emit(new SemPredInstr($GATED_SEMPRED.token));}
|
||||||
| treeSpec
|
| treeSpec
|
||||||
;
|
;
|
||||||
|
|
||||||
labeledElement
|
labeledElement
|
||||||
: ^(ASSIGN ID {emit(new LabelInstr($ID.token));} atom {emit(new SaveInstr($ID.token));} )
|
: ^(ASSIGN ID {gen.emit(new LabelInstr($ID.token));} atom {gen.emit(new SaveInstr($ID.token));} )
|
||||||
| ^(ASSIGN ID block)
|
| ^(ASSIGN ID block)
|
||||||
| ^(PLUS_ASSIGN ID atom)
|
| ^(PLUS_ASSIGN ID atom)
|
||||||
| ^(PLUS_ASSIGN ID block)
|
| ^(PLUS_ASSIGN ID block)
|
||||||
|
@ -87,33 +112,33 @@ ebnf
|
||||||
: ^(astBlockSuffix block)
|
: ^(astBlockSuffix block)
|
||||||
| {
|
| {
|
||||||
SplitInstr S = new SplitInstr(2);
|
SplitInstr S = new SplitInstr(2);
|
||||||
emit(S);
|
gen.emit(S);
|
||||||
S.addrs.add(pda.ip);
|
S.addrs.add(gen.ip);
|
||||||
}
|
}
|
||||||
^(OPTIONAL block)
|
^(OPTIONAL block)
|
||||||
{
|
{
|
||||||
S.addrs.add(pda.ip);
|
S.addrs.add(gen.ip);
|
||||||
}
|
}
|
||||||
| {
|
| {
|
||||||
int start=pda.ip;
|
int start=gen.ip;
|
||||||
SplitInstr S = new SplitInstr(2);
|
SplitInstr S = new SplitInstr(2);
|
||||||
emit(S);
|
gen.emit(S);
|
||||||
int blkStart = pda.ip;
|
int blkStart = gen.ip;
|
||||||
}
|
}
|
||||||
^(CLOSURE block)
|
^(CLOSURE block)
|
||||||
{
|
{
|
||||||
JumpInstr J = new JumpInstr();
|
JumpInstr J = new JumpInstr();
|
||||||
emit(J);
|
gen.emit(J);
|
||||||
J.target = start;
|
J.target = start;
|
||||||
S.addrs.add(blkStart);
|
S.addrs.add(blkStart);
|
||||||
S.addrs.add(pda.ip);
|
S.addrs.add(gen.ip);
|
||||||
if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs);
|
if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs);
|
||||||
}
|
}
|
||||||
| {int start=pda.ip;} ^(POSITIVE_CLOSURE block)
|
| {int start=gen.ip;} ^(POSITIVE_CLOSURE block)
|
||||||
{
|
{
|
||||||
SplitInstr S = new SplitInstr(2);
|
SplitInstr S = new SplitInstr(2);
|
||||||
emit(S);
|
gen.emit(S);
|
||||||
int stop = pda.ip;
|
int stop = gen.ip;
|
||||||
S.addrs.add(start);
|
S.addrs.add(start);
|
||||||
S.addrs.add(stop);
|
S.addrs.add(stop);
|
||||||
if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs);
|
if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs);
|
||||||
|
@ -136,8 +161,8 @@ atom
|
||||||
| range
|
| range
|
||||||
| ^(DOT ID terminal[false])
|
| ^(DOT ID terminal[false])
|
||||||
| ^(DOT ID ruleref)
|
| ^(DOT ID ruleref)
|
||||||
| ^(WILDCARD .) {emit(new WildcardInstr($WILDCARD.token));}
|
| ^(WILDCARD .) {gen.emit(new WildcardInstr($WILDCARD.token));}
|
||||||
| WILDCARD {emit(new WildcardInstr($WILDCARD.token));}
|
| WILDCARD {gen.emit(new WildcardInstr($WILDCARD.token));}
|
||||||
| terminal[false]
|
| terminal[false]
|
||||||
| ruleref
|
| ruleref
|
||||||
;
|
;
|
||||||
|
@ -155,15 +180,15 @@ ruleref
|
||||||
|
|
||||||
range
|
range
|
||||||
: ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
|
: ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
|
||||||
{emit(new RangeInstr($a.token, $b.token));}
|
{gen.emit(new RangeInstr($a.token, $b.token));}
|
||||||
;
|
;
|
||||||
|
|
||||||
terminal[boolean not]
|
terminal[boolean not]
|
||||||
: ^(STRING_LITERAL .) {emitString($STRING_LITERAL.token, $not);}
|
: ^(STRING_LITERAL .) {gen.emitString($STRING_LITERAL.token, $not);}
|
||||||
| STRING_LITERAL {emitString($STRING_LITERAL.token, $not);}
|
| STRING_LITERAL {gen.emitString($STRING_LITERAL.token, $not);}
|
||||||
| ^(TOKEN_REF ARG_ACTION .) {emit(new CallInstr($TOKEN_REF.token));}
|
| ^(TOKEN_REF ARG_ACTION .) {gen.emit(new CallInstr($TOKEN_REF.token));}
|
||||||
| ^(TOKEN_REF .) {emit(new CallInstr($TOKEN_REF.token));}
|
| ^(TOKEN_REF .) {gen.emit(new CallInstr($TOKEN_REF.token));}
|
||||||
| TOKEN_REF {emit(new CallInstr($TOKEN_REF.token));}
|
| TOKEN_REF {gen.emit(new CallInstr($TOKEN_REF.token));}
|
||||||
| ^(ROOT terminal[false])
|
| ^(ROOT terminal[false])
|
||||||
| ^(BANG terminal[false])
|
| ^(BANG terminal[false])
|
||||||
;
|
;
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
// $ANTLR 3.2.1-SNAPSHOT May 24, 2010 15:02:05 SourceGenTriggers.g 2010-05-26 14:22:40
|
// $ANTLR 3.2.1-SNAPSHOT May 24, 2010 15:02:05 SourceGenTriggers.g 2010-05-27 16:58:15
|
||||||
|
|
||||||
package org.antlr.v4.codegen;
|
package org.antlr.v4.codegen;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
package org.antlr.v4.codegen.pda;
|
||||||
|
|
||||||
|
import org.antlr.v4.runtime.pda.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class NotInstr extends Instr {
|
||||||
|
public short opcode() { return Bytecode.NOT; }
|
||||||
|
public int nBytes() { return 1; }
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
package org.antlr.v4.codegen.pda;
|
||||||
|
|
||||||
|
import org.antlr.v4.codegen.PDABytecodeGenerator;
|
||||||
|
import org.antlr.v4.misc.IntervalSet;
|
||||||
|
import org.antlr.v4.runtime.pda.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class SetInstr extends Instr {
|
||||||
|
public IntervalSet set;
|
||||||
|
public int setIndex;
|
||||||
|
|
||||||
|
public SetInstr(IntervalSet set) { this.set = set; }
|
||||||
|
public short opcode() { return Bytecode.SET; }
|
||||||
|
public int nBytes() { return 1+2; }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
setIndex = gen.getSetIndex(set);
|
||||||
|
PDABytecodeGenerator.writeShort(code, addr+1, (short)setIndex);
|
||||||
|
}
|
||||||
|
}
|
|
@ -3,7 +3,8 @@ package org.antlr.v4.test;
|
||||||
import org.antlr.v4.automata.DFA;
|
import org.antlr.v4.automata.DFA;
|
||||||
import org.antlr.v4.automata.DecisionState;
|
import org.antlr.v4.automata.DecisionState;
|
||||||
import org.antlr.v4.automata.NFA;
|
import org.antlr.v4.automata.NFA;
|
||||||
import org.antlr.v4.codegen.PDABytecodeGenerator;
|
import org.antlr.v4.codegen.CompiledPDA;
|
||||||
|
import org.antlr.v4.codegen.DFACompiler;
|
||||||
import org.antlr.v4.runtime.pda.Bytecode;
|
import org.antlr.v4.runtime.pda.Bytecode;
|
||||||
import org.antlr.v4.runtime.pda.PDA;
|
import org.antlr.v4.runtime.pda.PDA;
|
||||||
import org.antlr.v4.tool.Grammar;
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
@ -11,6 +12,21 @@ import org.junit.Test;
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
public class TestDFAtoPDABytecodeGeneration extends BaseTest {
|
public class TestDFAtoPDABytecodeGeneration extends BaseTest {
|
||||||
|
@Test public void testNotAisSet() throws Exception {
|
||||||
|
Grammar g = new Grammar(
|
||||||
|
"parser grammar T;\n"+
|
||||||
|
"a : ~A B C | A ;");
|
||||||
|
String expecting =
|
||||||
|
"0000:\tsplit 7, 16\n" +
|
||||||
|
"0007:\tset 0\n" +
|
||||||
|
"0010:\tjmp 13\n" +
|
||||||
|
"0013:\taccept 1\n" +
|
||||||
|
"0016:\tmatch8 5\n" +
|
||||||
|
"0018:\tjmp 21\n" +
|
||||||
|
"0021:\taccept 2\n";
|
||||||
|
checkBytecode(g, 0, expecting);
|
||||||
|
}
|
||||||
|
|
||||||
@Test public void testAorB() throws Exception {
|
@Test public void testAorB() throws Exception {
|
||||||
Grammar g = new Grammar(
|
Grammar g = new Grammar(
|
||||||
"parser grammar T;\n"+
|
"parser grammar T;\n"+
|
||||||
|
@ -62,8 +78,6 @@ public class TestDFAtoPDABytecodeGeneration extends BaseTest {
|
||||||
checkBytecode(g, 2, expecting);
|
checkBytecode(g, 2, expecting);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: ORDER OF TESTS MATTERS? DFA edge orders get changed. ack!
|
|
||||||
|
|
||||||
void checkBytecode(Grammar g, int decision, String expecting) {
|
void checkBytecode(Grammar g, int decision, String expecting) {
|
||||||
NFA nfa = createNFA(g);
|
NFA nfa = createNFA(g);
|
||||||
DecisionState blk = nfa.decisionToNFAState.get(decision);
|
DecisionState blk = nfa.decisionToNFAState.get(decision);
|
||||||
|
@ -71,8 +85,10 @@ public class TestDFAtoPDABytecodeGeneration extends BaseTest {
|
||||||
// Edge e0 = dfa.states.get(1).edge(0);
|
// Edge e0 = dfa.states.get(1).edge(0);
|
||||||
// Edge e1 = dfa.states.get(1).edge(1);
|
// Edge e1 = dfa.states.get(1).edge(1);
|
||||||
// e0.target = e1.target;
|
// e0.target = e1.target;
|
||||||
// System.out.print("altered DFA="+dfa);
|
// System.out.print("altered DFA="+dfa);
|
||||||
PDA PDA = PDABytecodeGenerator.getPDA(dfa);
|
DFACompiler comp = new DFACompiler(dfa);
|
||||||
assertEquals(expecting, Bytecode.disassemble(PDA.code, false));
|
CompiledPDA obj = comp.compile();
|
||||||
|
PDA pda = new PDA(obj.code, obj.altToAddr, obj.nLabels);
|
||||||
|
assertEquals(expecting, Bytecode.disassemble(pda.code, false));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
package org.antlr.v4.test;
|
package org.antlr.v4.test;
|
||||||
|
|
||||||
import org.antlr.v4.Tool;
|
import org.antlr.v4.Tool;
|
||||||
import org.antlr.v4.codegen.PDABytecodeGenerator;
|
import org.antlr.v4.codegen.CompiledPDA;
|
||||||
|
import org.antlr.v4.codegen.LexerCompiler;
|
||||||
import org.antlr.v4.runtime.pda.Bytecode;
|
import org.antlr.v4.runtime.pda.Bytecode;
|
||||||
import org.antlr.v4.runtime.pda.PDA;
|
import org.antlr.v4.runtime.pda.PDA;
|
||||||
import org.antlr.v4.semantics.SemanticPipeline;
|
import org.antlr.v4.semantics.SemanticPipeline;
|
||||||
|
@ -22,6 +23,18 @@ public class TestPDABytecodeGeneration extends BaseTest {
|
||||||
checkBytecode(g, expecting);
|
checkBytecode(g, expecting);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test public void testNotChar() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n"+
|
||||||
|
"A : ~'a' ;");
|
||||||
|
String expecting =
|
||||||
|
"0000:\tsplit 5\n" +
|
||||||
|
"0005:\tnot \n" +
|
||||||
|
"0006:\tmatch8 'a'\n" +
|
||||||
|
"0008:\taccept 4\n";
|
||||||
|
checkBytecode(g, expecting);
|
||||||
|
}
|
||||||
|
|
||||||
@Test public void testIDandIntandKeyword() throws Exception {
|
@Test public void testIDandIntandKeyword() throws Exception {
|
||||||
LexerGrammar g = new LexerGrammar(
|
LexerGrammar g = new LexerGrammar(
|
||||||
"lexer grammar L;\n" +
|
"lexer grammar L;\n" +
|
||||||
|
@ -215,7 +228,9 @@ public class TestPDABytecodeGeneration extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME);
|
LexerCompiler comp = new LexerCompiler(g);
|
||||||
|
CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
|
||||||
|
PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
|
||||||
assertEquals(expecting, Bytecode.disassemble(PDA.code));
|
assertEquals(expecting, Bytecode.disassemble(PDA.code));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,7 +3,8 @@ package org.antlr.v4.test;
|
||||||
import org.antlr.runtime.ANTLRStringStream;
|
import org.antlr.runtime.ANTLRStringStream;
|
||||||
import org.antlr.runtime.Token;
|
import org.antlr.runtime.Token;
|
||||||
import org.antlr.v4.Tool;
|
import org.antlr.v4.Tool;
|
||||||
import org.antlr.v4.codegen.PDABytecodeGenerator;
|
import org.antlr.v4.codegen.CompiledPDA;
|
||||||
|
import org.antlr.v4.codegen.LexerCompiler;
|
||||||
import org.antlr.v4.runtime.pda.PDA;
|
import org.antlr.v4.runtime.pda.PDA;
|
||||||
import org.antlr.v4.semantics.SemanticPipeline;
|
import org.antlr.v4.semantics.SemanticPipeline;
|
||||||
import org.antlr.v4.tool.Grammar;
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
@ -24,6 +25,14 @@ public class TestPDABytecodeInterp extends BaseTest {
|
||||||
checkMatches(g, "abab", expecting);
|
checkMatches(g, "abab", expecting);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test public void testNotChar() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n"+
|
||||||
|
"A : ~'a' ;");
|
||||||
|
String expecting = "A, EOF";
|
||||||
|
checkMatches(g, "b", expecting);
|
||||||
|
}
|
||||||
|
|
||||||
@Test public void testIDandIntandKeyword() throws Exception {
|
@Test public void testIDandIntandKeyword() throws Exception {
|
||||||
LexerGrammar g = new LexerGrammar(
|
LexerGrammar g = new LexerGrammar(
|
||||||
"lexer grammar L;\n" +
|
"lexer grammar L;\n" +
|
||||||
|
@ -202,7 +211,10 @@ public class TestPDABytecodeInterp extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME);
|
LexerCompiler comp = new LexerCompiler(g);
|
||||||
|
CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
|
||||||
|
PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
|
||||||
|
|
||||||
ANTLRStringStream in = new ANTLRStringStream(input);
|
ANTLRStringStream in = new ANTLRStringStream(input);
|
||||||
List<Integer> tokenTypes = new ArrayList<Integer>();
|
List<Integer> tokenTypes = new ArrayList<Integer>();
|
||||||
int ttype = 0;
|
int ttype = 0;
|
||||||
|
@ -236,7 +248,9 @@ public class TestPDABytecodeInterp extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME);
|
LexerCompiler comp = new LexerCompiler(g);
|
||||||
|
CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
|
||||||
|
PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
|
||||||
ANTLRStringStream in = new ANTLRStringStream(input);
|
ANTLRStringStream in = new ANTLRStringStream(input);
|
||||||
List<Integer> tokenTypes = new ArrayList<Integer>();
|
List<Integer> tokenTypes = new ArrayList<Integer>();
|
||||||
int ttype = PDA.execThompson(in);
|
int ttype = PDA.execThompson(in);
|
||||||
|
|
Loading…
Reference in New Issue