added NOT/SET ops, refactored PDA generation

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6900]
This commit is contained in:
parrt 2010-05-27 16:01:55 -08:00
parent acf962bc28
commit 19aecd3163
15 changed files with 701 additions and 554 deletions

View File

@ -39,21 +39,22 @@ public class Bytecode {
// be an array of objects (Bytecode[]). We want it to be byte[]. // be an array of objects (Bytecode[]). We want it to be byte[].
// INSTRUCTION BYTECODES (byte is signed; use a short to keep 0..255) // INSTRUCTION BYTECODES (byte is signed; use a short to keep 0..255)
public static final short ACCEPT = 1; public static final short ACCEPT = 1;
public static final short JMP = 2; public static final short JMP = 2;
public static final short SPLIT = 3; public static final short SPLIT = 3;
public static final short MATCH8 = 4; public static final short MATCH8 = 4;
public static final short MATCH16 = 5; public static final short MATCH16 = 5;
public static final short RANGE8 = 6; public static final short RANGE8 = 6;
public static final short RANGE16 = 7; public static final short RANGE16 = 7;
public static final short WILDCARD = 8; public static final short WILDCARD = 8;
//public static final short NOT = 8; ??? public static final short SET = 9;
public static final short CALL = 9; // JMP with a push public static final short CALL = 10; // JMP with a push
public static final short RET = 10; // an accept instr for fragment rules public static final short RET = 11; // an accept instr for fragment rules
public static final short LABEL = 11; public static final short LABEL = 12;
public static final short SAVE = 12; public static final short SAVE = 13;
public static final short SEMPRED = 13; public static final short SEMPRED = 14;
public static final short ACTION = 14; public static final short ACTION = 15;
public static final short NOT = 16; // not next match instr
/** Used for disassembly; describes instruction set */ /** Used for disassembly; describes instruction set */
public static Instruction[] instructions = new Instruction[] { public static Instruction[] instructions = new Instruction[] {
@ -66,12 +67,14 @@ public class Bytecode {
new Instruction("range8", OperandType.BYTE, OperandType.BYTE), new Instruction("range8", OperandType.BYTE, OperandType.BYTE),
new Instruction("range16", OperandType.CHAR, OperandType.CHAR), new Instruction("range16", OperandType.CHAR, OperandType.CHAR),
new Instruction("wildcard"), new Instruction("wildcard"),
new Instruction("set", OperandType.SHORT),
new Instruction("call", OperandType.ADDR), new Instruction("call", OperandType.ADDR),
new Instruction("ret"), new Instruction("ret"),
new Instruction("label", OperandType.SHORT), new Instruction("label", OperandType.SHORT),
new Instruction("save", OperandType.SHORT), new Instruction("save", OperandType.SHORT),
new Instruction("sempred", OperandType.SHORT, OperandType.SHORT), // sempred ruleIndex, predIndex new Instruction("sempred", OperandType.SHORT, OperandType.SHORT), // sempred ruleIndex, predIndex
new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex
new Instruction("not"),
}; };
public static String disassemble(byte[] code, int start, boolean operandsAreChars) { public static String disassemble(byte[] code, int start, boolean operandsAreChars) {

View File

@ -8,7 +8,6 @@ import org.antlr.v4.runtime.CommonToken;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map;
/** A (nondeterministic) pushdown bytecode machine for lexing and LL prediction. /** A (nondeterministic) pushdown bytecode machine for lexing and LL prediction.
* Derived partially from Cox' description of Thompson's 1960s work: * Derived partially from Cox' description of Thompson's 1960s work:
@ -22,28 +21,20 @@ public class PDA {
public interface sempred_fptr { boolean eval(int predIndex); } public interface sempred_fptr { boolean eval(int predIndex); }
public byte[] code; public byte[] code;
public Map<String, Integer> ruleToAddr; //public Map<String, Integer> ruleToAddr;
public int[] tokenTypeToAddr; public int[] altToAddr; // either token type (in lexer) or alt num for DFA in parser
public CommonToken[] labelValues; public CommonToken[] labelValues;
public int nLabels; public int nLabels;
/** If we hit an action, we'll have to rewind and do the winning rule again */ /** If we hit an action, we'll have to rewind and do the winning rule again */
boolean bypassedAction; boolean bypassedAction;
public PDA() {;} boolean notNextMatch;
public PDA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
this.code = code;
this.ruleToAddr = ruleToAddr;
this.tokenTypeToAddr = tokenTypeToAddr;
this.nLabels = nLabels;
labelValues = new CommonToken[nLabels];
}
public PDA(byte[] code, int[] tokenTypeToAddr, int nLabels) { public PDA(byte[] code, int[] altToAddr, int nLabels) {
System.out.println("code="+Arrays.toString(code)); System.out.println("code="+Arrays.toString(code));
this.code = code; this.code = code;
this.tokenTypeToAddr = tokenTypeToAddr; this.altToAddr = altToAddr;
this.nLabels = nLabels; this.nLabels = nLabels;
labelValues = new CommonToken[nLabels]; labelValues = new CommonToken[nLabels];
} }
@ -58,7 +49,7 @@ public class PDA {
System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling"); System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling");
bypassedAction = false; bypassedAction = false;
Arrays.fill(labelValues, null); Arrays.fill(labelValues, null);
int ttype2 = execThompson(input, tokenTypeToAddr[ttype], true); int ttype2 = execThompson(input, altToAddr[ttype], true);
if ( ttype!=ttype2 ) { if ( ttype!=ttype2 ) {
System.err.println("eh? token diff with action(s)"); System.err.println("eh? token diff with action(s)");
} }
@ -92,33 +83,48 @@ processOneChar:
//System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach); //System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
trace(ip); trace(ip);
short opcode = code[ip]; short opcode = code[ip];
boolean matched;
ip++; // move to next instruction or first byte of operand ip++; // move to next instruction or first byte of operand
switch (opcode) { switch (opcode) {
case Bytecode.NOT :
notNextMatch = true;
break;
case Bytecode.MATCH8 : case Bytecode.MATCH8 :
if ( c == code[ip] ) { if ( c == code[ip] || (notNextMatch && c != code[ip]) ) {
addToClosure(reach, ip+1, alt, context); addToClosure(reach, ip+1, alt, context);
} }
notNextMatch = false;
break; break;
case Bytecode.MATCH16 : case Bytecode.MATCH16 :
if ( c == getShort(code, ip) ) { matched = c == getShort(code, ip);
if ( matched || (notNextMatch && matched) ) {
addToClosure(reach, ip+2, alt, context); addToClosure(reach, ip+2, alt, context);
} }
notNextMatch = false;
break; break;
case Bytecode.RANGE8 : case Bytecode.RANGE8 :
if ( c>=code[ip] && c<=code[ip+1] ) { matched = c >= code[ip] && c <= code[ip + 1];
if ( matched || (notNextMatch && matched) ) {
addToClosure(reach, ip+2, alt, context); addToClosure(reach, ip+2, alt, context);
} }
notNextMatch = false;
break; break;
case Bytecode.RANGE16 : case Bytecode.RANGE16 :
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) { matched = c < getShort(code, ip) || c > getShort(code, ip + 2);
if ( matched || (notNextMatch && matched) ) {
addToClosure(reach, ip+4, alt, context); addToClosure(reach, ip+4, alt, context);
} }
notNextMatch = false;
break; break;
case Bytecode.WILDCARD : case Bytecode.WILDCARD :
if ( c!=Token.EOF ) { if ( c!=Token.EOF ) {
addToClosure(reach, ip, alt, context); addToClosure(reach, ip, alt, context);
} }
break; break;
case Bytecode.SET :
System.err.println("not impl");
notNextMatch = false;
break;
case Bytecode.LABEL : // lexers only case Bytecode.LABEL : // lexers only
int labelIndex = getShort(code, ip); int labelIndex = getShort(code, ip);
labelValues[labelIndex] = labelValues[labelIndex] =
@ -217,6 +223,10 @@ processOneChar:
short opcode = code[ip]; short opcode = code[ip];
ip++; // move to next instruction or first byte of operand ip++; // move to next instruction or first byte of operand
switch (opcode) { switch (opcode) {
case Bytecode.NOT : // see thru NOT but include in closure so we exec during reach
closure.add(t); // add to closure; need to execute during reach
addToClosure(closure, ip, alt, context);
break;
case Bytecode.JMP : case Bytecode.JMP :
addToClosure(closure, getShort(code, ip), alt, context); addToClosure(closure, getShort(code, ip), alt, context);
break; break;
@ -360,10 +370,10 @@ processOneChar:
} }
// if we reach accept state, toss out any addresses in rest // if we reach accept state, toss out any addresses in rest
// of work list associated with accept's rule; that rule is done // of work list associated with accept's rule; that rule is done
int ruleStart = tokenTypeToAddr[ttype]; int ruleStart = altToAddr[ttype];
int ruleStop = code.length; int ruleStop = code.length;
if ( ttype+1 < tokenTypeToAddr.length ) { if ( ttype+1 < altToAddr.length ) {
ruleStop = tokenTypeToAddr[ttype+1]-1; ruleStop = altToAddr[ttype+1]-1;
} }
System.out.println("kill range "+ruleStart+".."+ruleStop); System.out.println("kill range "+ruleStart+".."+ruleStop);
int j=i+1; int j=i+1;

View File

@ -3,6 +3,7 @@ package org.antlr.v4.codegen;
import org.antlr.runtime.Token; import org.antlr.runtime.Token;
import org.antlr.v4.codegen.pda.Instr; import org.antlr.v4.codegen.pda.Instr;
import org.antlr.v4.misc.DoubleKeyMap; import org.antlr.v4.misc.DoubleKeyMap;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.tool.Rule; import org.antlr.v4.tool.Rule;
import java.util.ArrayList; import java.util.ArrayList;
@ -14,12 +15,18 @@ import java.util.Map;
public class CompiledPDA { public class CompiledPDA {
public List<Instr> instrs = new ArrayList<Instr>(); public List<Instr> instrs = new ArrayList<Instr>();
public byte[] code; // instrs in bytecode form public byte[] code; // instrs in bytecode form
public int ip = 0; // where to write next public List<IntervalSet> set8table = new ArrayList<IntervalSet>();
public List<IntervalSet> set16table = new ArrayList<IntervalSet>();
public Map<String, Integer> ruleToAddr = new HashMap<String, Integer>(); public Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
public int[] tokenTypeToAddr;
public int[] altToAddr; // either token type (in lexer) or alt num for DFA in parser
public DoubleKeyMap<Rule, String, Integer> ruleLabels = new DoubleKeyMap<Rule, String, Integer>(); public DoubleKeyMap<Rule, String, Integer> ruleLabels = new DoubleKeyMap<Rule, String, Integer>();
public DoubleKeyMap<Rule, Token, Integer> ruleActions = new DoubleKeyMap<Rule, Token, Integer>(); public DoubleKeyMap<Rule, Token, Integer> ruleActions = new DoubleKeyMap<Rule, Token, Integer>();
public DoubleKeyMap<Rule, Token, Integer> ruleSempreds = new DoubleKeyMap<Rule, Token, Integer>(); public DoubleKeyMap<Rule, Token, Integer> ruleSempreds = new DoubleKeyMap<Rule, Token, Integer>();
public int nLabels; public int nLabels;
/** Creates an empty compiled PDA whose address table has one slot per
 *  alternative (or token type), plus one so the table can be indexed
 *  directly by values up to and including {@code numAlts}.
 */
public CompiledPDA(int numAlts) {
    int tableSize = numAlts + 1;
    this.altToAddr = new int[tableSize];
}
} }

View File

@ -0,0 +1,77 @@
package org.antlr.v4.codegen;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.automata.DFAState;
import org.antlr.v4.automata.Edge;
import org.antlr.v4.codegen.pda.*;
import org.antlr.v4.runtime.pda.PDA;
/** Translates a {@link DFA} into PDA instructions by walking its states.
 *
 *  Accept states become accept instructions, states with more than one
 *  outgoing edge get a leading split, and every edge becomes a match (single
 *  element label) or set (multi-element label) instruction followed by a jmp.
 *  Jump targets are first recorded as DFA state numbers and are rewritten to
 *  bytecode addresses once the whole DFA has been visited.
 */
public class DFACompiler {
    public DFA dfa;
    /** marked[s] is true once state number s has been visited. */
    boolean[] marked;
    /** Bytecode address at which the code for each state number starts. */
    int[] stateToAddr;
    PDABytecodeGenerator gen;

    public DFACompiler(DFA dfa) {
        this.dfa = dfa;
        gen = new PDABytecodeGenerator(dfa.g.getMaxTokenType());
    }

    /** Generate the instruction objects, convert them to bytecode and
     *  return the generator's compiled result.
     */
    public CompiledPDA compile() {
        walk();
        gen.compile();
        return gen.obj;
    }

    /** Visit every state reachable from the start state, then patch all
     *  jump instructions so they hold addresses rather than state numbers.
     *  Always returns null.
     */
    public PDA walk() {
        int tableSize = dfa.stateSet.size()+1;
        marked = new boolean[tableSize];
        stateToAddr = new int[tableSize];
        walk(dfa.startState);

        // second pass over the emitted code: state number -> bytecode addr
        for (Instr instr : gen.obj.instrs) {
            System.out.println("instr "+instr);
            if ( !(instr instanceof JumpInstr) ) continue;
            JumpInstr jump = (JumpInstr)instr;
            jump.target = stateToAddr[jump.target];
        }
        return null;
    }

    /** Emit code for state d and, recursively, for each edge target right
     *  after that edge's instructions. Jump targets emitted here are still
     *  DFA state numbers; walk() translates them afterwards in one pass.
     */
    public void walk(DFAState d) {
        if ( marked[d.stateNumber] ) return; // already generated
        marked[d.stateNumber] = true;
        stateToAddr[d.stateNumber] = gen.ip;
        System.out.println("visit "+d.stateNumber+" @"+ gen.ip);

        if ( d.isAcceptState ) {
            gen.emit(new AcceptInstr(d.predictsAlt));
            return;
        }

        // only states with multiple edges need a split in front of them
        SplitInstr split = null;
        if ( d.edges.size()>1 ) {
            split = new SplitInstr(d.edges.size());
            gen.emit(split);
        }

        for (Edge edge : d.edges) {
            if ( split!=null ) split.addrs.add(gen.ip);
            if ( edge.label.getMinElement() != edge.label.getMaxElement() ) {
                // label covers more than one element
                gen.emit(new SetInstr(edge.label));
            }
            else {
                gen.emit(new MatchInstr(edge.label.getSingleElement()));
            }
            gen.emit(new JumpInstr(edge.target.stateNumber));
            walk(edge.target);
        }
    }
}

View File

@ -0,0 +1,65 @@
package org.antlr.v4.codegen;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.v4.codegen.pda.AcceptInstr;
import org.antlr.v4.codegen.pda.RetInstr;
import org.antlr.v4.codegen.pda.SplitInstr;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.runtime.pda.Bytecode;
import org.antlr.v4.tool.GrammarAST;
import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule;
/** Compiles all rules of one lexer mode into a single PDA bytecode program.
 *
 *  The program starts with a split that has one branch per non-fragment
 *  rule. Each rule's block is translated by the PDABytecodeTriggers tree
 *  walker and is terminated by an accept (non-fragment rules) or a ret
 *  (fragment rules).
 */
public class LexerCompiler {
    LexerGrammar lg;

    public LexerCompiler(LexerGrammar lg) {
        this.lg = lg;
    }

    /** Generate, print and return the compiled PDA for mode modeName. */
    public CompiledPDA compileMode(String modeName) {
        GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
        PDABytecodeGenerator gen = new PDABytecodeGenerator(lg.getMaxTokenType());
        PDABytecodeTriggers trigger = new PDABytecodeTriggers(null, gen);

        // add split for s0 to hook up rules (fill in operands as we gen rules);
        // fragment rules get no branch of their own
        int tokenRuleCount = lg.modes.get(modeName).size();
        for (Rule r : lg.modes.get(modeName)) {
            if ( r.isFragment() ) tokenRuleCount--;
        }
        SplitInstr s0 = new SplitInstr(tokenRuleCount);
        gen.emit(s0);

        for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
            gen.currentRule = r;
            GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
            trigger.setTreeNodeStream(new CommonTreeNodeStream(adaptor,blk));
            int ttype = lg.getTokenType(r.name);

            // every rule gets an address; only non-fragment rules are
            // entered from s0 and from the token type table
            gen.defineRuleAddr(r.name, gen.ip);
            if ( !r.isFragment() ) {
                s0.addrs.add(gen.ip);
                gen.defineTokenTypeToAddr(ttype, gen.ip);
            }

            try {
                trigger.block(); // GEN Instr OBJECTS
                int ruleTokenType = lg.getTokenType(r.name);
                if ( r.isFragment() ) {
                    gen.emit(new RetInstr());
                }
                else {
                    gen.emit(new AcceptInstr(ruleTokenType));
                }
            }
            catch (Exception e){
                // report and keep going with the remaining rules
                e.printStackTrace(System.err);
            }
        }

        gen.compile();
        gen.obj.nLabels = gen.labelIndex;
        System.out.println(Bytecode.disassemble(gen.obj.code));
        System.out.println("rule addrs="+ gen.obj.ruleToAddr);
        return gen.obj;
    }
}

View File

@ -25,7 +25,8 @@ public class LexerFactory {
fileST.add("fileName", gen.getRecognizerFileName()); fileST.add("fileName", gen.getRecognizerFileName());
fileST.add("lexer", lexerST); fileST.add("lexer", lexerST);
for (String modeName : lg.modes.keySet()) { // for each mode for (String modeName : lg.modes.keySet()) { // for each mode
CompiledPDA pda = PDABytecodeGenerator.compileLexerMode(lg, modeName); LexerCompiler comp = new LexerCompiler(lg);
CompiledPDA pda = comp.compileMode(modeName);
ST pdaST = gen.templates.getInstanceOf("PDA"); ST pdaST = gen.templates.getInstanceOf("PDA");
for (Rule r : pda.ruleActions.keySet()) { for (Rule r : pda.ruleActions.keySet()) {
Set<Token> actionTokens = pda.ruleActions.keySet(r); Set<Token> actionTokens = pda.ruleActions.keySet(r);

View File

@ -1,66 +1,61 @@
package org.antlr.v4.codegen; package org.antlr.v4.codegen;
import org.antlr.runtime.RecognizerSharedState;
import org.antlr.runtime.Token; import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTreeNodeStream; import org.antlr.v4.codegen.pda.CallInstr;
import org.antlr.runtime.tree.Tree; import org.antlr.v4.codegen.pda.Instr;
import org.antlr.runtime.tree.TreeNodeStream; import org.antlr.v4.codegen.pda.MatchInstr;
import org.antlr.v4.automata.DFA; import org.antlr.v4.codegen.pda.NotInstr;
import org.antlr.v4.automata.DFAState;
import org.antlr.v4.automata.Edge;
import org.antlr.v4.codegen.pda.*;
import org.antlr.v4.misc.CharSupport; import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.IntervalSet; import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.parse.ANTLRParser; import org.antlr.v4.tool.Rule;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.runtime.pda.Bytecode;
import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.runtime.tree.TreeParser;
import org.antlr.v4.tool.*;
import java.util.Map; import java.util.Map;
/** http://swtch.com/~rsc/regexp/regexp2.html */ /** http://swtch.com/~rsc/regexp/regexp2.html */
public class PDABytecodeGenerator extends TreeParser { public class PDABytecodeGenerator {
public Grammar g;
public Rule currentRule; public Rule currentRule;
CompiledPDA pda = new CompiledPDA(); public CompiledPDA obj;
public int labelIndex = 0; // first time we ask for labels we index public int ip = 0; // where to write next
public PDABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) { int labelIndex = 0; // first time we ask for labels we index
super(input, state);
/** Creates a generator that writes into a fresh CompiledPDA built for
 *  numAlts alternatives.
 */
public PDABytecodeGenerator(int numAlts) {
    this.obj = new CompiledPDA(numAlts);
}
public void compile() {
obj.code = convertInstrsToBytecode();
} }
public void emit(Instr I) { public void emit(Instr I) {
I.addr = pda.ip; I.addr = ip;
I.rule = currentRule; I.rule = currentRule;
I.gen = this; I.gen = this;
pda.ip += I.nBytes(); ip += I.nBytes();
pda.instrs.add(I); obj.instrs.add(I);
} }
// indexed from 0 per rule // indexed from 0 per rule
public int getActionIndex(Rule r, Token actionToken) { public int getActionIndex(Rule r, Token actionToken) {
Integer I = pda.ruleActions.get(r, actionToken); Integer I = obj.ruleActions.get(r, actionToken);
if ( I!=null ) return I; // already got its label if ( I!=null ) return I; // already got its label
Map<Token, Integer> labels = pda.ruleActions.get(r); Map<Token, Integer> labels = obj.ruleActions.get(r);
int i = 0; int i = 0;
if ( labels!=null ) i = labels.size(); if ( labels!=null ) i = labels.size();
pda.ruleActions.put(r, actionToken, i); obj.ruleActions.put(r, actionToken, i);
return i; return i;
} }
// indexed from 0 per rule // indexed from 0 per rule
public int getSempredIndex(Rule r, Token actionToken) { public int getSempredIndex(Rule r, Token actionToken) {
Integer I = pda.ruleSempreds.get(r, actionToken); Integer I = obj.ruleSempreds.get(r, actionToken);
if ( I!=null ) return I; // already got its label if ( I!=null ) return I; // already got its label
Map<Token, Integer> labels = pda.ruleSempreds.get(r); Map<Token, Integer> labels = obj.ruleSempreds.get(r);
int i = 0; int i = 0;
if ( labels!=null ) i = labels.size(); if ( labels!=null ) i = labels.size();
pda.ruleSempreds.put(r, actionToken, i); obj.ruleSempreds.put(r, actionToken, i);
return i; return i;
} }
@ -69,129 +64,55 @@ public class PDABytecodeGenerator extends TreeParser {
* to an index in an action. * to an index in an action.
*/ */
public int getLabelIndex(Rule r, String labelName) { public int getLabelIndex(Rule r, String labelName) {
Integer I = pda.ruleLabels.get(r, labelName); Integer I = obj.ruleLabels.get(r, labelName);
if ( I!=null ) return I; // already got its label if ( I!=null ) return I; // already got its label
int i = labelIndex++; int i = labelIndex++;
pda.ruleLabels.put(r, labelName, i); obj.ruleLabels.put(r, labelName, i);
return i; return i;
} }
/** Appends set to the compiled PDA's set8table and returns the index of the
 *  newly added entry. Every call adds a new entry.
 */
public int getSetIndex(IntervalSet set) {
    int index = obj.set8table.size(); // slot the new entry will occupy
    obj.set8table.add(set);
    return index;
}
public void emitString(Token t, boolean not) { public void emitString(Token t, boolean not) {
String chars = CharSupport.getStringFromGrammarStringLiteral(t.getText()); String chars = CharSupport.getStringFromGrammarStringLiteral(t.getText());
if ( not && chars.length()==1 ) { if ( not && chars.length()==1 ) emit(new NotInstr());
emitNotChar(t, chars);
return;
}
for (char c : chars.toCharArray()) { for (char c : chars.toCharArray()) {
emit(new MatchInstr(t, c)); emit(new MatchInstr(t, c));
} }
} }
public void emitNotChar(Token t, String chars) {
IntervalSet all = (IntervalSet)g.getTokenTypes();
int c = chars.charAt(0);
SplitInstr s = new SplitInstr(2);
RangeInstr left = new RangeInstr(t, t);
left.a = all.getMinElement();
left.b = c-1;
RangeInstr right = new RangeInstr(t, t);
right.a = c+1;
right.b = 127; // all.getMaxElement();
emit(s);
emit(left);
JumpInstr J = new JumpInstr();
emit(J);
emit(right);
s.addrs.add(left.addr);
s.addrs.add(right.addr);
int END = pda.ip;
J.target = END;
return;
}
public byte[] convertInstrsToBytecode() { public byte[] convertInstrsToBytecode() {
Instr last = pda.instrs.get(pda.instrs.size() - 1); Instr last = obj.instrs.get(obj.instrs.size() - 1);
int size = last.addr + last.nBytes(); int size = last.addr + last.nBytes();
byte[] code = new byte[size]; byte[] code = new byte[size];
// resolve CALL instruction targets before generating code // resolve CALL instruction targets before generating code
for (Instr I : pda.instrs) { for (Instr I : obj.instrs) {
if ( I instanceof CallInstr ) { if ( I instanceof CallInstr ) {
CallInstr C = (CallInstr) I; CallInstr C = (CallInstr) I;
String ruleName = C.token.getText(); String ruleName = C.token.getText();
C.target = pda.ruleToAddr.get(ruleName); C.target = obj.ruleToAddr.get(ruleName);
} }
} }
for (Instr I : pda.instrs) { for (Instr I : obj.instrs) {
I.write(code); I.write(code);
} }
return code; return code;
} }
public static CompiledPDA compileLexerMode(LexerGrammar lg, String modeName) { public void defineRuleAddr(String name, int ip) {
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(); obj.ruleToAddr.put(name, ip);
PDABytecodeTriggers gen = new PDABytecodeTriggers(null);
gen.g = lg;
gen.pda.tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
// add split for s0 to hook up rules (fill in operands as we gen rules)
int numRules = lg.modes.get(modeName).size();
int numFragmentRules = 0;
for (Rule r : lg.modes.get(modeName)) { if ( r.isFragment() ) numFragmentRules++; }
SplitInstr s0 = new SplitInstr(numRules - numFragmentRules);
gen.emit(s0);
for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
gen.currentRule = r;
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
gen.setTreeNodeStream(nodes);
int ttype = lg.getTokenType(r.name);
gen.pda.ruleToAddr.put(r.name, gen.pda.ip);
if ( !r.isFragment() ) {
s0.addrs.add(gen.pda.ip);
gen.pda.tokenTypeToAddr[ttype] = gen.pda.ip;
}
try {
gen.block(); // GEN Instr OBJECTS
int ruleTokenType = lg.getTokenType(r.name);
if ( !r.isFragment() ) {
gen.emit(new AcceptInstr(ruleTokenType));
}
else {
gen.emit(new RetInstr());
}
}
catch (Exception e){
e.printStackTrace(System.err);
}
}
gen.pda.code = gen.convertInstrsToBytecode();
gen.pda.nLabels = gen.labelIndex;
System.out.println(Bytecode.disassemble(gen.pda.code));
System.out.println("rule addrs="+gen.pda.ruleToAddr);
return gen.pda;
} }
// (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .)) public void defineRuleIndexToAddr(int index, int ip) {
public boolean blockHasWildcardAlt(GrammarAST block) { obj.altToAddr[index] = ip;
for (Object alt : block.getChildren()) {
AltAST altAST = (AltAST)alt;
if ( altAST.getChildCount()==1 ) {
Tree e = altAST.getChild(0);
if ( e.getType()==ANTLRParser.WILDCARD ) {
return true;
}
}
}
return false;
} }
// testing public void defineTokenTypeToAddr(int ttype, int ip) {
public static PDA getPDA(LexerGrammar lg, String modeName) { defineRuleIndexToAddr(ttype, ip);
CompiledPDA info = compileLexerMode(lg, modeName);
return new PDA(info.code, info.ruleToAddr, info.tokenTypeToAddr, info.nLabels);
} }
/** Write value at index into a byte array highest to lowest byte, /** Write value at index into a byte array highest to lowest byte,
@ -202,66 +123,4 @@ public class PDABytecodeGenerator extends TreeParser {
memory[index+1] = (byte)(value&0xFF); memory[index+1] = (byte)(value&0xFF);
} }
// ----------
public static PDA getPDA(DFA dfa) {
PDABytecodeTriggers gen = new PDABytecodeTriggers(null);
gen.g = dfa.g;
gen.pda.tokenTypeToAddr = new int[gen.g.getMaxTokenType()+1];
gen.walk(dfa);
gen.pda.code = gen.convertInstrsToBytecode();
CompiledPDA c = gen.pda;
return new PDA(c.code, c.ruleToAddr, c.tokenTypeToAddr, c.nLabels);
}
boolean[] marked;
int[] stateToAddr;
public PDA walk(DFA dfa) {
marked = new boolean[dfa.stateSet.size()+1];
stateToAddr = new int[dfa.stateSet.size()+1];
walk(dfa.startState);
// walk code, update jump targets.
for (Instr I : pda.instrs) {
System.out.println("instr "+I);
if ( I instanceof JumpInstr ) {
JumpInstr J = (JumpInstr)I;
J.target = stateToAddr[J.target];
}
}
return null;
}
// recursive so we follow chains in DFA, leading to fewer
// jmp instructions.
// start by assuming state num is bytecode addr then translate after
// in one pass
public void walk(DFAState d) {
if ( marked[d.stateNumber] ) return;
marked[d.stateNumber] = true;
stateToAddr[d.stateNumber] = pda.ip;
System.out.println("visit "+d.stateNumber+" @"+pda.ip);
if ( d.isAcceptState ) {
AcceptInstr A = new AcceptInstr(d.predictsAlt);
emit(A);
return;
}
SplitInstr S = null;
if ( d.edges.size()>1 ) {
S = new SplitInstr(d.edges.size());
emit(S);
}
for (Edge e : d.edges) {
if ( S!=null ) S.addrs.add(pda.ip);
// TODO: assumes no sets yet!
MatchInstr M = new MatchInstr(e.label.getSingleElement());
JumpInstr J = new JumpInstr(e.target.stateNumber);
emit(M);
emit(J);
walk(e.target);
}
}
} }

View File

@ -3,13 +3,14 @@ options {
language = Java; language = Java;
tokenVocab = ANTLRParser; tokenVocab = ANTLRParser;
ASTLabelType = GrammarAST; ASTLabelType = GrammarAST;
superClass = PDABytecodeGenerator; // superClass = PDABytecodeGenerator;
} }
@header { @header {
package org.antlr.v4.codegen; package org.antlr.v4.codegen;
import org.antlr.v4.codegen.pda.*; import org.antlr.v4.codegen.pda.*;
import org.antlr.v4.tool.GrammarAST; import org.antlr.v4.tool.GrammarAST;
import org.antlr.v4.tool.AltAST;
import org.antlr.v4.tool.GrammarASTWithOptions; import org.antlr.v4.tool.GrammarASTWithOptions;
import org.antlr.v4.tool.LexerGrammar; import org.antlr.v4.tool.LexerGrammar;
import java.util.Collections; import java.util.Collections;
@ -17,6 +18,30 @@ import java.util.Map;
import java.util.HashMap; import java.util.HashMap;
} }
@members {
    /** Generator whose emit methods the rule actions of this grammar call. */
    PDABytecodeGenerator gen;

    /** Creates a tree walker over input whose actions emit into gen. */
    public PDABytecodeTriggers(TreeNodeStream input, PDABytecodeGenerator gen) {
        this(input);
        this.gen = gen;
    }

    /** Returns true if any alternative of block consists of exactly one
     *  WILDCARD element, e.g. (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .)).
     *  Children that are not AltAST nodes are ignored.
     */
    public boolean blockHasWildcardAlt(GrammarAST block) {
        // A node without children has nothing to inspect. Checking the count
        // first avoids iterating getChildren(), which in the ANTLR 3 runtime
        // can be null for a childless node.
        if ( block.getChildCount()==0 ) return false;
        for (Object alt : block.getChildren()) {
            if ( !(alt instanceof AltAST) ) continue;
            AltAST altAST = (AltAST)alt;
            if ( altAST.getChildCount()==1 ) {
                Tree e = altAST.getChild(0);
                if ( e.getType()==WILDCARD ) {
                    return true;
                }
            }
        }
        return false;
    }
}
block block
: ^( BLOCK (^(OPTIONS .+))? : ^( BLOCK (^(OPTIONS .+))?
{ {
@ -28,8 +53,8 @@ block
SplitInstr S = null; SplitInstr S = null;
if ( nAlts>1 ) { if ( nAlts>1 ) {
S = new SplitInstr(nAlts); S = new SplitInstr(nAlts);
emit(S); gen.emit(S);
S.addrs.add(pda.ip); S.addrs.add(gen.ip);
} }
int alt = 1; int alt = 1;
} }
@ -38,14 +63,14 @@ block
if ( alt < nAlts ) { if ( alt < nAlts ) {
JumpInstr J = new JumpInstr(); JumpInstr J = new JumpInstr();
jumps.add(J); jumps.add(J);
emit(J); gen.emit(J);
S.addrs.add(pda.ip); S.addrs.add(gen.ip);
} }
alt++; alt++;
} }
)+ )+
{ {
int END = pda.ip; int END = gen.ip;
for (JumpInstr J : jumps) J.target = END; for (JumpInstr J : jumps) J.target = END;
} }
) )
@ -61,14 +86,14 @@ element
: labeledElement : labeledElement
| atom | atom
| ebnf | ebnf
| ACTION {emit(new ActionInstr($ACTION.token));} | ACTION {gen.emit(new ActionInstr($ACTION.token));}
| SEMPRED {emit(new SemPredInstr($SEMPRED.token));} | SEMPRED {gen.emit(new SemPredInstr($SEMPRED.token));}
| GATED_SEMPRED {emit(new SemPredInstr($GATED_SEMPRED.token));} | GATED_SEMPRED {gen.emit(new SemPredInstr($GATED_SEMPRED.token));}
| treeSpec | treeSpec
; ;
labeledElement labeledElement
: ^(ASSIGN ID {emit(new LabelInstr($ID.token));} atom {emit(new SaveInstr($ID.token));} ) : ^(ASSIGN ID {gen.emit(new LabelInstr($ID.token));} atom {gen.emit(new SaveInstr($ID.token));} )
| ^(ASSIGN ID block) | ^(ASSIGN ID block)
| ^(PLUS_ASSIGN ID atom) | ^(PLUS_ASSIGN ID atom)
| ^(PLUS_ASSIGN ID block) | ^(PLUS_ASSIGN ID block)
@ -87,33 +112,33 @@ ebnf
: ^(astBlockSuffix block) : ^(astBlockSuffix block)
| { | {
SplitInstr S = new SplitInstr(2); SplitInstr S = new SplitInstr(2);
emit(S); gen.emit(S);
S.addrs.add(pda.ip); S.addrs.add(gen.ip);
} }
^(OPTIONAL block) ^(OPTIONAL block)
{ {
S.addrs.add(pda.ip); S.addrs.add(gen.ip);
} }
| { | {
int start=pda.ip; int start=gen.ip;
SplitInstr S = new SplitInstr(2); SplitInstr S = new SplitInstr(2);
emit(S); gen.emit(S);
int blkStart = pda.ip; int blkStart = gen.ip;
} }
^(CLOSURE block) ^(CLOSURE block)
{ {
JumpInstr J = new JumpInstr(); JumpInstr J = new JumpInstr();
emit(J); gen.emit(J);
J.target = start; J.target = start;
S.addrs.add(blkStart); S.addrs.add(blkStart);
S.addrs.add(pda.ip); S.addrs.add(gen.ip);
if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs); if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs);
} }
| {int start=pda.ip;} ^(POSITIVE_CLOSURE block) | {int start=gen.ip;} ^(POSITIVE_CLOSURE block)
{ {
SplitInstr S = new SplitInstr(2); SplitInstr S = new SplitInstr(2);
emit(S); gen.emit(S);
int stop = pda.ip; int stop = gen.ip;
S.addrs.add(start); S.addrs.add(start);
S.addrs.add(stop); S.addrs.add(stop);
if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs); if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs);
@ -136,8 +161,8 @@ atom
| range | range
| ^(DOT ID terminal[false]) | ^(DOT ID terminal[false])
| ^(DOT ID ruleref) | ^(DOT ID ruleref)
| ^(WILDCARD .) {emit(new WildcardInstr($WILDCARD.token));} | ^(WILDCARD .) {gen.emit(new WildcardInstr($WILDCARD.token));}
| WILDCARD {emit(new WildcardInstr($WILDCARD.token));} | WILDCARD {gen.emit(new WildcardInstr($WILDCARD.token));}
| terminal[false] | terminal[false]
| ruleref | ruleref
; ;
@ -155,15 +180,15 @@ ruleref
range range
: ^(RANGE a=STRING_LITERAL b=STRING_LITERAL) : ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
{emit(new RangeInstr($a.token, $b.token));} {gen.emit(new RangeInstr($a.token, $b.token));}
; ;
terminal[boolean not] terminal[boolean not]
: ^(STRING_LITERAL .) {emitString($STRING_LITERAL.token, $not);} : ^(STRING_LITERAL .) {gen.emitString($STRING_LITERAL.token, $not);}
| STRING_LITERAL {emitString($STRING_LITERAL.token, $not);} | STRING_LITERAL {gen.emitString($STRING_LITERAL.token, $not);}
| ^(TOKEN_REF ARG_ACTION .) {emit(new CallInstr($TOKEN_REF.token));} | ^(TOKEN_REF ARG_ACTION .) {gen.emit(new CallInstr($TOKEN_REF.token));}
| ^(TOKEN_REF .) {emit(new CallInstr($TOKEN_REF.token));} | ^(TOKEN_REF .) {gen.emit(new CallInstr($TOKEN_REF.token));}
| TOKEN_REF {emit(new CallInstr($TOKEN_REF.token));} | TOKEN_REF {gen.emit(new CallInstr($TOKEN_REF.token));}
| ^(ROOT terminal[false]) | ^(ROOT terminal[false])
| ^(BANG terminal[false]) | ^(BANG terminal[false])
; ;

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
// $ANTLR 3.2.1-SNAPSHOT May 24, 2010 15:02:05 SourceGenTriggers.g 2010-05-26 14:22:40 // $ANTLR 3.2.1-SNAPSHOT May 24, 2010 15:02:05 SourceGenTriggers.g 2010-05-27 16:58:15
package org.antlr.v4.codegen; package org.antlr.v4.codegen;

View File

@ -0,0 +1,9 @@
package org.antlr.v4.codegen.pda;
import org.antlr.v4.runtime.pda.Bytecode;
/**
 * Instruction whose opcode is {@link Bytecode#NOT}. The opcode's declaration
 * describes it as "not next match instr", i.e. it applies to the match
 * instruction that follows it in the generated code.
 */
public class NotInstr extends Instr {
    /** Reports a size of one byte; this instruction carries no operands. */
    public int nBytes() {
        return 1;
    }

    /** Returns the {@link Bytecode#NOT} opcode. */
    public short opcode() {
        return Bytecode.NOT;
    }
}

View File

@ -0,0 +1,20 @@
package org.antlr.v4.codegen.pda;
import org.antlr.v4.codegen.PDABytecodeGenerator;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.runtime.pda.Bytecode;
/**
 * Instruction whose opcode is {@link Bytecode#SET} and whose operand is a
 * short index identifying an {@link IntervalSet}. The index is requested from
 * the generator when the instruction is written, not when it is constructed.
 */
public class SetInstr extends Instr {
    /** The set handed to the constructor. */
    public IntervalSet set;

    /** Index returned by {@code gen.getSetIndex(set)}; assigned inside {@link #write(byte[])}. */
    public int setIndex;

    /** Creates an instruction for the given set; {@link #setIndex} is not assigned here. */
    public SetInstr(IntervalSet set) {
        this.set = set;
    }

    /** Returns the {@link Bytecode#SET} opcode. */
    public short opcode() {
        return Bytecode.SET;
    }

    /** Reports three bytes: one, plus the two-byte operand written at {@code addr+1}. */
    public int nBytes() {
        return 1 + 2;
    }

    /**
     * Writes this instruction into {@code code}. The superclass writer runs
     * first (presumably storing the opcode at {@code addr} -- confirm in
     * {@code Instr}); then the set index is looked up and stored as a short
     * immediately after it.
     */
    public void write(byte[] code) {
        super.write(code);
        setIndex = gen.getSetIndex(set);
        int operandAddr = addr + 1;
        short operand = (short) setIndex;
        PDABytecodeGenerator.writeShort(code, operandAddr, operand);
    }
}

View File

@ -3,7 +3,8 @@ package org.antlr.v4.test;
import org.antlr.v4.automata.DFA; import org.antlr.v4.automata.DFA;
import org.antlr.v4.automata.DecisionState; import org.antlr.v4.automata.DecisionState;
import org.antlr.v4.automata.NFA; import org.antlr.v4.automata.NFA;
import org.antlr.v4.codegen.PDABytecodeGenerator; import org.antlr.v4.codegen.CompiledPDA;
import org.antlr.v4.codegen.DFACompiler;
import org.antlr.v4.runtime.pda.Bytecode; import org.antlr.v4.runtime.pda.Bytecode;
import org.antlr.v4.runtime.pda.PDA; import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.Grammar;
@ -11,6 +12,21 @@ import org.junit.Test;
/** */ /** */
public class TestDFAtoPDABytecodeGeneration extends BaseTest { public class TestDFAtoPDABytecodeGeneration extends BaseTest {
/**
 * Decision 0 of {@code a : ~A B C | A ;} is expected to disassemble to the
 * listing below, in which the {@code ~A} alternative shows up as {@code set 0}.
 */
@Test
public void testNotAisSet() throws Exception {
    String grammarText =
        "parser grammar T;\n" +
        "a : ~A B C | A ;";
    Grammar grammar = new Grammar(grammarText);
    String expected =
        "0000:\tsplit 7, 16\n" +
        "0007:\tset 0\n" +
        "0010:\tjmp 13\n" +
        "0013:\taccept 1\n" +
        "0016:\tmatch8 5\n" +
        "0018:\tjmp 21\n" +
        "0021:\taccept 2\n";
    checkBytecode(grammar, 0, expected);
}
@Test public void testAorB() throws Exception { @Test public void testAorB() throws Exception {
Grammar g = new Grammar( Grammar g = new Grammar(
"parser grammar T;\n"+ "parser grammar T;\n"+
@ -62,8 +78,6 @@ public class TestDFAtoPDABytecodeGeneration extends BaseTest {
checkBytecode(g, 2, expecting); checkBytecode(g, 2, expecting);
} }
// TODO: ORDER OF TESTS MATTERS? DFA edge orders get changed. ack!
void checkBytecode(Grammar g, int decision, String expecting) { void checkBytecode(Grammar g, int decision, String expecting) {
NFA nfa = createNFA(g); NFA nfa = createNFA(g);
DecisionState blk = nfa.decisionToNFAState.get(decision); DecisionState blk = nfa.decisionToNFAState.get(decision);
@ -71,8 +85,10 @@ public class TestDFAtoPDABytecodeGeneration extends BaseTest {
// Edge e0 = dfa.states.get(1).edge(0); // Edge e0 = dfa.states.get(1).edge(0);
// Edge e1 = dfa.states.get(1).edge(1); // Edge e1 = dfa.states.get(1).edge(1);
// e0.target = e1.target; // e0.target = e1.target;
// System.out.print("altered DFA="+dfa); // System.out.print("altered DFA="+dfa);
PDA PDA = PDABytecodeGenerator.getPDA(dfa); DFACompiler comp = new DFACompiler(dfa);
assertEquals(expecting, Bytecode.disassemble(PDA.code, false)); CompiledPDA obj = comp.compile();
PDA pda = new PDA(obj.code, obj.altToAddr, obj.nLabels);
assertEquals(expecting, Bytecode.disassemble(pda.code, false));
} }
} }

View File

@ -1,7 +1,8 @@
package org.antlr.v4.test; package org.antlr.v4.test;
import org.antlr.v4.Tool; import org.antlr.v4.Tool;
import org.antlr.v4.codegen.PDABytecodeGenerator; import org.antlr.v4.codegen.CompiledPDA;
import org.antlr.v4.codegen.LexerCompiler;
import org.antlr.v4.runtime.pda.Bytecode; import org.antlr.v4.runtime.pda.Bytecode;
import org.antlr.v4.runtime.pda.PDA; import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.semantics.SemanticPipeline; import org.antlr.v4.semantics.SemanticPipeline;
@ -22,6 +23,18 @@ public class TestPDABytecodeGeneration extends BaseTest {
checkBytecode(g, expecting); checkBytecode(g, expecting);
} }
/**
 * The lexer rule {@code A : ~'a' ;} is expected to disassemble to a
 * {@code not} instruction immediately followed by {@code match8 'a'}.
 */
@Test
public void testNotChar() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "A : ~'a' ;";
    LexerGrammar lexerGrammar = new LexerGrammar(grammarText);
    String expected =
        "0000:\tsplit 5\n" +
        "0005:\tnot \n" +
        "0006:\tmatch8 'a'\n" +
        "0008:\taccept 4\n";
    checkBytecode(lexerGrammar, expected);
}
@Test public void testIDandIntandKeyword() throws Exception { @Test public void testIDandIntandKeyword() throws Exception {
LexerGrammar g = new LexerGrammar( LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" + "lexer grammar L;\n" +
@ -215,7 +228,9 @@ public class TestPDABytecodeGeneration extends BaseTest {
} }
} }
} }
PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME); LexerCompiler comp = new LexerCompiler(g);
CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
assertEquals(expecting, Bytecode.disassemble(PDA.code)); assertEquals(expecting, Bytecode.disassemble(PDA.code));
} }
} }

View File

@ -3,7 +3,8 @@ package org.antlr.v4.test;
import org.antlr.runtime.ANTLRStringStream; import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token; import org.antlr.runtime.Token;
import org.antlr.v4.Tool; import org.antlr.v4.Tool;
import org.antlr.v4.codegen.PDABytecodeGenerator; import org.antlr.v4.codegen.CompiledPDA;
import org.antlr.v4.codegen.LexerCompiler;
import org.antlr.v4.runtime.pda.PDA; import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.semantics.SemanticPipeline; import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.Grammar;
@ -24,6 +25,14 @@ public class TestPDABytecodeInterp extends BaseTest {
checkMatches(g, "abab", expecting); checkMatches(g, "abab", expecting);
} }
/**
 * Runs the lexer rule {@code A : ~'a' ;} over the input {@code "b"} and
 * expects the reported token sequence to be {@code "A, EOF"}.
 */
@Test
public void testNotChar() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "A : ~'a' ;";
    LexerGrammar lexerGrammar = new LexerGrammar(grammarText);
    String input = "b";
    String expectedTokens = "A, EOF";
    checkMatches(lexerGrammar, input, expectedTokens);
}
@Test public void testIDandIntandKeyword() throws Exception { @Test public void testIDandIntandKeyword() throws Exception {
LexerGrammar g = new LexerGrammar( LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" + "lexer grammar L;\n" +
@ -202,7 +211,10 @@ public class TestPDABytecodeInterp extends BaseTest {
} }
} }
PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME); LexerCompiler comp = new LexerCompiler(g);
CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
ANTLRStringStream in = new ANTLRStringStream(input); ANTLRStringStream in = new ANTLRStringStream(input);
List<Integer> tokenTypes = new ArrayList<Integer>(); List<Integer> tokenTypes = new ArrayList<Integer>();
int ttype = 0; int ttype = 0;
@ -236,7 +248,9 @@ public class TestPDABytecodeInterp extends BaseTest {
} }
} }
PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME); LexerCompiler comp = new LexerCompiler(g);
CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
ANTLRStringStream in = new ANTLRStringStream(input); ANTLRStringStream in = new ANTLRStringStream(input);
List<Integer> tokenTypes = new ArrayList<Integer>(); List<Integer> tokenTypes = new ArrayList<Integer>();
int ttype = PDA.execThompson(in); int ttype = PDA.execThompson(in);