added NOT/SET ops, refactored PDA generation

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6900]
This commit is contained in:
parrt 2010-05-27 16:01:55 -08:00
parent acf962bc28
commit 19aecd3163
15 changed files with 701 additions and 554 deletions

View File

@ -39,21 +39,22 @@ public class Bytecode {
// be an array of objects (Bytecode[]). We want it to be byte[].
// INSTRUCTION BYTECODES (byte is signed; use a short to keep 0..255)
public static final short ACCEPT = 1;
public static final short JMP = 2;
public static final short SPLIT = 3;
public static final short MATCH8 = 4;
public static final short MATCH16 = 5;
public static final short RANGE8 = 6;
public static final short RANGE16 = 7;
public static final short WILDCARD = 8;
//public static final short NOT = 8; ???
public static final short CALL = 9; // JMP with a push
public static final short RET = 10; // an accept instr for fragment rules
public static final short LABEL = 11;
public static final short SAVE = 12;
public static final short SEMPRED = 13;
public static final short ACTION = 14;
public static final short ACCEPT = 1;
public static final short JMP = 2;
public static final short SPLIT = 3;
public static final short MATCH8 = 4;
public static final short MATCH16 = 5;
public static final short RANGE8 = 6;
public static final short RANGE16 = 7;
public static final short WILDCARD = 8;
public static final short SET = 9;
public static final short CALL = 10; // JMP with a push
public static final short RET = 11; // an accept instr for fragment rules
public static final short LABEL = 12;
public static final short SAVE = 13;
public static final short SEMPRED = 14;
public static final short ACTION = 15;
public static final short NOT = 16; // not next match instr
/** Used for disassembly; describes instruction set */
public static Instruction[] instructions = new Instruction[] {
@ -66,12 +67,14 @@ public class Bytecode {
new Instruction("range8", OperandType.BYTE, OperandType.BYTE),
new Instruction("range16", OperandType.CHAR, OperandType.CHAR),
new Instruction("wildcard"),
new Instruction("set", OperandType.SHORT),
new Instruction("call", OperandType.ADDR),
new Instruction("ret"),
new Instruction("label", OperandType.SHORT),
new Instruction("save", OperandType.SHORT),
new Instruction("sempred", OperandType.SHORT, OperandType.SHORT), // sempred ruleIndex, predIndex
new Instruction("action", OperandType.SHORT, OperandType.SHORT), // action ruleIndex, actionIndex
new Instruction("not"),
};
public static String disassemble(byte[] code, int start, boolean operandsAreChars) {

View File

@ -8,7 +8,6 @@ import org.antlr.v4.runtime.CommonToken;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/** A (nondeterministic) pushdown bytecode machine for lexing and LL prediction.
* Derived partially from Cox' description of Thompson's 1960s work:
@ -22,28 +21,20 @@ public class PDA {
public interface sempred_fptr { boolean eval(int predIndex); }
public byte[] code;
public Map<String, Integer> ruleToAddr;
public int[] tokenTypeToAddr;
//public Map<String, Integer> ruleToAddr;
public int[] altToAddr; // either token type (in lexer) or alt num for DFA in parser
public CommonToken[] labelValues;
public int nLabels;
/** If we hit an action, we'll have to rewind and do the winning rule again */
boolean bypassedAction;
public PDA() {;}
public PDA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
this.code = code;
this.ruleToAddr = ruleToAddr;
this.tokenTypeToAddr = tokenTypeToAddr;
this.nLabels = nLabels;
labelValues = new CommonToken[nLabels];
}
boolean notNextMatch;
public PDA(byte[] code, int[] tokenTypeToAddr, int nLabels) {
public PDA(byte[] code, int[] altToAddr, int nLabels) {
System.out.println("code="+Arrays.toString(code));
this.code = code;
this.tokenTypeToAddr = tokenTypeToAddr;
this.altToAddr = altToAddr;
this.nLabels = nLabels;
labelValues = new CommonToken[nLabels];
}
@ -58,7 +49,7 @@ public class PDA {
System.out.println("Bypassed action; rewinding to "+input.index()+" doing with feeling");
bypassedAction = false;
Arrays.fill(labelValues, null);
int ttype2 = execThompson(input, tokenTypeToAddr[ttype], true);
int ttype2 = execThompson(input, altToAddr[ttype], true);
if ( ttype!=ttype2 ) {
System.err.println("eh? token diff with action(s)");
}
@ -92,33 +83,48 @@ processOneChar:
//System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
trace(ip);
short opcode = code[ip];
boolean matched;
ip++; // move to next instruction or first byte of operand
switch (opcode) {
case Bytecode.NOT :
	// Arm inversion for the next match instruction; every match case
	// below consumes the flag and then clears it.
	notNextMatch = true;
	break;
case Bytecode.MATCH8 :
	matched = c == code[ip];
	// plain match needs a hit; a NOT'd match needs a miss
	// NOTE(review): an inverted match also accepts Token.EOF -- confirm that is intended
	if ( matched != notNextMatch ) {
		addToClosure(reach, ip+1, alt, context);
	}
	notNextMatch = false;
	break;
case Bytecode.MATCH16 :
	matched = c == getShort(code, ip);
	if ( matched != notNextMatch ) {
		addToClosure(reach, ip+2, alt, context);
	}
	notNextMatch = false;
	break;
case Bytecode.RANGE8 :
	matched = c >= code[ip] && c <= code[ip + 1];
	if ( matched != notNextMatch ) {
		addToClosure(reach, ip+2, alt, context);
	}
	notNextMatch = false;
	break;
case Bytecode.RANGE16 :
	// in-range test (lower bound at ip, upper bound at ip+2), same sense as RANGE8
	matched = c >= getShort(code, ip) && c <= getShort(code, ip + 2);
	if ( matched != notNextMatch ) {
		addToClosure(reach, ip+4, alt, context);
	}
	notNextMatch = false;
	break;
case Bytecode.WILDCARD :
	// any char except end of input
	if ( c!=Token.EOF ) {
		addToClosure(reach, ip, alt, context);
	}
	break;
case Bytecode.SET :
	// set matching is not implemented yet; still consume a pending NOT
	System.err.println("not impl");
	notNextMatch = false;
	break;
case Bytecode.LABEL : // lexers only
int labelIndex = getShort(code, ip);
labelValues[labelIndex] =
@ -217,6 +223,10 @@ processOneChar:
short opcode = code[ip];
ip++; // move to next instruction or first byte of operand
switch (opcode) {
case Bytecode.NOT : // see thru NOT but include in closure so we exec during reach
closure.add(t); // add to closure; need to execute during reach
addToClosure(closure, ip, alt, context);
break;
case Bytecode.JMP :
addToClosure(closure, getShort(code, ip), alt, context);
break;
@ -360,10 +370,10 @@ processOneChar:
}
// if we reach accept state, toss out any addresses in rest
// of work list associated with accept's rule; that rule is done
int ruleStart = tokenTypeToAddr[ttype];
int ruleStart = altToAddr[ttype];
int ruleStop = code.length;
if ( ttype+1 < tokenTypeToAddr.length ) {
ruleStop = tokenTypeToAddr[ttype+1]-1;
if ( ttype+1 < altToAddr.length ) {
ruleStop = altToAddr[ttype+1]-1;
}
System.out.println("kill range "+ruleStart+".."+ruleStop);
int j=i+1;

View File

@ -3,6 +3,7 @@ package org.antlr.v4.codegen;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.pda.Instr;
import org.antlr.v4.misc.DoubleKeyMap;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.tool.Rule;
import java.util.ArrayList;
@ -14,12 +15,18 @@ import java.util.Map;
public class CompiledPDA {
public List<Instr> instrs = new ArrayList<Instr>();
public byte[] code; // instrs in bytecode form
public int ip = 0; // where to write next
public List<IntervalSet> set8table = new ArrayList<IntervalSet>();
public List<IntervalSet> set16table = new ArrayList<IntervalSet>();
public Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
public int[] tokenTypeToAddr;
public int[] altToAddr; // either token type (in lexer) or alt num for DFA in parser
public DoubleKeyMap<Rule, String, Integer> ruleLabels = new DoubleKeyMap<Rule, String, Integer>();
public DoubleKeyMap<Rule, Token, Integer> ruleActions = new DoubleKeyMap<Rule, Token, Integer>();
public DoubleKeyMap<Rule, Token, Integer> ruleSempreds = new DoubleKeyMap<Rule, Token, Integer>();
public int nLabels;
public CompiledPDA(int numAlts) {
altToAddr = new int[numAlts+1];
}
}

View File

@ -0,0 +1,77 @@
package org.antlr.v4.codegen;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.automata.DFAState;
import org.antlr.v4.automata.Edge;
import org.antlr.v4.codegen.pda.*;
import org.antlr.v4.runtime.pda.PDA;
/** Turns a DFA into PDA instructions: states are visited recursively,
 *  instructions are emitted through a PDABytecodeGenerator, and jump
 *  targets are patched from state numbers to bytecode addresses afterwards.
 */
public class DFACompiler {
	public DFA dfa;
	/** marked[s] is true once state s has been visited by walk(DFAState) */
	boolean[] marked;
	/** bytecode address recorded for each visited state number */
	int[] stateToAddr;
	PDABytecodeGenerator gen;

	public DFACompiler(DFA dfa) {
		this.dfa = dfa;
		this.gen = new PDABytecodeGenerator(dfa.g.getMaxTokenType());
	}

	/** Walk the DFA, convert the emitted instructions to bytecode and
	 *  return the generator's CompiledPDA.
	 */
	public CompiledPDA compile() {
		walk();
		gen.compile();
		return gen.obj;
	}

	/** Emit instructions for every state reachable from the start state,
	 *  then rewrite each jump target from a state number to that state's
	 *  address. Always returns null.
	 */
	public PDA walk() {
		int tableSize = dfa.stateSet.size()+1;
		marked = new boolean[tableSize];
		stateToAddr = new int[tableSize];
		walk(dfa.startState);
		// walk code, update jump targets.
		for (Instr instr : gen.obj.instrs) {
			System.out.println("instr "+instr);
			if ( !(instr instanceof JumpInstr) ) continue;
			JumpInstr jump = (JumpInstr)instr;
			jump.target = stateToAddr[jump.target];
		}
		return null;
	}

	// Recursive so we follow chains in the DFA, leading to fewer jmp
	// instructions. Jump targets start out as state numbers and are
	// translated to addresses in one pass by walk().
	public void walk(DFAState d) {
		if ( marked[d.stateNumber] ) return;
		marked[d.stateNumber] = true;
		stateToAddr[d.stateNumber] = gen.ip;
		System.out.println("visit "+d.stateNumber+" @"+ gen.ip);

		if ( d.isAcceptState ) {
			gen.emit(new AcceptInstr(d.predictsAlt));
			return;
		}

		// more than one outgoing edge needs a split in front of the edges
		SplitInstr split = null;
		if ( d.edges.size()>1 ) {
			split = new SplitInstr(d.edges.size());
			gen.emit(split);
		}

		for (Edge edge : d.edges) {
			if ( split!=null ) split.addrs.add(gen.ip);
			// single-element label => match instruction, otherwise a set
			boolean singleton = edge.label.getMinElement() == edge.label.getMaxElement();
			Instr matcher = singleton
				? new MatchInstr(edge.label.getSingleElement())
				: new SetInstr(edge.label);
			gen.emit(matcher);
			gen.emit(new JumpInstr(edge.target.stateNumber));
			walk(edge.target);
		}
	}
}

View File

@ -0,0 +1,65 @@
package org.antlr.v4.codegen;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.v4.codegen.pda.AcceptInstr;
import org.antlr.v4.codegen.pda.RetInstr;
import org.antlr.v4.codegen.pda.SplitInstr;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.runtime.pda.Bytecode;
import org.antlr.v4.tool.GrammarAST;
import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule;
/** Compiles the rules of one lexer mode into a CompiledPDA. */
public class LexerCompiler {
	LexerGrammar lg;

	public LexerCompiler(LexerGrammar lg) {
		this.lg = lg;
	}

	/** Generate instructions for every rule in modeName behind an initial
	 *  split, convert them to bytecode, print a disassembly and the rule
	 *  address map, and return the result.
	 */
	public CompiledPDA compileMode(String modeName) {
		GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
		PDABytecodeGenerator gen = new PDABytecodeGenerator(lg.getMaxTokenType());
		PDABytecodeTriggers trigger = new PDABytecodeTriggers(null, gen);

		// add split for s0 to hook up rules (fill in operands as we gen rules);
		// fragment rules get no entry in the split
		int fragmentCount = 0;
		for (Rule r : lg.modes.get(modeName)) {
			if ( r.isFragment() ) fragmentCount++;
		}
		int tokenRuleCount = lg.modes.get(modeName).size() - fragmentCount;
		SplitInstr s0 = new SplitInstr(tokenRuleCount);
		gen.emit(s0);

		for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
			gen.currentRule = r;
			GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
			trigger.setTreeNodeStream(new CommonTreeNodeStream(adaptor,blk));
			int ttype = lg.getTokenType(r.name);
			gen.defineRuleAddr(r.name, gen.ip);
			if ( !r.isFragment() ) {
				s0.addrs.add(gen.ip);
				gen.defineTokenTypeToAddr(ttype, gen.ip);
			}
			try {
				trigger.block(); // GEN Instr OBJECTS
				int ruleTokenType = lg.getTokenType(r.name);
				// fragment rules end in ret, token rules in accept
				if ( r.isFragment() ) {
					gen.emit(new RetInstr());
				}
				else {
					gen.emit(new AcceptInstr(ruleTokenType));
				}
			}
			catch (Exception e){
				e.printStackTrace(System.err);
			}
		}

		gen.compile();
		gen.obj.nLabels = gen.labelIndex;
		System.out.println(Bytecode.disassemble(gen.obj.code));
		System.out.println("rule addrs="+ gen.obj.ruleToAddr);
		return gen.obj;
	}
}

View File

@ -25,7 +25,8 @@ public class LexerFactory {
fileST.add("fileName", gen.getRecognizerFileName());
fileST.add("lexer", lexerST);
for (String modeName : lg.modes.keySet()) { // for each mode
CompiledPDA pda = PDABytecodeGenerator.compileLexerMode(lg, modeName);
LexerCompiler comp = new LexerCompiler(lg);
CompiledPDA pda = comp.compileMode(modeName);
ST pdaST = gen.templates.getInstanceOf("PDA");
for (Rule r : pda.ruleActions.keySet()) {
Set<Token> actionTokens = pda.ruleActions.keySet(r);

View File

@ -1,66 +1,61 @@
package org.antlr.v4.codegen;
import org.antlr.runtime.RecognizerSharedState;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;
import org.antlr.runtime.tree.TreeNodeStream;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.automata.DFAState;
import org.antlr.v4.automata.Edge;
import org.antlr.v4.codegen.pda.*;
import org.antlr.v4.codegen.pda.CallInstr;
import org.antlr.v4.codegen.pda.Instr;
import org.antlr.v4.codegen.pda.MatchInstr;
import org.antlr.v4.codegen.pda.NotInstr;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.runtime.pda.Bytecode;
import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.runtime.tree.TreeParser;
import org.antlr.v4.tool.*;
import org.antlr.v4.tool.Rule;
import java.util.Map;
/** http://swtch.com/~rsc/regexp/regexp2.html */
public class PDABytecodeGenerator extends TreeParser {
public Grammar g;
public class PDABytecodeGenerator {
public Rule currentRule;
CompiledPDA pda = new CompiledPDA();
public CompiledPDA obj;
public int labelIndex = 0; // first time we ask for labels we index
public int ip = 0; // where to write next
public PDABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
super(input, state);
int labelIndex = 0; // first time we ask for labels we index
public PDABytecodeGenerator(int numAlts) {
obj = new CompiledPDA(numAlts);
}
public void compile() {
obj.code = convertInstrsToBytecode();
}
public void emit(Instr I) {
I.addr = pda.ip;
I.addr = ip;
I.rule = currentRule;
I.gen = this;
pda.ip += I.nBytes();
pda.instrs.add(I);
ip += I.nBytes();
obj.instrs.add(I);
}
// indexed from 0 per rule
public int getActionIndex(Rule r, Token actionToken) {
Integer I = pda.ruleActions.get(r, actionToken);
Integer I = obj.ruleActions.get(r, actionToken);
if ( I!=null ) return I; // already got its label
Map<Token, Integer> labels = pda.ruleActions.get(r);
Map<Token, Integer> labels = obj.ruleActions.get(r);
int i = 0;
if ( labels!=null ) i = labels.size();
pda.ruleActions.put(r, actionToken, i);
obj.ruleActions.put(r, actionToken, i);
return i;
}
// indexed from 0 per rule
public int getSempredIndex(Rule r, Token actionToken) {
Integer I = pda.ruleSempreds.get(r, actionToken);
Integer I = obj.ruleSempreds.get(r, actionToken);
if ( I!=null ) return I; // already got its label
Map<Token, Integer> labels = pda.ruleSempreds.get(r);
Map<Token, Integer> labels = obj.ruleSempreds.get(r);
int i = 0;
if ( labels!=null ) i = labels.size();
pda.ruleSempreds.put(r, actionToken, i);
obj.ruleSempreds.put(r, actionToken, i);
return i;
}
@ -69,129 +64,55 @@ public class PDABytecodeGenerator extends TreeParser {
* to an index in an action.
*/
public int getLabelIndex(Rule r, String labelName) {
Integer I = pda.ruleLabels.get(r, labelName);
Integer I = obj.ruleLabels.get(r, labelName);
if ( I!=null ) return I; // already got its label
int i = labelIndex++;
pda.ruleLabels.put(r, labelName, i);
obj.ruleLabels.put(r, labelName, i);
return i;
}
/** Return the index of set in obj.set8table. An equal set that is already
 *  in the table is reused, like the other get*Index methods, so asking
 *  twice for the same set does not grow the table; otherwise the set is
 *  appended and its new index returned.
 */
public int getSetIndex(IntervalSet set) {
	int existing = obj.set8table.indexOf(set);
	if ( existing>=0 ) return existing; // already got its index
	obj.set8table.add(set);
	return obj.set8table.size()-1;
}
public void emitString(Token t, boolean not) {
String chars = CharSupport.getStringFromGrammarStringLiteral(t.getText());
if ( not && chars.length()==1 ) {
emitNotChar(t, chars);
return;
}
if ( not && chars.length()==1 ) emit(new NotInstr());
for (char c : chars.toCharArray()) {
emit(new MatchInstr(t, c));
}
}
public void emitNotChar(Token t, String chars) {
IntervalSet all = (IntervalSet)g.getTokenTypes();
int c = chars.charAt(0);
SplitInstr s = new SplitInstr(2);
RangeInstr left = new RangeInstr(t, t);
left.a = all.getMinElement();
left.b = c-1;
RangeInstr right = new RangeInstr(t, t);
right.a = c+1;
right.b = 127; // all.getMaxElement();
emit(s);
emit(left);
JumpInstr J = new JumpInstr();
emit(J);
emit(right);
s.addrs.add(left.addr);
s.addrs.add(right.addr);
int END = pda.ip;
J.target = END;
return;
}
public byte[] convertInstrsToBytecode() {
Instr last = pda.instrs.get(pda.instrs.size() - 1);
Instr last = obj.instrs.get(obj.instrs.size() - 1);
int size = last.addr + last.nBytes();
byte[] code = new byte[size];
// resolve CALL instruction targets before generating code
for (Instr I : pda.instrs) {
for (Instr I : obj.instrs) {
if ( I instanceof CallInstr ) {
CallInstr C = (CallInstr) I;
String ruleName = C.token.getText();
C.target = pda.ruleToAddr.get(ruleName);
C.target = obj.ruleToAddr.get(ruleName);
}
}
for (Instr I : pda.instrs) {
for (Instr I : obj.instrs) {
I.write(code);
}
return code;
}
public static CompiledPDA compileLexerMode(LexerGrammar lg, String modeName) {
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
PDABytecodeTriggers gen = new PDABytecodeTriggers(null);
gen.g = lg;
gen.pda.tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
// add split for s0 to hook up rules (fill in operands as we gen rules)
int numRules = lg.modes.get(modeName).size();
int numFragmentRules = 0;
for (Rule r : lg.modes.get(modeName)) { if ( r.isFragment() ) numFragmentRules++; }
SplitInstr s0 = new SplitInstr(numRules - numFragmentRules);
gen.emit(s0);
for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
gen.currentRule = r;
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
gen.setTreeNodeStream(nodes);
int ttype = lg.getTokenType(r.name);
gen.pda.ruleToAddr.put(r.name, gen.pda.ip);
if ( !r.isFragment() ) {
s0.addrs.add(gen.pda.ip);
gen.pda.tokenTypeToAddr[ttype] = gen.pda.ip;
}
try {
gen.block(); // GEN Instr OBJECTS
int ruleTokenType = lg.getTokenType(r.name);
if ( !r.isFragment() ) {
gen.emit(new AcceptInstr(ruleTokenType));
}
else {
gen.emit(new RetInstr());
}
}
catch (Exception e){
e.printStackTrace(System.err);
}
}
gen.pda.code = gen.convertInstrsToBytecode();
gen.pda.nLabels = gen.labelIndex;
System.out.println(Bytecode.disassemble(gen.pda.code));
System.out.println("rule addrs="+gen.pda.ruleToAddr);
return gen.pda;
public void defineRuleAddr(String name, int ip) {
obj.ruleToAddr.put(name, ip);
}
// (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .))
public boolean blockHasWildcardAlt(GrammarAST block) {
for (Object alt : block.getChildren()) {
AltAST altAST = (AltAST)alt;
if ( altAST.getChildCount()==1 ) {
Tree e = altAST.getChild(0);
if ( e.getType()==ANTLRParser.WILDCARD ) {
return true;
}
}
}
return false;
public void defineRuleIndexToAddr(int index, int ip) {
obj.altToAddr[index] = ip;
}
// testing
public static PDA getPDA(LexerGrammar lg, String modeName) {
CompiledPDA info = compileLexerMode(lg, modeName);
return new PDA(info.code, info.ruleToAddr, info.tokenTypeToAddr, info.nLabels);
public void defineTokenTypeToAddr(int ttype, int ip) {
defineRuleIndexToAddr(ttype, ip);
}
/** Write value at index into a byte array highest to lowest byte,
@ -202,66 +123,4 @@ public class PDABytecodeGenerator extends TreeParser {
memory[index+1] = (byte)(value&0xFF);
}
// ----------
public static PDA getPDA(DFA dfa) {
PDABytecodeTriggers gen = new PDABytecodeTriggers(null);
gen.g = dfa.g;
gen.pda.tokenTypeToAddr = new int[gen.g.getMaxTokenType()+1];
gen.walk(dfa);
gen.pda.code = gen.convertInstrsToBytecode();
CompiledPDA c = gen.pda;
return new PDA(c.code, c.ruleToAddr, c.tokenTypeToAddr, c.nLabels);
}
boolean[] marked;
int[] stateToAddr;
public PDA walk(DFA dfa) {
marked = new boolean[dfa.stateSet.size()+1];
stateToAddr = new int[dfa.stateSet.size()+1];
walk(dfa.startState);
// walk code, update jump targets.
for (Instr I : pda.instrs) {
System.out.println("instr "+I);
if ( I instanceof JumpInstr ) {
JumpInstr J = (JumpInstr)I;
J.target = stateToAddr[J.target];
}
}
return null;
}
// recursive so we follow chains in DFA, leading to fewer
// jmp instructions.
// start by assuming state num is bytecode addr then translate after
// in one pass
public void walk(DFAState d) {
if ( marked[d.stateNumber] ) return;
marked[d.stateNumber] = true;
stateToAddr[d.stateNumber] = pda.ip;
System.out.println("visit "+d.stateNumber+" @"+pda.ip);
if ( d.isAcceptState ) {
AcceptInstr A = new AcceptInstr(d.predictsAlt);
emit(A);
return;
}
SplitInstr S = null;
if ( d.edges.size()>1 ) {
S = new SplitInstr(d.edges.size());
emit(S);
}
for (Edge e : d.edges) {
if ( S!=null ) S.addrs.add(pda.ip);
// TODO: assumes no sets yet!
MatchInstr M = new MatchInstr(e.label.getSingleElement());
JumpInstr J = new JumpInstr(e.target.stateNumber);
emit(M);
emit(J);
walk(e.target);
}
}
}

View File

@ -3,13 +3,14 @@ options {
language = Java;
tokenVocab = ANTLRParser;
ASTLabelType = GrammarAST;
superClass = PDABytecodeGenerator;
// superClass = PDABytecodeGenerator;
}
@header {
package org.antlr.v4.codegen;
import org.antlr.v4.codegen.pda.*;
import org.antlr.v4.tool.GrammarAST;
import org.antlr.v4.tool.AltAST;
import org.antlr.v4.tool.GrammarASTWithOptions;
import org.antlr.v4.tool.LexerGrammar;
import java.util.Collections;
@ -17,6 +18,30 @@ import java.util.Map;
import java.util.HashMap;
}
@members {
PDABytecodeGenerator gen;
public PDABytecodeTriggers(TreeNodeStream input, PDABytecodeGenerator gen) {
this(input);
this.gen = gen;
}
// True if some alternative of block has exactly one child and that child
// is a wildcard: (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .))
public boolean blockHasWildcardAlt(GrammarAST block) {
	for (Object child : block.getChildren()) {
		if ( !(child instanceof AltAST) ) continue; // skip non-alternative children
		AltAST candidate = (AltAST)child;
		if ( candidate.getChildCount()!=1 ) continue;
		Tree onlyElement = candidate.getChild(0);
		if ( onlyElement.getType()==WILDCARD ) return true;
	}
	return false;
}
}
block
: ^( BLOCK (^(OPTIONS .+))?
{
@ -28,8 +53,8 @@ block
SplitInstr S = null;
if ( nAlts>1 ) {
S = new SplitInstr(nAlts);
emit(S);
S.addrs.add(pda.ip);
gen.emit(S);
S.addrs.add(gen.ip);
}
int alt = 1;
}
@ -38,14 +63,14 @@ block
if ( alt < nAlts ) {
JumpInstr J = new JumpInstr();
jumps.add(J);
emit(J);
S.addrs.add(pda.ip);
gen.emit(J);
S.addrs.add(gen.ip);
}
alt++;
}
)+
{
int END = pda.ip;
int END = gen.ip;
for (JumpInstr J : jumps) J.target = END;
}
)
@ -61,14 +86,14 @@ element
: labeledElement
| atom
| ebnf
| ACTION {emit(new ActionInstr($ACTION.token));}
| SEMPRED {emit(new SemPredInstr($SEMPRED.token));}
| GATED_SEMPRED {emit(new SemPredInstr($GATED_SEMPRED.token));}
| ACTION {gen.emit(new ActionInstr($ACTION.token));}
| SEMPRED {gen.emit(new SemPredInstr($SEMPRED.token));}
| GATED_SEMPRED {gen.emit(new SemPredInstr($GATED_SEMPRED.token));}
| treeSpec
;
labeledElement
: ^(ASSIGN ID {emit(new LabelInstr($ID.token));} atom {emit(new SaveInstr($ID.token));} )
: ^(ASSIGN ID {gen.emit(new LabelInstr($ID.token));} atom {gen.emit(new SaveInstr($ID.token));} )
| ^(ASSIGN ID block)
| ^(PLUS_ASSIGN ID atom)
| ^(PLUS_ASSIGN ID block)
@ -87,33 +112,33 @@ ebnf
: ^(astBlockSuffix block)
| {
SplitInstr S = new SplitInstr(2);
emit(S);
S.addrs.add(pda.ip);
gen.emit(S);
S.addrs.add(gen.ip);
}
^(OPTIONAL block)
{
S.addrs.add(pda.ip);
S.addrs.add(gen.ip);
}
| {
int start=pda.ip;
int start=gen.ip;
SplitInstr S = new SplitInstr(2);
emit(S);
int blkStart = pda.ip;
gen.emit(S);
int blkStart = gen.ip;
}
^(CLOSURE block)
{
JumpInstr J = new JumpInstr();
emit(J);
gen.emit(J);
J.target = start;
S.addrs.add(blkStart);
S.addrs.add(pda.ip);
S.addrs.add(gen.ip);
if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs);
}
| {int start=pda.ip;} ^(POSITIVE_CLOSURE block)
| {int start=gen.ip;} ^(POSITIVE_CLOSURE block)
{
SplitInstr S = new SplitInstr(2);
emit(S);
int stop = pda.ip;
gen.emit(S);
int stop = gen.ip;
S.addrs.add(start);
S.addrs.add(stop);
if ( greedyOption!=null && greedyOption.equals("false") ) Collections.reverse(S.addrs);
@ -136,8 +161,8 @@ atom
| range
| ^(DOT ID terminal[false])
| ^(DOT ID ruleref)
| ^(WILDCARD .) {emit(new WildcardInstr($WILDCARD.token));}
| WILDCARD {emit(new WildcardInstr($WILDCARD.token));}
| ^(WILDCARD .) {gen.emit(new WildcardInstr($WILDCARD.token));}
| WILDCARD {gen.emit(new WildcardInstr($WILDCARD.token));}
| terminal[false]
| ruleref
;
@ -155,15 +180,15 @@ ruleref
range
: ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
{emit(new RangeInstr($a.token, $b.token));}
{gen.emit(new RangeInstr($a.token, $b.token));}
;
terminal[boolean not]
: ^(STRING_LITERAL .) {emitString($STRING_LITERAL.token, $not);}
| STRING_LITERAL {emitString($STRING_LITERAL.token, $not);}
| ^(TOKEN_REF ARG_ACTION .) {emit(new CallInstr($TOKEN_REF.token));}
| ^(TOKEN_REF .) {emit(new CallInstr($TOKEN_REF.token));}
| TOKEN_REF {emit(new CallInstr($TOKEN_REF.token));}
: ^(STRING_LITERAL .) {gen.emitString($STRING_LITERAL.token, $not);}
| STRING_LITERAL {gen.emitString($STRING_LITERAL.token, $not);}
| ^(TOKEN_REF ARG_ACTION .) {gen.emit(new CallInstr($TOKEN_REF.token));}
| ^(TOKEN_REF .) {gen.emit(new CallInstr($TOKEN_REF.token));}
| TOKEN_REF {gen.emit(new CallInstr($TOKEN_REF.token));}
| ^(ROOT terminal[false])
| ^(BANG terminal[false])
;

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
// $ANTLR 3.2.1-SNAPSHOT May 24, 2010 15:02:05 SourceGenTriggers.g 2010-05-26 14:22:40
// $ANTLR 3.2.1-SNAPSHOT May 24, 2010 15:02:05 SourceGenTriggers.g 2010-05-27 16:58:15
package org.antlr.v4.codegen;

View File

@ -0,0 +1,9 @@
package org.antlr.v4.codegen.pda;
import org.antlr.v4.runtime.pda.Bytecode;
/** Instruction object for the NOT opcode; one byte long, no operands. */
public class NotInstr extends Instr {
	public short opcode() {
		return Bytecode.NOT;
	}

	public int nBytes() {
		return 1; // opcode only
	}
}

View File

@ -0,0 +1,20 @@
package org.antlr.v4.codegen.pda;
import org.antlr.v4.codegen.PDABytecodeGenerator;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.runtime.pda.Bytecode;
/** Instruction object for the SET opcode: an opcode byte followed by a
 *  two-byte index that the generator hands out for the IntervalSet.
 */
public class SetInstr extends Instr {
	public IntervalSet set;
	public int setIndex;

	public SetInstr(IntervalSet set) {
		this.set = set;
	}

	public short opcode() {
		return Bytecode.SET;
	}

	public int nBytes() {
		return 1+2; // opcode byte + short operand
	}

	public void write(byte[] code) {
		super.write(code);
		// the index is requested from the generator only when the instruction is written
		this.setIndex = gen.getSetIndex(this.set);
		int operandAddr = addr+1;
		PDABytecodeGenerator.writeShort(code, operandAddr, (short)setIndex);
	}
}

View File

@ -3,7 +3,8 @@ package org.antlr.v4.test;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.automata.DecisionState;
import org.antlr.v4.automata.NFA;
import org.antlr.v4.codegen.PDABytecodeGenerator;
import org.antlr.v4.codegen.CompiledPDA;
import org.antlr.v4.codegen.DFACompiler;
import org.antlr.v4.runtime.pda.Bytecode;
import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.tool.Grammar;
@ -11,6 +12,21 @@ import org.junit.Test;
/** */
public class TestDFAtoPDABytecodeGeneration extends BaseTest {
/** Decision 0 of a rule starting with ~A is expected to disassemble to a
 *  set instruction on the first alternative and a match8 on the second.
 */
@Test public void testNotAisSet() throws Exception {
	Grammar grammar = new Grammar(
		"parser grammar T;\n"+
		"a : ~A B C | A ;");
	String expectedBytecode =
		"0000:\tsplit 7, 16\n" +
		"0007:\tset 0\n" +
		"0010:\tjmp 13\n" +
		"0013:\taccept 1\n" +
		"0016:\tmatch8 5\n" +
		"0018:\tjmp 21\n" +
		"0021:\taccept 2\n";
	checkBytecode(grammar, 0, expectedBytecode);
}
@Test public void testAorB() throws Exception {
Grammar g = new Grammar(
"parser grammar T;\n"+
@ -62,8 +78,6 @@ public class TestDFAtoPDABytecodeGeneration extends BaseTest {
checkBytecode(g, 2, expecting);
}
// TODO: ORDER OF TESTS MATTERS? DFA edge orders get changed. ack!
void checkBytecode(Grammar g, int decision, String expecting) {
NFA nfa = createNFA(g);
DecisionState blk = nfa.decisionToNFAState.get(decision);
@ -71,8 +85,10 @@ public class TestDFAtoPDABytecodeGeneration extends BaseTest {
// Edge e0 = dfa.states.get(1).edge(0);
// Edge e1 = dfa.states.get(1).edge(1);
// e0.target = e1.target;
// System.out.print("altered DFA="+dfa);
PDA PDA = PDABytecodeGenerator.getPDA(dfa);
assertEquals(expecting, Bytecode.disassemble(PDA.code, false));
// System.out.print("altered DFA="+dfa);
DFACompiler comp = new DFACompiler(dfa);
CompiledPDA obj = comp.compile();
PDA pda = new PDA(obj.code, obj.altToAddr, obj.nLabels);
assertEquals(expecting, Bytecode.disassemble(pda.code, false));
}
}

View File

@ -1,7 +1,8 @@
package org.antlr.v4.test;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.PDABytecodeGenerator;
import org.antlr.v4.codegen.CompiledPDA;
import org.antlr.v4.codegen.LexerCompiler;
import org.antlr.v4.runtime.pda.Bytecode;
import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.semantics.SemanticPipeline;
@ -22,6 +23,18 @@ public class TestPDABytecodeGeneration extends BaseTest {
checkBytecode(g, expecting);
}
/** A lexer rule ~'a' is expected to compile to a not instruction
 *  immediately followed by match8 'a'.
 */
@Test public void testNotChar() throws Exception {
	LexerGrammar lexerGrammar = new LexerGrammar(
		"lexer grammar L;\n"+
		"A : ~'a' ;");
	String expectedBytecode =
		"0000:\tsplit 5\n" +
		"0005:\tnot \n" +
		"0006:\tmatch8 'a'\n" +
		"0008:\taccept 4\n";
	checkBytecode(lexerGrammar, expectedBytecode);
}
@Test public void testIDandIntandKeyword() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
@ -215,7 +228,9 @@ public class TestPDABytecodeGeneration extends BaseTest {
}
}
}
PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME);
LexerCompiler comp = new LexerCompiler(g);
CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
assertEquals(expecting, Bytecode.disassemble(PDA.code));
}
}

View File

@ -3,7 +3,8 @@ package org.antlr.v4.test;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.PDABytecodeGenerator;
import org.antlr.v4.codegen.CompiledPDA;
import org.antlr.v4.codegen.LexerCompiler;
import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.Grammar;
@ -24,6 +25,14 @@ public class TestPDABytecodeInterp extends BaseTest {
checkMatches(g, "abab", expecting);
}
/** Running rule A : ~'a' over the input "b" is expected to yield token A followed by EOF. */
@Test public void testNotChar() throws Exception {
	LexerGrammar lexerGrammar = new LexerGrammar(
		"lexer grammar L;\n"+
		"A : ~'a' ;");
	String expectedTokens = "A, EOF";
	checkMatches(lexerGrammar, "b", expectedTokens);
}
@Test public void testIDandIntandKeyword() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
@ -202,7 +211,10 @@ public class TestPDABytecodeInterp extends BaseTest {
}
}
PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME);
LexerCompiler comp = new LexerCompiler(g);
CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
ANTLRStringStream in = new ANTLRStringStream(input);
List<Integer> tokenTypes = new ArrayList<Integer>();
int ttype = 0;
@ -236,7 +248,9 @@ public class TestPDABytecodeInterp extends BaseTest {
}
}
PDA PDA = PDABytecodeGenerator.getPDA(g, LexerGrammar.DEFAULT_MODE_NAME);
LexerCompiler comp = new LexerCompiler(g);
CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
ANTLRStringStream in = new ANTLRStringStream(input);
List<Integer> tokenTypes = new ArrayList<Integer>();
int ttype = PDA.execThompson(in);