Reorganized so the instruction classes live in a separate package. Added the label/save instructions; added code generation, at least for actions and semantic predicates.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6834]
This commit is contained in:
parrt 2010-05-02 17:11:30 -08:00
parent 61603d18a9
commit 689687f0ed
22 changed files with 988 additions and 652 deletions

View File

@ -50,7 +50,10 @@ public class Bytecode {
//public static final short NOT = 8; ???
public static final short CALL = 9; // JMP with a push
public static final short RET = 10; // an accept instr for fragment rules
public static final short SAVE = 11;
public static final short LABEL = 11;
public static final short SAVE = 12;
public static final short SEMPRED = 13;
public static final short ACTION = 14;
/** Used for disassembly; describes instruction set */
public static Instruction[] instructions = new Instruction[] {
@ -65,7 +68,10 @@ public class Bytecode {
new Instruction("wildcard"),
new Instruction("call", OperandType.ADDR),
new Instruction("ret"),
new Instruction("label", OperandType.SHORT),
new Instruction("save", OperandType.SHORT),
new Instruction("sempred", OperandType.SHORT),
new Instruction("action", OperandType.SHORT),
};
public static String disassemble(byte[] code, int start) {

View File

@ -2,6 +2,7 @@ package org.antlr.v4.runtime.nfa;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.Token;
import org.antlr.v4.runtime.CommonToken;
import java.util.ArrayList;
import java.util.List;
@ -10,178 +11,46 @@ import java.util.Map;
/** http://swtch.com/~rsc/regexp/regexp2.html */
public class NFA {
public byte[] code;
Map<String, Integer> ruleToAddr;
public Map<String, Integer> ruleToAddr;
public int[] tokenTypeToAddr;
public String[] labels; // TODO: need for actions. What is $label?
public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr) {
public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr,
String[] labels)
{
this.code = code;
this.ruleToAddr = ruleToAddr;
this.tokenTypeToAddr = tokenTypeToAddr;
}
public int exec(CharStream input, String ruleName) {
return exec(input, ruleToAddr.get(ruleName));
}
public int exec(CharStream input) { return exec(input, 0); }
public int exec(CharStream input, int ip) {
while ( ip < code.length ) {
int c = input.LA(1);
trace(ip);
short opcode = code[ip];
ip++; // move to next instruction or first byte of operand
switch (opcode) {
case Bytecode.MATCH8 :
if ( c != code[ip] ) return 0;
ip++;
input.consume();
break;
case Bytecode.MATCH16 :
if ( c != getShort(code, ip) ) return 0;
ip += 2;
input.consume();
break;
case Bytecode.RANGE8 :
if ( c<code[ip] || c>code[ip+1] ) return 0;
ip += 2;
input.consume();
break;
case Bytecode.RANGE16 :
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) return 0;
ip += 4;
input.consume();
break;
case Bytecode.ACCEPT :
int ruleIndex = getShort(code, ip);
ip += 2;
System.out.println("accept "+ruleIndex);
return ruleIndex;
case Bytecode.JMP :
int target = getShort(code, ip);
ip = target;
continue;
case Bytecode.SPLIT :
int nopnds = getShort(code, ip);
ip += 2;
for (int i=1; i<=nopnds-1; i++) {
int addr = getShort(code, ip);
ip += 2;
//System.out.println("try alt "+i+" at "+addr);
int m = input.mark();
int r = exec(input, addr);
if ( r>0 ) { input.release(m); return r; }
input.rewind(m);
}
// try final alternative (w/o recursion)
int addr = getShort(code, ip);
ip = addr;
//System.out.println("try alt "+nopnds+" at "+addr);
continue;
default :
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
}
}
return 0;
}
public static class Context {
public int ip;
public int inputMarker;
public Context(int ip, int inputMarker) {
this.ip = ip;
this.inputMarker = inputMarker;
}
}
public int execNoRecursion(CharStream input, int ip) {
List<Context> work = new ArrayList<Context>();
work.add(new Context(ip, input.mark()));
workLoop:
while ( work.size()>0 ) {
Context ctx = work.remove(work.size()-1); // treat like stack
ip = ctx.ip;
input.rewind(ctx.inputMarker);
while ( ip < code.length ) {
int c = input.LA(1);
trace(ip);
short opcode = code[ip];
ip++; // move to next instruction or first byte of operand
switch (opcode) {
case Bytecode.MATCH8 :
if ( c != code[ip] ) continue workLoop;
ip++;
input.consume();
break;
case Bytecode.MATCH16 :
if ( c != getShort(code, ip) ) continue workLoop;
ip += 2;
input.consume();
break;
case Bytecode.RANGE8 :
if ( c<code[ip] || c>code[ip+1] ) continue workLoop;
ip += 2;
input.consume();
break;
case Bytecode.RANGE16 :
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) continue workLoop;
ip += 4;
input.consume();
break;
case Bytecode.ACCEPT :
int ruleIndex = getShort(code, ip);
ip += 2;
System.out.println("accept "+ruleIndex);
// returning gives first match not longest; i.e., like PEG
return ruleIndex;
case Bytecode.JMP :
int target = getShort(code, ip);
ip = target;
continue;
case Bytecode.SPLIT :
int nopnds = getShort(code, ip);
ip += 2;
// add split addresses to work queue in reverse order ('cept first one)
for (int i=nopnds-1; i>=1; i--) {
int addr = getShort(code, ip+i*2);
//System.out.println("try alt "+i+" at "+addr);
work.add(new Context(addr, input.mark()));
}
// try first alternative (w/o adding to work list)
int addr = getShort(code, ip);
ip = addr;
//System.out.println("try alt "+nopnds+" at "+addr);
continue;
default :
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
}
}
}
return 0;
this.labels = labels;
}
/** Convenience entry point for the Thompson-style simulation.
 *
 *  Delegates to {@code execThompson(input, ip, doActions, labelValues)} with
 *  a start address of 0, actions disabled, and a fresh label-value array
 *  holding one slot per entry in {@code labels}.
 *
 *  @param input the character stream to match against
 *  @return whatever the four-argument overload returns
 */
public int execThompson(CharStream input) {
    // always start at SPLIT instr at address 0
    return execThompson(input, 0, false, new CommonToken[labels.length]);
}
public int execThompson(CharStream input, int ip, boolean doActions, CommonToken[] labelValues) {
int c = input.LA(1);
if ( c==Token.EOF ) return Token.EOF;
List<ThreadState> closure = computeStartState(ip);
List<ThreadState> reach = new ArrayList<ThreadState>();
int prevAcceptAddr = Integer.MAX_VALUE;
int prevAcceptLastCharIndex = -1;
int prevAcceptInputMarker = -1;
int firstAcceptInputMarker = -1;
ThreadState prevAccept = new ThreadState(Integer.MAX_VALUE, -1, NFAStack.EMPTY);
ThreadState firstAccept = null;
// int maxAlts = closure.size(); // >= number of alts; if no decision, this is 1
int firstCharIndex = input.index(); // use when creating Token
do { // while more work
c = input.LA(1);
int i = 0;
boolean accepted = false;
processOneChar:
while ( i<closure.size() ) {
System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
ThreadState t = closure.get(i);
ip = t.addr;
NFAStack context = t.context;
int alt = t.alt;
//System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
trace(ip);
short opcode = code[ip];
ip++; // move to next instruction or first byte of operand
@ -211,26 +80,47 @@ processOneChar:
addToClosure(reach, ip, alt, context);
}
break;
case Bytecode.LABEL :
if ( doActions ) {
int labelIndex = getShort(code, ip);
System.out.println("label "+labels[labelIndex]);
labelValues[labelIndex] =
new CommonToken(input, 0, 0, input.index(), -1);
}
break;
case Bytecode.SAVE :
if ( doActions ) {
int labelIndex = getShort(code, ip);
System.out.println("save "+labels[labelIndex]);
labelValues[labelIndex].setStopIndex(input.index()-1);
}
break;
case Bytecode.ACTION :
if ( doActions ) {
int actionIndex = getShort(code, ip);
System.out.println("action "+ actionIndex);
}
break;
case Bytecode.ACCEPT :
if ( context != NFAStack.EMPTY ) break; // only do accept for outermost rule
accepted = true;
int tokenLastCharIndex = input.index() - 1;
int ttype = getShort(code, ip);
System.out.println("ACCEPT "+ ttype +" with last char position "+ tokenLastCharIndex);
if ( tokenLastCharIndex > prevAcceptLastCharIndex ) {
prevAcceptLastCharIndex = tokenLastCharIndex;
if ( tokenLastCharIndex > prevAccept.inputIndex ) {
prevAccept.inputIndex = tokenLastCharIndex;
// choose longest match so far regardless of rule priority
System.out.println("replacing old best match @ "+prevAcceptAddr);
prevAcceptAddr = ip-1;
prevAcceptInputMarker = input.mark();
firstAcceptInputMarker = prevAcceptInputMarker;
System.out.println("replacing old best match @ "+prevAccept.addr);
prevAccept.addr = ip-1;
prevAccept.inputMarker = input.mark();
if ( firstAccept==null ) firstAccept = prevAccept;
}
else if ( tokenLastCharIndex == prevAcceptLastCharIndex ) {
else if ( tokenLastCharIndex == prevAccept.inputIndex ) {
// choose first rule matched if match is of same length
if ( ip-1 < prevAcceptAddr ) { // it will see both accepts for ambig rules
System.out.println("replacing old best match @ "+prevAcceptAddr);
prevAcceptAddr = ip-1;
prevAcceptInputMarker = input.mark();
if ( ip-1 < prevAccept.addr ) { // it will see both accepts for ambig rules
System.out.println("replacing old best match @ "+prevAccept.addr);
prevAccept.addr = ip-1;
prevAccept.inputMarker = input.mark();
}
}
// if we reach accept state, toss out any addresses in rest
@ -263,8 +153,8 @@ processOneChar:
System.out.println("!!!!! no match for char "+(char)c+" at "+input.index());
input.consume();
}
// else reach.size==0 && matched, don't consume: accepted and
// else reach.size==0 && matched, don't consume: accepted
// swap to avoid reallocating space
List<ThreadState> tmp = reach;
reach = closure;
@ -272,20 +162,22 @@ processOneChar:
reach.clear();
} while ( closure.size()>0 );
if ( prevAcceptAddr >= code.length ) return Token.INVALID_TOKEN_TYPE;
int ttype = getShort(code, prevAcceptAddr+1);
System.out.println("done at index "+input.index());
System.out.println("accept marker="+prevAcceptInputMarker);
input.rewind(prevAcceptInputMarker); // does nothing if we accept'd at input.index() but might need to rewind
input.release(firstAcceptInputMarker); // kill any other markers in stream we made
System.out.println("leaving with index "+input.index());
if ( prevAccept.addr >= code.length ) return Token.INVALID_TOKEN_TYPE;
int ttype = getShort(code, prevAccept.addr+1);
input.rewind(prevAccept.inputMarker); // does nothing if we accept'd at input.index() but might need to rewind
if ( firstAccept.inputMarker < prevAccept.inputMarker ) {
System.out.println("done at index "+input.index());
System.out.println("accept marker="+prevAccept.inputMarker);
input.release(firstAccept.inputMarker); // kill any other markers in stream we made
System.out.println("leaving with index "+input.index());
}
return ttype;
}
void addToClosure(List<ThreadState> closure, int ip, int alt, NFAStack context) {
ThreadState t = new ThreadState(ip, alt, context);
//System.out.println("add to closure "+ip+" "+closure);
if ( closure.contains(t) ) return; // TODO: VERY INEFFICIENT! use int[num-states] as set test
if ( closure.contains(t) ) return;
closure.add(t);
short opcode = code[ip];
ip++; // move to next instruction or first byte of operand
@ -293,11 +185,12 @@ processOneChar:
case Bytecode.JMP :
addToClosure(closure, getShort(code, ip), alt, context);
break;
case Bytecode.LABEL :
case Bytecode.SAVE :
case Bytecode.ACTION :
int labelIndex = getShort(code, ip);
ip += 2;
addToClosure(closure, ip, alt, context); // do closure past SAVE
// TODO: impl
break;
case Bytecode.SPLIT :
int nopnds = getShort(code, ip);
@ -323,8 +216,14 @@ processOneChar:
}
}
List<ThreadState> computeStartState(int ip) { // assume SPLIT at ip
List<ThreadState> computeStartState(int ip) {
// if we're starting at a SPLIT, add closure of all SPLIT targets
// else just add closure of ip
List<ThreadState> closure = new ArrayList<ThreadState>();
if ( code[ip]!=Bytecode.SPLIT ) {
addToClosure(closure, ip, 1, NFAStack.EMPTY);
return closure;
}
ip++;
int nalts = getShort(code, ip);
ip += 2;
@ -336,6 +235,10 @@ processOneChar:
return closure;
}
// ---------------------------------------------------------------------
// this stuff below can't do SAVE nor CALL/RET but faster.
public int execThompson_no_stack(CharStream input, int ip) {
int c = input.LA(1);
if ( c==Token.EOF ) return Token.EOF;
@ -484,4 +387,148 @@ processOneChar:
/** Read two consecutive bytes as an unsigned 16-bit value, high byte first.
 *
 *  @param memory the byte array to read from
 *  @param index  position of the high-order byte; the low-order byte is at index+1
 *  @return value in the range 0..0xFFFF
 */
public static int getShort(byte[] memory, int index) {
    // mask each byte so a negative byte value is not sign-extended
    int high = memory[index] & 0xFF;
    int low = memory[index + 1] & 0xFF;
    return (high << 8) | low;
}
/*
public int exec(CharStream input, String ruleName) {
return exec(input, ruleToAddr.get(ruleName));
}
public int exec(CharStream input) { return exec(input, 0); }
public int exec(CharStream input, int ip) {
while ( ip < code.length ) {
int c = input.LA(1);
trace(ip);
short opcode = code[ip];
ip++; // move to next instruction or first byte of operand
switch (opcode) {
case Bytecode.MATCH8 :
if ( c != code[ip] ) return 0;
ip++;
input.consume();
break;
case Bytecode.MATCH16 :
if ( c != getShort(code, ip) ) return 0;
ip += 2;
input.consume();
break;
case Bytecode.RANGE8 :
if ( c<code[ip] || c>code[ip+1] ) return 0;
ip += 2;
input.consume();
break;
case Bytecode.RANGE16 :
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) return 0;
ip += 4;
input.consume();
break;
case Bytecode.ACCEPT :
int ruleIndex = getShort(code, ip);
ip += 2;
System.out.println("accept "+ruleIndex);
return ruleIndex;
case Bytecode.JMP :
int target = getShort(code, ip);
ip = target;
continue;
case Bytecode.SPLIT :
int nopnds = getShort(code, ip);
ip += 2;
for (int i=1; i<=nopnds-1; i++) {
int addr = getShort(code, ip);
ip += 2;
//System.out.println("try alt "+i+" at "+addr);
int m = input.mark();
int r = exec(input, addr);
if ( r>0 ) { input.release(m); return r; }
input.rewind(m);
}
// try final alternative (w/o recursion)
int addr = getShort(code, ip);
ip = addr;
//System.out.println("try alt "+nopnds+" at "+addr);
continue;
default :
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
}
}
return 0;
}
public static class Context {
public int ip;
public int inputMarker;
public Context(int ip, int inputMarker) {
this.ip = ip;
this.inputMarker = inputMarker;
}
}
public int execNoRecursion(CharStream input, int ip) {
List<Context> work = new ArrayList<Context>();
work.add(new Context(ip, input.mark()));
workLoop:
while ( work.size()>0 ) {
Context ctx = work.remove(work.size()-1); // treat like stack
ip = ctx.ip;
input.rewind(ctx.inputMarker);
while ( ip < code.length ) {
int c = input.LA(1);
trace(ip);
short opcode = code[ip];
ip++; // move to next instruction or first byte of operand
switch (opcode) {
case Bytecode.MATCH8 :
if ( c != code[ip] ) continue workLoop;
ip++;
input.consume();
break;
case Bytecode.MATCH16 :
if ( c != getShort(code, ip) ) continue workLoop;
ip += 2;
input.consume();
break;
case Bytecode.RANGE8 :
if ( c<code[ip] || c>code[ip+1] ) continue workLoop;
ip += 2;
input.consume();
break;
case Bytecode.RANGE16 :
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) continue workLoop;
ip += 4;
input.consume();
break;
case Bytecode.ACCEPT :
int ruleIndex = getShort(code, ip);
ip += 2;
System.out.println("accept "+ruleIndex);
// returning gives first match not longest; i.e., like PEG
return ruleIndex;
case Bytecode.JMP :
int target = getShort(code, ip);
ip = target;
continue;
case Bytecode.SPLIT :
int nopnds = getShort(code, ip);
ip += 2;
// add split addresses to work queue in reverse order ('cept first one)
for (int i=nopnds-1; i>=1; i--) {
int addr = getShort(code, ip+i*2);
//System.out.println("try alt "+i+" at "+addr);
work.add(new Context(addr, input.mark()));
}
// try first alternative (w/o adding to work list)
int addr = getShort(code, ip);
ip = addr;
//System.out.println("try alt "+nopnds+" at "+addr);
continue;
default :
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
}
}
}
return 0;
}
*/
}

View File

@ -5,12 +5,22 @@ public class ThreadState {
public int addr;
public int alt; // or speculatively matched token type for lexers
public NFAStack context;
public int inputIndex = -1; // char (or token?) index from 0
public int inputMarker = -1; // accept states track input markers in case we need to rewind
/** Create a thread state from its three identifying parts.
 *  inputIndex and inputMarker keep their field defaults of -1.
 *
 *  @param addr    value stored in {@link #addr}
 *  @param alt     value stored in {@link #alt}
 *  @param context value stored in {@link #context}
 */
public ThreadState(int addr, int alt, NFAStack context) {
    this.context = context;
    this.alt = alt;
    this.addr = addr;
}
/** Copy constructor: duplicates addr, alt, context and inputIndex of {@code t}.
 *
 *  NOTE(review): inputMarker is not copied and therefore keeps its default
 *  of -1 in the new object -- confirm that this is intentional.
 *
 *  @param t the state to copy from; must not be null
 */
public ThreadState(ThreadState t) {
    this.inputIndex = t.inputIndex;
    this.context = t.context;
    this.alt = t.alt;
    this.addr = t.addr;
}
public boolean equals(Object o) {
if ( o==null ) return false;
if ( this==o ) return true;

View File

@ -24,7 +24,8 @@ public class CodeGenPipeline {
//ANTLRStringStream input = new ANTLRStringStream("abc32ab");
int ttype = 0;
while ( ttype!= Token.EOF ) {
ttype = nfa.execThompson(input); System.out.println("ttype="+ttype);
ttype = nfa.execThompson(input);
System.out.println("ttype="+ttype);
}
}
}

View File

@ -4,6 +4,7 @@ import org.antlr.runtime.RecognizerSharedState;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.TreeNodeStream;
import org.antlr.v4.codegen.nfa.*;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.runtime.nfa.Bytecode;
@ -20,160 +21,17 @@ import java.util.Map;
/** http://swtch.com/~rsc/regexp/regexp2.html */
public class NFABytecodeGenerator extends TreeParser {
public abstract static class Instr {
public int addr;
public abstract short opcode();
public abstract int nBytes();
public int charSize(int a, int b) { return Math.max(charSize(a), charSize(b)); }
public int charSize(int c) {
if ( c<=0xFF ) return 1;
if ( c<=0xFFFF ) return 2;
return 4;
}
public void write(byte[] code) { code[addr] = (byte)opcode(); }
}
public static class WildcardInstr extends Instr {
Token token;
public WildcardInstr(Token t) { super(); this.token = t; }
public short opcode() { return Bytecode.WILDCARD; }
public int nBytes() { return 1; }
}
public static class MatchInstr extends Instr {
Token token;
int c;
public MatchInstr(Token t, int c) { super(); this.token = t; this.c = c; }
public short opcode() { return charSize(c)==1?Bytecode.MATCH8:Bytecode.MATCH16; };
public int nBytes() { return 1+charSize(c); }
public void write(byte[] code) {
super.write(code);
if ( charSize(c)==1 ) code[addr+1] = (byte)(c&0xFF);
else writeShort(code, addr+1, (short)c);
}
@Override
public String toString() {
return addr+":MatchInstr{" +
"c=" + c +
'}';
}
}
public static class RangeInstr extends Instr {
Token start, stop;
int a, b;
public RangeInstr(Token start, Token stop) {
this.start = start;
this.stop = stop;
a = (char)Target.getCharValueFromGrammarCharLiteral(start.getText());
b = (char)Target.getCharValueFromGrammarCharLiteral(stop.getText());
}
public short opcode() { return charSize(a, b)==1?Bytecode.RANGE8:Bytecode.RANGE16; };
public int nBytes() { return 1+2*charSize(a, b); }
public void write(byte[] code) {
super.write(code);
if ( charSize(a,b)==1 ) {
code[addr+1] = (byte)(a&0xFF);
code[addr+2] = (byte)(b&0xFF);
}
else {
writeShort(code, addr+1, (short)a);
writeShort(code, addr+1+charSize(a,b), (short)b);
}
}
@Override
public String toString() {
return addr+":RangeInstr{"+ a +".."+ b +"}";
}
}
public static class AcceptInstr extends Instr {
int ruleIndex;
public AcceptInstr(int ruleIndex) {
this.ruleIndex = ruleIndex;
}
public short opcode() { return Bytecode.ACCEPT; };
public int nBytes() { return 1+2; }
public void write(byte[] code) {
super.write(code);
writeShort(code, addr+1, (short)ruleIndex);
}
public String toString() { return addr+":AcceptInstr "+ruleIndex; }
}
public static class RetInstr extends Instr {
public short opcode() { return Bytecode.RET; }
public int nBytes() { return 1; }
}
public static class JumpInstr extends Instr {
int target;
public short opcode() { return Bytecode.JMP; };
public int nBytes() { return 1+Bytecode.ADDR_SIZE; }
public void write(byte[] code) {
super.write(code);
writeShort(code, addr+1, (short)target);
}
@Override
public String toString() {
return addr+":JumpInstr{" +
"target=" + target +
'}';
}
}
public static class CallInstr extends Instr {
Token token;
int target;
public CallInstr(Token token) { this.token = token; }
public short opcode() { return Bytecode.CALL; };
public int nBytes() { return 1+Bytecode.ADDR_SIZE; }
public void write(byte[] code) {
super.write(code);
writeShort(code, addr+1, (short)target);
}
@Override
public String toString() {
return addr+":CallInstr{" +
"target=" + target +
'}';
}
}
public static class SplitInstr extends Instr {
List<Integer> addrs = new ArrayList<Integer>();
int nAlts;
public SplitInstr(int nAlts) { this.nAlts = nAlts; }
public short opcode() { return Bytecode.SPLIT; };
public int nBytes() { return 1+2+nAlts*Bytecode.ADDR_SIZE; }
public void write(byte[] code) {
super.write(code);
int a = addr + 1;
writeShort(code, a, (short)addrs.size());
a += 2;
for (int x : addrs) {
writeShort(code, a, (short)x);
a += Bytecode.ADDR_SIZE;
}
}
@Override
public String toString() {
return addr+":SplitInstr{" +
"addrs=" + addrs +
'}';
}
}
LexerGrammar lg;
public List<Instr> instrs = new ArrayList<Instr>();
public int ip = 0; // where to write next
Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
int[] tokenTypeToAddr;
List<String> labels = new ArrayList<String>();
public NFABytecodeGenerator(TreeNodeStream input) {
public NFABytecodeGenerator(LexerGrammar lg, TreeNodeStream input) {
super(input);
this.lg = lg;
tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
}
public NFABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
@ -193,32 +51,26 @@ public class NFABytecodeGenerator extends TreeParser {
}
}
/** Given any block of alts, return list of instruction objects */
// public static List<Instr> getInstructions(GrammarAST blk, int acceptValue) {
// GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
// CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
// NFABytecodeTriggers gen = new NFABytecodeTriggers(nodes);
// try {
// gen.block();
// gen.emit(new NFABytecodeGenerator.AcceptInstr(acceptValue));
// }
// catch (Exception e){
// e.printStackTrace(System.err);
// }
// return gen.instrs;
// }
public static byte[] getByteCode(Map<String, Integer> ruleToAddr, List<Instr> instrs) {
public byte[] getBytecode() {
Instr last = instrs.get(instrs.size() - 1);
int size = last.addr + last.nBytes();
byte[] code = new byte[size];
// resolve CALL instruction targets before generating code
// resolve CALL instruction targets and index labels before generating code
for (Instr I : instrs) {
if ( I instanceof CallInstr ) {
CallInstr C = (CallInstr) I;
String ruleName = C.token.getText();
C.target = ruleToAddr.get(ruleName);
}
else if ( I instanceof LabelInstr ) {
LabelInstr L = (LabelInstr)I;
L.labelIndex = labels.size();
labels.add(L.token.getText());
}
else if ( I instanceof SaveInstr ) {
SaveInstr S = (SaveInstr)I;
S.labelIndex = labels.size()-1;
}
}
for (Instr I : instrs) {
I.write(code);
@ -228,7 +80,7 @@ public class NFABytecodeGenerator extends TreeParser {
public static NFA getBytecode(LexerGrammar lg, String modeName) {
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
NFABytecodeTriggers gen = new NFABytecodeTriggers(null);
NFABytecodeTriggers gen = new NFABytecodeTriggers(lg, null);
// add split for s0 to hook up rules (fill in operands as we gen rules)
int numRules = lg.modes.get(modeName).size();
@ -237,37 +89,35 @@ public class NFABytecodeGenerator extends TreeParser {
SplitInstr s0 = new SplitInstr(numRules - numFragmentRules);
gen.emit(s0);
Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
int[] tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
gen.setTreeNodeStream(nodes);
int ttype = lg.getTokenType(r.name);
ruleToAddr.put(r.name, gen.ip);
gen.ruleToAddr.put(r.name, gen.ip);
if ( !r.isFragment() ) {
s0.addrs.add(gen.ip);
tokenTypeToAddr[ttype] = gen.ip;
gen.tokenTypeToAddr[ttype] = gen.ip;
}
try {
gen.block();
((NFABytecodeTriggers)gen).block();
int ruleTokenType = lg.getTokenType(r.name);
if ( !r.isFragment() ) {
gen.emit(new NFABytecodeGenerator.AcceptInstr(ruleTokenType));
gen.emit(new AcceptInstr(ruleTokenType));
}
else {
gen.emit(new NFABytecodeGenerator.RetInstr());
gen.emit(new RetInstr());
}
}
catch (Exception e){
e.printStackTrace(System.err);
}
}
byte[] code = NFABytecodeGenerator.getByteCode(ruleToAddr, gen.instrs);
byte[] code = gen.getBytecode();
System.out.println(Bytecode.disassemble(code));
System.out.println("rule addrs="+ruleToAddr);
System.out.println("rule addrs="+gen.ruleToAddr);
return new NFA(code, ruleToAddr, tokenTypeToAddr);
return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.labels.toArray(new String[0]));
}
/** Write value at index into a byte array highest to lowest byte,
@ -277,5 +127,4 @@ public class NFABytecodeGenerator extends TreeParser {
memory[index+0] = (byte)((value>>(8*1))&0xFF);
memory[index+1] = (byte)(value&0xFF);
}
}

View File

@ -8,13 +8,21 @@ options {
@header {
package org.antlr.v4.codegen;
import org.antlr.v4.codegen.nfa.*;
import org.antlr.v4.tool.GrammarAST;
import org.antlr.v4.tool.GrammarASTWithOptions;
import org.antlr.v4.tool.LexerGrammar;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
}
@members {
public NFABytecodeTriggers(LexerGrammar lg, TreeNodeStream input) {
super(lg, input);
}
}
/*
e1 | e2 | e3:
split 3, L1, L2, L3
@ -69,14 +77,14 @@ element
: labeledElement
| atom
| ebnf
| ACTION
| SEMPRED
| GATED_SEMPRED
| ACTION {emit(new ActionInstr($ACTION.token));}
| SEMPRED {emit(new SemPredInstr($SEMPRED.token));}
| GATED_SEMPRED {emit(new SemPredInstr($GATED_SEMPRED.token));}
| treeSpec
;
labeledElement
: ^(ASSIGN ID atom)
: ^(ASSIGN ID {emit(new LabelInstr($ID.token));} atom {emit(new SaveInstr($ID.token));} )
| ^(ASSIGN ID block)
| ^(PLUS_ASSIGN ID atom)
| ^(PLUS_ASSIGN ID block)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,19 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
/** Instruction object that serializes as the {@code Bytecode.ACCEPT} opcode
 *  followed by {@link #ruleIndex} written as a 2-byte operand.
 */
public class AcceptInstr extends Instr {
    /** Operand value; truncated to a short when written. */
    public int ruleIndex;

    public AcceptInstr(int ruleIndex) {
        this.ruleIndex = ruleIndex;
    }

    public short opcode() {
        return Bytecode.ACCEPT;
    }

    /** One opcode byte plus a two-byte operand. */
    public int nBytes() {
        return 1+2;
    }

    /** Write the opcode at addr, then the rule index at addr+1. */
    public void write(byte[] code) {
        super.write(code);
        int operandAddr = addr+1;
        NFABytecodeGenerator.writeShort(code, operandAddr, (short)ruleIndex);
    }

    public String toString() {
        return addr + ":AcceptInstr " + ruleIndex;
    }
}

View File

@ -0,0 +1,21 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
/** Instruction object that serializes as the {@code Bytecode.ACTION} opcode
 *  followed by {@link #actionIndex} written as a 2-byte operand.
 */
public class ActionInstr extends Instr {
    /** Operand value; this class never assigns it, so it is 0 unless set externally. */
    public int actionIndex;
    /** Token this instruction was created from. */
    public Token token;

    public ActionInstr(Token token) {
        this.token = token;
    }

    public short opcode() {
        return Bytecode.ACTION;
    }

    /** One opcode byte plus a two-byte operand. */
    public int nBytes() {
        return 1+2;
    }

    /** Write the opcode at addr, then the action index at addr+1. */
    public void write(byte[] code) {
        super.write(code);
        int operandAddr = addr+1;
        NFABytecodeGenerator.writeShort(code, operandAddr, (short)actionIndex);
    }

    public String toString() {
        return addr + ":ActionInstr " + actionIndex;
    }
}

View File

@ -0,0 +1,25 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
/** Instruction object that serializes as the {@code Bytecode.CALL} opcode
 *  followed by {@link #target} written as a 2-byte operand.
 */
public class CallInstr extends Instr {
    /** Token this instruction was created from. */
    public Token token;
    /** Address operand; not set by this class, so it must be assigned before write(). */
    public int target;

    public CallInstr(Token token) {
        this.token = token;
    }

    public short opcode() {
        return Bytecode.CALL;
    }

    /** One opcode byte plus one address operand. */
    public int nBytes() {
        return 1+Bytecode.ADDR_SIZE;
    }

    /** Write the opcode at addr, then the target address at addr+1. */
    public void write(byte[] code) {
        super.write(code);
        int operandAddr = addr+1;
        NFABytecodeGenerator.writeShort(code, operandAddr, (short)target);
    }

    @Override
    public String toString() {
        return addr + ":CallInstr{target=" + target + '}';
    }
}

View File

@ -0,0 +1,15 @@
package org.antlr.v4.codegen.nfa;
/** Base class for instruction objects that know their opcode, their encoded
 *  size, and how to write themselves into a byte array at {@link #addr}.
 */
public abstract class Instr {
    /** Index in the code array at which this instruction's opcode byte is written. */
    public int addr;

    /** Opcode value; only its low byte is stored by {@link #write}. */
    public abstract short opcode();

    /** Number of bytes this instruction reports as its encoded size. */
    public abstract int nBytes();

    /** Size class of a single value: 1 if c is at most 0xFF, 2 if at most 0xFFFF, else 4. */
    public int charSize(int c) {
        return c<=0xFF ? 1 : (c<=0xFFFF ? 2 : 4);
    }

    /** The larger of the size classes of a and b. */
    public int charSize(int a, int b) {
        int sizeOfA = charSize(a);
        int sizeOfB = charSize(b);
        return sizeOfA>=sizeOfB ? sizeOfA : sizeOfB;
    }

    /** Store the opcode byte at code[addr]; subclasses append their operands. */
    public void write(byte[] code) {
        short op = opcode();
        code[addr] = (byte)op;
    }
}

View File

@ -0,0 +1,22 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
/** Instruction object that serializes as the {@code Bytecode.JMP} opcode
 *  followed by {@link #target} written as a 2-byte operand.
 */
public class JumpInstr extends Instr {
    /** Address operand; not set by this class, so it must be assigned before write(). */
    public int target;

    public short opcode() {
        return Bytecode.JMP;
    }

    /** One opcode byte plus one address operand. */
    public int nBytes() {
        return 1+Bytecode.ADDR_SIZE;
    }

    /** Write the opcode at addr, then the target address at addr+1. */
    public void write(byte[] code) {
        super.write(code);
        int operandAddr = addr+1;
        NFABytecodeGenerator.writeShort(code, operandAddr, (short)target);
    }

    @Override
    public String toString() {
        return addr + ":JumpInstr{target=" + target + '}';
    }
}

View File

@ -0,0 +1,21 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
/** Instruction object that serializes as the {@code Bytecode.LABEL} opcode
 *  followed by {@link #labelIndex} written as a 2-byte operand.
 */
public class LabelInstr extends Instr {
    /** Operand value; not set by this class, so it must be assigned before write(). */
    public int labelIndex;
    /** Token this instruction was created from. */
    public Token token;

    public LabelInstr(Token token) {
        this.token = token;
    }

    public short opcode() {
        return Bytecode.LABEL;
    }

    /** One opcode byte plus a two-byte operand. */
    public int nBytes() {
        return 1+2;
    }

    /** Write the opcode at addr, then the label index at addr+1. */
    public void write(byte[] code) {
        super.write(code);
        int operandAddr = addr+1;
        NFABytecodeGenerator.writeShort(code, operandAddr, (short)labelIndex);
    }

    public String toString() {
        return addr + ":LabelInstr " + labelIndex;
    }
}

View File

@ -0,0 +1,26 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
/** Instruction object matching a single value {@link #c}. It is encoded as
 *  {@code Bytecode.MATCH8} with a 1-byte operand when charSize(c) is 1,
 *  otherwise as {@code Bytecode.MATCH16} with a 2-byte operand.
 *
 *  NOTE(review): for c above 0xFFFF charSize(c) is 4, so nBytes() reports 5
 *  while write() still stores only a 2-byte operand -- confirm such values
 *  never reach this class.
 */
public class MatchInstr extends Instr {
    /** Token this instruction was created from. */
    public Token token;
    /** The value to match. */
    public int c;

    public MatchInstr(Token t, int c) {
        super();
        this.token = t;
        this.c = c;
    }

    public short opcode() {
        if ( charSize(c)==1 ) return Bytecode.MATCH8;
        return Bytecode.MATCH16;
    }

    /** One opcode byte plus charSize(c) operand bytes. */
    public int nBytes() {
        return 1+charSize(c);
    }

    /** Write the opcode at addr, then c at addr+1 as one byte or as a short. */
    public void write(byte[] code) {
        super.write(code);
        int operandAddr = addr+1;
        if ( charSize(c)!=1 ) {
            NFABytecodeGenerator.writeShort(code, operandAddr, (short)c);
            return;
        }
        code[operandAddr] = (byte)(c&0xFF);
    }

    @Override
    public String toString() {
        return addr + ":MatchInstr{c=" + c + '}';
    }
}

View File

@ -0,0 +1,36 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.codegen.Target;
import org.antlr.v4.runtime.nfa.Bytecode;
/** Instruction object matching an inclusive range {@link #a}..{@link #b}.
 *  It is encoded as {@code Bytecode.RANGE8} with two 1-byte operands when both
 *  bounds have charSize 1, otherwise as {@code Bytecode.RANGE16} with two
 *  wider operands.
 */
public class RangeInstr extends Instr {
    /** Tokens the two bounds were read from. */
    public Token start, stop;
    /** Range bounds; each is narrowed to a char before being stored. */
    public int a, b;

    public RangeInstr(Token start, Token stop) {
        this.start = start;
        this.stop = stop;
        String lowText = start.getText();
        a = (char)Target.getCharValueFromGrammarCharLiteral(lowText);
        String highText = stop.getText();
        b = (char)Target.getCharValueFromGrammarCharLiteral(highText);
    }

    public short opcode() {
        if ( charSize(a, b)==1 ) return Bytecode.RANGE8;
        return Bytecode.RANGE16;
    }

    /** One opcode byte plus two operands of charSize(a, b) bytes each. */
    public int nBytes() {
        return 1+2*charSize(a, b);
    }

    /** Write the opcode at addr, then the low bound followed by the high bound. */
    public void write(byte[] code) {
        super.write(code);
        int width = charSize(a,b);
        int firstOperand = addr+1;
        if ( width!=1 ) {
            NFABytecodeGenerator.writeShort(code, firstOperand, (short)a);
            NFABytecodeGenerator.writeShort(code, firstOperand+width, (short)b);
            return;
        }
        code[firstOperand] = (byte)(a&0xFF);
        code[firstOperand+1] = (byte)(b&0xFF);
    }

    @Override
    public String toString() {
        return addr + ":RangeInstr{" + a + ".." + b + "}";
    }
}

View File

@ -0,0 +1,9 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.v4.runtime.nfa.Bytecode;
/** Operand-less instruction object: a single byte holding {@code Bytecode.RET}. */
public class RetInstr extends Instr {
    /** Only the opcode byte is encoded. */
    public int nBytes() {
        return 1;
    }

    public short opcode() {
        return Bytecode.RET;
    }
}

View File

@ -0,0 +1,21 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
/** Instruction object that serializes as the {@code Bytecode.SAVE} opcode
 *  followed by {@link #labelIndex} written as a 2-byte operand.
 */
public class SaveInstr extends Instr {
    /** Operand value; not set by this class, so it must be assigned before write(). */
    public int labelIndex;
    /** Token this instruction was created from. */
    public Token token;

    public SaveInstr(Token token) {
        this.token = token;
    }

    public short opcode() {
        return Bytecode.SAVE;
    }

    /** One opcode byte plus a two-byte operand. */
    public int nBytes() {
        return 1+2;
    }

    /** Write the opcode at addr, then the label index at addr+1. */
    public void write(byte[] code) {
        super.write(code);
        int operandAddr = addr+1;
        NFABytecodeGenerator.writeShort(code, operandAddr, (short)labelIndex);
    }

    public String toString() {
        return addr + ":SaveInstr " + labelIndex;
    }
}

View File

@ -0,0 +1,21 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
/** Instruction object that serializes as the {@code Bytecode.SEMPRED} opcode
 *  followed by {@link #predIndex} written as a 2-byte operand.
 */
public class SemPredInstr extends Instr {
    /** Operand value; this class never assigns it, so it is 0 unless set externally. */
    public int predIndex;
    /** Token this instruction was created from. */
    public Token token;

    public SemPredInstr(Token token) {
        this.token = token;
    }

    public short opcode() {
        return Bytecode.SEMPRED;
    }

    /** One opcode byte plus a two-byte operand. */
    public int nBytes() {
        return 1+2;
    }

    /** Write the opcode at addr, then the predicate index at addr+1. */
    public void write(byte[] code) {
        super.write(code);
        int operandAddr = addr+1;
        NFABytecodeGenerator.writeShort(code, operandAddr, (short)predIndex);
    }

    public String toString() {
        return addr + ":SemPredInstr " + predIndex;
    }
}

View File

@ -0,0 +1,33 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import java.util.ArrayList;
import java.util.List;
/**
 * Code-generation instruction that emits {@link Bytecode#SPLIT}: the opcode,
 * a short count, then one address per entry in {@link #addrs}.
 *
 * NOTE(review): {@link #nBytes()} sizes the instruction from {@link #nAlts}
 * while {@link #write(byte[])} emits {@code addrs.size()} entries; presumably
 * callers keep the two equal -- confirm against the generator.
 */
public class SplitInstr extends Instr {
    /** Target addresses, in the order they are written out. */
    public List<Integer> addrs = new ArrayList<Integer>();

    /** Number of alternatives used to compute the instruction size. */
    public int nAlts;

    /** @param nAlts number of alternatives this split is sized for */
    public SplitInstr(int nAlts) {
        this.nAlts = nAlts;
    }

    /** @return the {@code SPLIT} opcode */
    public short opcode() {
        return Bytecode.SPLIT;
    }

    /** @return opcode byte + two-byte count + {@code nAlts} address slots */
    public int nBytes() {
        int addressBytes = nAlts*Bytecode.ADDR_SIZE;
        return 1 + 2 + addressBytes;
    }

    /**
     * Writes the instruction via {@code super.write}, then the number of
     * addresses as a short, then each address as a short, advancing
     * {@code Bytecode.ADDR_SIZE} bytes per entry.
     *
     * @param code destination bytecode buffer
     */
    public void write(byte[] code) {
        super.write(code);
        int countAddr = addr + 1;
        int altCount = addrs.size();
        NFABytecodeGenerator.writeShort(code, countAddr, (short)altCount);
        int firstTargetAddr = countAddr + 2;
        for (int i = 0; i < altCount; i++) {
            int target = addrs.get(i);
            int slot = firstTargetAddr + i*Bytecode.ADDR_SIZE;
            NFABytecodeGenerator.writeShort(code, slot, (short)target);
        }
    }

    @Override
    public String toString() {
        return addr + ":SplitInstr{addrs=" + addrs + "}";
    }
}

View File

@ -0,0 +1,12 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.runtime.nfa.Bytecode;
/**
 * Code-generation instruction that emits {@link Bytecode#WILDCARD}.
 * It reports a size of one byte (the opcode only).
 */
public class WildcardInstr extends Instr {
    /** Grammar token this instruction was created for. */
    public Token token;

    /** @param t grammar token associated with this instruction */
    public WildcardInstr(Token t) {
        // the no-arg superclass constructor is invoked implicitly
        this.token = t;
    }

    /** @return the {@code WILDCARD} opcode */
    public short opcode() {
        return Bytecode.WILDCARD;
    }

    /** @return 1, the size of this instruction in bytes */
    public int nBytes() {
        return 1;
    }
}

View File

@ -80,6 +80,71 @@ public class TestNFABytecodeGeneration extends BaseTest {
checkBytecode(g, expecting);
}
/** A labeled single-character literal: the expected listing brackets the match with label 0 / save 0. */
@Test public void testLabeledChar() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "A : a='a' ;\n";
    LexerGrammar lexer = new LexerGrammar(grammarText);
    String expectedListing =
        "0000:\tsplit 5\n" +
        "0005:\tlabel 0\n" +
        "0008:\tmatch8 'a'\n" +
        "0010:\tsave 0\n" +
        "0013:\taccept 4\n";
    checkBytecode(lexer, expectedListing);
}
/** A labeled two-character string literal: both match8 instructions sit between label 0 and save 0. */
@Test public void testLabeledString() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "A : a='aa' ;\n";
    LexerGrammar lexer = new LexerGrammar(grammarText);
    String expectedListing =
        "0000:\tsplit 5\n" +
        "0005:\tlabel 0\n" +
        "0008:\tmatch8 'a'\n" +
        "0010:\tmatch8 'a'\n" +
        "0012:\tsave 0\n" +
        "0015:\taccept 4\n";
    checkBytecode(lexer, expectedListing);
}
/** A labeled reference to a fragment rule: the call to D is bracketed by label 0 / save 0. */
@Test public void testLabeledToken() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "I : d=D ;\n" +
        "fragment D : '0'..'9'+ ;\n";
    LexerGrammar lexer = new LexerGrammar(grammarText);
    String expectedListing =
        "0000:\tsplit 5\n" +
        "0005:\tlabel 0\n" +
        "0008:\tcall 17\n" +
        "0011:\tsave 0\n" +
        "0014:\taccept 4\n" +
        "0017:\trange8 '0', '9'\n" +
        "0020:\tsplit 17, 27\n" +
        "0027:\tret \n";
    checkBytecode(lexer, expectedListing);
}
/**
 * Labels across two rules: the expected listing uses index 0 for rule A's
 * label and indexes 1 and 2 for the two labels in rule B, even though B
 * reuses the name {@code a}.
 */
@Test public void testLabelIndexes() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "A : a='a' ;\n" +
        "B : a='b' b='c' ;\n";
    LexerGrammar lexer = new LexerGrammar(grammarText);
    String expectedListing =
        "0000:\tsplit 7, 18\n" +
        "0007:\tlabel 0\n" +
        "0010:\tmatch8 'a'\n" +
        "0012:\tsave 0\n" +
        "0015:\taccept 4\n" +
        "0018:\tlabel 1\n" +
        "0021:\tmatch8 'b'\n" +
        "0023:\tsave 1\n" +
        "0026:\tlabel 2\n" +
        "0029:\tmatch8 'c'\n" +
        "0031:\tsave 2\n" +
        "0034:\taccept 5\n";
    checkBytecode(lexer, expectedListing);
}
public void _template() throws Exception {
LexerGrammar g = new LexerGrammar(
"\n");

View File

@ -4,6 +4,7 @@ import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.CommonToken;
import org.antlr.v4.runtime.nfa.NFA;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.Grammar;
@ -11,6 +12,7 @@ import org.antlr.v4.tool.LexerGrammar;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/** */
@ -118,6 +120,44 @@ public class TestNFABytecodeInterp extends BaseTest {
checkMatches(g, "1.", "NUM, DOT, EOF");
}
/** Interprets input "a" against a labeled char literal and checks the token captured for the label. */
@Test public void testLabeledChar() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "A : a='a' ;\n";
    LexerGrammar lexer = new LexerGrammar(grammarText);
    String expectedTypes = "A, EOF";
    String expectedLabelTokens = "[[@-1,0:0='a',<0>,1:0]]";
    checkMatches(lexer, "a", expectedTypes, expectedLabelTokens);
}
/** Interprets input "abc" against a labeled string literal; the label token is expected to span chars 0..2. */
@Test public void testLabeledString() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "A : a='abc' ;\n";
    LexerGrammar lexer = new LexerGrammar(grammarText);
    String expectedTypes = "A, EOF";
    String expectedLabelTokens = "[[@-1,0:2='abc',<0>,1:0]]";
    checkMatches(lexer, "abc", expectedTypes, expectedLabelTokens);
}
/** Interprets input "901" with a label on a fragment-rule reference; the label token is expected to cover the whole match. */
@Test public void testLabeledToken() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "I : d=D ;\n" +
        "fragment D : '0'..'9'+ ;\n";
    LexerGrammar lexer = new LexerGrammar(grammarText);
    String expectedTypes = "I, EOF";
    String expectedLabelTokens = "[[@-1,0:2='901',<0>,1:0]]";
    checkMatches(lexer, "901", expectedTypes, expectedLabelTokens);
}
/**
 * A label on a looped element ({@code d=D+}): for input "901" the expected
 * label token is only the last matched element, '1' at position 2.
 */
@Test public void testLabelInLoopIsLastElement() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "I : d=D+ ;\n" +
        "fragment D : '0'..'9' ;\n";
    LexerGrammar lexer = new LexerGrammar(grammarText);
    String expectedTypes = "I, EOF";
    String expectedLabelTokens = "[[@-1,2:2='1',<0>,1:2]]";
    checkMatches(lexer, "901", expectedTypes, expectedLabelTokens);
}
/**
 * Two rules with labels, input "bc" (matched as B): the expected string
 * lists three label tokens, the first with empty text and the other two
 * holding 'b' and 'c'.
 */
@Test public void testLabelIndexes() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "A : a='a' ;\n" +
        "B : a='b' b='c' ;\n";
    LexerGrammar lexer = new LexerGrammar(grammarText);
    String expectedTypes = "B, EOF";
    String expectedLabelTokens =
        "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]";
    checkMatches(lexer, "bc", expectedTypes, expectedLabelTokens);
}
public void _template() throws Exception {
LexerGrammar g = new LexerGrammar(
"\n");
@ -126,6 +166,12 @@ public class TestNFABytecodeInterp extends BaseTest {
}
/**
 * Convenience overload that checks token types only: delegates to the
 * four-argument {@code checkMatches} with {@code null} as the expected
 * label-token string.
 *
 * @param g         lexer grammar under test
 * @param input     text to tokenize
 * @param expecting comma-separated expected token type names
 */
void checkMatches(LexerGrammar g, String input, String expecting) {
    final String noExpectedTokens = null;
    checkMatches(g, input, expecting, noExpectedTokens);
}
void checkMatches(LexerGrammar g, String input, String expecting,
String expectingTokens)
{
if ( g.ast!=null && !g.ast.hasErrors ) {
System.out.println(g.ast.toStringTree());
Tool antlr = new Tool();
@ -138,22 +184,27 @@ public class TestNFABytecodeInterp extends BaseTest {
}
}
List<Integer> expectingTokens = new ArrayList<Integer>();
List<Integer> expectingTokenTypes = new ArrayList<Integer>();
if ( expecting!=null && !expecting.trim().equals("") ) {
for (String tname : expecting.replace(" ", "").split(",")) {
int ttype = g.getTokenType(tname);
expectingTokens.add(ttype);
expectingTokenTypes.add(ttype);
}
}
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
ANTLRStringStream in = new ANTLRStringStream(input);
List<Integer> tokens = new ArrayList<Integer>();
List<Integer> tokenTypes = new ArrayList<Integer>();
CommonToken[] tokens = new CommonToken[nfa.labels.length];
int ttype = 0;
do {
ttype = nfa.execThompson(in);
tokens.add(ttype);
ttype = nfa.execThompson(in, 0, true, tokens);
tokenTypes.add(ttype);
} while ( ttype!= Token.EOF );
assertEquals(expectingTokens, tokens);
assertEquals(expectingTokenTypes, tokenTypes);
if ( expectingTokens!=null ) {
assertEquals(expectingTokens, Arrays.toString(tokens));
}
}
}