forked from jasder/antlr
Reorganized so that instructions (instrs) live in a separate package. Added label/save handling; added code generation, at least for actions and semantic predicates (sempred).
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6834]
This commit is contained in:
parent
61603d18a9
commit
689687f0ed
|
@ -50,7 +50,10 @@ public class Bytecode {
|
||||||
//public static final short NOT = 8; ???
|
//public static final short NOT = 8; ???
|
||||||
public static final short CALL = 9; // JMP with a push
|
public static final short CALL = 9; // JMP with a push
|
||||||
public static final short RET = 10; // an accept instr for fragment rules
|
public static final short RET = 10; // an accept instr for fragment rules
|
||||||
public static final short SAVE = 11;
|
public static final short LABEL = 11;
|
||||||
|
public static final short SAVE = 12;
|
||||||
|
public static final short SEMPRED = 13;
|
||||||
|
public static final short ACTION = 14;
|
||||||
|
|
||||||
/** Used for disassembly; describes instruction set */
|
/** Used for disassembly; describes instruction set */
|
||||||
public static Instruction[] instructions = new Instruction[] {
|
public static Instruction[] instructions = new Instruction[] {
|
||||||
|
@ -65,7 +68,10 @@ public class Bytecode {
|
||||||
new Instruction("wildcard"),
|
new Instruction("wildcard"),
|
||||||
new Instruction("call", OperandType.ADDR),
|
new Instruction("call", OperandType.ADDR),
|
||||||
new Instruction("ret"),
|
new Instruction("ret"),
|
||||||
|
new Instruction("label", OperandType.SHORT),
|
||||||
new Instruction("save", OperandType.SHORT),
|
new Instruction("save", OperandType.SHORT),
|
||||||
|
new Instruction("sempred", OperandType.SHORT),
|
||||||
|
new Instruction("action", OperandType.SHORT),
|
||||||
};
|
};
|
||||||
|
|
||||||
public static String disassemble(byte[] code, int start) {
|
public static String disassemble(byte[] code, int start) {
|
||||||
|
|
|
@ -2,6 +2,7 @@ package org.antlr.v4.runtime.nfa;
|
||||||
|
|
||||||
import org.antlr.runtime.CharStream;
|
import org.antlr.runtime.CharStream;
|
||||||
import org.antlr.runtime.Token;
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.v4.runtime.CommonToken;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -10,178 +11,46 @@ import java.util.Map;
|
||||||
/** http://swtch.com/~rsc/regexp/regexp2.html */
|
/** http://swtch.com/~rsc/regexp/regexp2.html */
|
||||||
public class NFA {
|
public class NFA {
|
||||||
public byte[] code;
|
public byte[] code;
|
||||||
Map<String, Integer> ruleToAddr;
|
public Map<String, Integer> ruleToAddr;
|
||||||
public int[] tokenTypeToAddr;
|
public int[] tokenTypeToAddr;
|
||||||
|
public String[] labels; // TODO: need for actions. What is $label?
|
||||||
|
|
||||||
public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr) {
|
public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr,
|
||||||
|
String[] labels)
|
||||||
|
{
|
||||||
this.code = code;
|
this.code = code;
|
||||||
this.ruleToAddr = ruleToAddr;
|
this.ruleToAddr = ruleToAddr;
|
||||||
this.tokenTypeToAddr = tokenTypeToAddr;
|
this.tokenTypeToAddr = tokenTypeToAddr;
|
||||||
}
|
this.labels = labels;
|
||||||
|
|
||||||
public int exec(CharStream input, String ruleName) {
|
|
||||||
return exec(input, ruleToAddr.get(ruleName));
|
|
||||||
}
|
|
||||||
|
|
||||||
public int exec(CharStream input) { return exec(input, 0); }
|
|
||||||
|
|
||||||
public int exec(CharStream input, int ip) {
|
|
||||||
while ( ip < code.length ) {
|
|
||||||
int c = input.LA(1);
|
|
||||||
trace(ip);
|
|
||||||
short opcode = code[ip];
|
|
||||||
ip++; // move to next instruction or first byte of operand
|
|
||||||
switch (opcode) {
|
|
||||||
case Bytecode.MATCH8 :
|
|
||||||
if ( c != code[ip] ) return 0;
|
|
||||||
ip++;
|
|
||||||
input.consume();
|
|
||||||
break;
|
|
||||||
case Bytecode.MATCH16 :
|
|
||||||
if ( c != getShort(code, ip) ) return 0;
|
|
||||||
ip += 2;
|
|
||||||
input.consume();
|
|
||||||
break;
|
|
||||||
case Bytecode.RANGE8 :
|
|
||||||
if ( c<code[ip] || c>code[ip+1] ) return 0;
|
|
||||||
ip += 2;
|
|
||||||
input.consume();
|
|
||||||
break;
|
|
||||||
case Bytecode.RANGE16 :
|
|
||||||
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) return 0;
|
|
||||||
ip += 4;
|
|
||||||
input.consume();
|
|
||||||
break;
|
|
||||||
case Bytecode.ACCEPT :
|
|
||||||
int ruleIndex = getShort(code, ip);
|
|
||||||
ip += 2;
|
|
||||||
System.out.println("accept "+ruleIndex);
|
|
||||||
return ruleIndex;
|
|
||||||
case Bytecode.JMP :
|
|
||||||
int target = getShort(code, ip);
|
|
||||||
ip = target;
|
|
||||||
continue;
|
|
||||||
case Bytecode.SPLIT :
|
|
||||||
int nopnds = getShort(code, ip);
|
|
||||||
ip += 2;
|
|
||||||
for (int i=1; i<=nopnds-1; i++) {
|
|
||||||
int addr = getShort(code, ip);
|
|
||||||
ip += 2;
|
|
||||||
//System.out.println("try alt "+i+" at "+addr);
|
|
||||||
int m = input.mark();
|
|
||||||
int r = exec(input, addr);
|
|
||||||
if ( r>0 ) { input.release(m); return r; }
|
|
||||||
input.rewind(m);
|
|
||||||
}
|
|
||||||
// try final alternative (w/o recursion)
|
|
||||||
int addr = getShort(code, ip);
|
|
||||||
ip = addr;
|
|
||||||
//System.out.println("try alt "+nopnds+" at "+addr);
|
|
||||||
continue;
|
|
||||||
default :
|
|
||||||
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class Context {
|
|
||||||
public int ip;
|
|
||||||
public int inputMarker;
|
|
||||||
public Context(int ip, int inputMarker) {
|
|
||||||
this.ip = ip;
|
|
||||||
this.inputMarker = inputMarker;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public int execNoRecursion(CharStream input, int ip) {
|
|
||||||
List<Context> work = new ArrayList<Context>();
|
|
||||||
work.add(new Context(ip, input.mark()));
|
|
||||||
workLoop:
|
|
||||||
while ( work.size()>0 ) {
|
|
||||||
Context ctx = work.remove(work.size()-1); // treat like stack
|
|
||||||
ip = ctx.ip;
|
|
||||||
input.rewind(ctx.inputMarker);
|
|
||||||
while ( ip < code.length ) {
|
|
||||||
int c = input.LA(1);
|
|
||||||
trace(ip);
|
|
||||||
short opcode = code[ip];
|
|
||||||
ip++; // move to next instruction or first byte of operand
|
|
||||||
switch (opcode) {
|
|
||||||
case Bytecode.MATCH8 :
|
|
||||||
if ( c != code[ip] ) continue workLoop;
|
|
||||||
ip++;
|
|
||||||
input.consume();
|
|
||||||
break;
|
|
||||||
case Bytecode.MATCH16 :
|
|
||||||
if ( c != getShort(code, ip) ) continue workLoop;
|
|
||||||
ip += 2;
|
|
||||||
input.consume();
|
|
||||||
break;
|
|
||||||
case Bytecode.RANGE8 :
|
|
||||||
if ( c<code[ip] || c>code[ip+1] ) continue workLoop;
|
|
||||||
ip += 2;
|
|
||||||
input.consume();
|
|
||||||
break;
|
|
||||||
case Bytecode.RANGE16 :
|
|
||||||
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) continue workLoop;
|
|
||||||
ip += 4;
|
|
||||||
input.consume();
|
|
||||||
break;
|
|
||||||
case Bytecode.ACCEPT :
|
|
||||||
int ruleIndex = getShort(code, ip);
|
|
||||||
ip += 2;
|
|
||||||
System.out.println("accept "+ruleIndex);
|
|
||||||
// returning gives first match not longest; i.e., like PEG
|
|
||||||
return ruleIndex;
|
|
||||||
case Bytecode.JMP :
|
|
||||||
int target = getShort(code, ip);
|
|
||||||
ip = target;
|
|
||||||
continue;
|
|
||||||
case Bytecode.SPLIT :
|
|
||||||
int nopnds = getShort(code, ip);
|
|
||||||
ip += 2;
|
|
||||||
// add split addresses to work queue in reverse order ('cept first one)
|
|
||||||
for (int i=nopnds-1; i>=1; i--) {
|
|
||||||
int addr = getShort(code, ip+i*2);
|
|
||||||
//System.out.println("try alt "+i+" at "+addr);
|
|
||||||
work.add(new Context(addr, input.mark()));
|
|
||||||
}
|
|
||||||
// try first alternative (w/o adding to work list)
|
|
||||||
int addr = getShort(code, ip);
|
|
||||||
ip = addr;
|
|
||||||
//System.out.println("try alt "+nopnds+" at "+addr);
|
|
||||||
continue;
|
|
||||||
default :
|
|
||||||
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int execThompson(CharStream input) {
|
public int execThompson(CharStream input) {
|
||||||
int ip = 0; // always start at SPLIT instr at address 0
|
return execThompson(input, 0, false, new CommonToken[labels.length]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int execThompson(CharStream input, int ip, boolean doActions, CommonToken[] labelValues) {
|
||||||
int c = input.LA(1);
|
int c = input.LA(1);
|
||||||
if ( c==Token.EOF ) return Token.EOF;
|
if ( c==Token.EOF ) return Token.EOF;
|
||||||
|
|
||||||
List<ThreadState> closure = computeStartState(ip);
|
List<ThreadState> closure = computeStartState(ip);
|
||||||
List<ThreadState> reach = new ArrayList<ThreadState>();
|
List<ThreadState> reach = new ArrayList<ThreadState>();
|
||||||
int prevAcceptAddr = Integer.MAX_VALUE;
|
ThreadState prevAccept = new ThreadState(Integer.MAX_VALUE, -1, NFAStack.EMPTY);
|
||||||
int prevAcceptLastCharIndex = -1;
|
ThreadState firstAccept = null;
|
||||||
int prevAcceptInputMarker = -1;
|
|
||||||
int firstAcceptInputMarker = -1;
|
// int maxAlts = closure.size(); // >= number of alts; if no decision, this is 1
|
||||||
|
int firstCharIndex = input.index(); // use when creating Token
|
||||||
|
|
||||||
do { // while more work
|
do { // while more work
|
||||||
c = input.LA(1);
|
c = input.LA(1);
|
||||||
int i = 0;
|
int i = 0;
|
||||||
boolean accepted = false;
|
boolean accepted = false;
|
||||||
processOneChar:
|
processOneChar:
|
||||||
while ( i<closure.size() ) {
|
while ( i<closure.size() ) {
|
||||||
System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
|
|
||||||
ThreadState t = closure.get(i);
|
ThreadState t = closure.get(i);
|
||||||
ip = t.addr;
|
ip = t.addr;
|
||||||
NFAStack context = t.context;
|
NFAStack context = t.context;
|
||||||
int alt = t.alt;
|
int alt = t.alt;
|
||||||
|
//System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
|
||||||
trace(ip);
|
trace(ip);
|
||||||
short opcode = code[ip];
|
short opcode = code[ip];
|
||||||
ip++; // move to next instruction or first byte of operand
|
ip++; // move to next instruction or first byte of operand
|
||||||
|
@ -211,26 +80,47 @@ processOneChar:
|
||||||
addToClosure(reach, ip, alt, context);
|
addToClosure(reach, ip, alt, context);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Bytecode.LABEL :
|
||||||
|
if ( doActions ) {
|
||||||
|
int labelIndex = getShort(code, ip);
|
||||||
|
System.out.println("label "+labels[labelIndex]);
|
||||||
|
labelValues[labelIndex] =
|
||||||
|
new CommonToken(input, 0, 0, input.index(), -1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case Bytecode.SAVE :
|
||||||
|
if ( doActions ) {
|
||||||
|
int labelIndex = getShort(code, ip);
|
||||||
|
System.out.println("save "+labels[labelIndex]);
|
||||||
|
labelValues[labelIndex].setStopIndex(input.index()-1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case Bytecode.ACTION :
|
||||||
|
if ( doActions ) {
|
||||||
|
int actionIndex = getShort(code, ip);
|
||||||
|
System.out.println("action "+ actionIndex);
|
||||||
|
}
|
||||||
|
break;
|
||||||
case Bytecode.ACCEPT :
|
case Bytecode.ACCEPT :
|
||||||
if ( context != NFAStack.EMPTY ) break; // only do accept for outermost rule
|
if ( context != NFAStack.EMPTY ) break; // only do accept for outermost rule
|
||||||
accepted = true;
|
accepted = true;
|
||||||
int tokenLastCharIndex = input.index() - 1;
|
int tokenLastCharIndex = input.index() - 1;
|
||||||
int ttype = getShort(code, ip);
|
int ttype = getShort(code, ip);
|
||||||
System.out.println("ACCEPT "+ ttype +" with last char position "+ tokenLastCharIndex);
|
System.out.println("ACCEPT "+ ttype +" with last char position "+ tokenLastCharIndex);
|
||||||
if ( tokenLastCharIndex > prevAcceptLastCharIndex ) {
|
if ( tokenLastCharIndex > prevAccept.inputIndex ) {
|
||||||
prevAcceptLastCharIndex = tokenLastCharIndex;
|
prevAccept.inputIndex = tokenLastCharIndex;
|
||||||
// choose longest match so far regardless of rule priority
|
// choose longest match so far regardless of rule priority
|
||||||
System.out.println("replacing old best match @ "+prevAcceptAddr);
|
System.out.println("replacing old best match @ "+prevAccept.addr);
|
||||||
prevAcceptAddr = ip-1;
|
prevAccept.addr = ip-1;
|
||||||
prevAcceptInputMarker = input.mark();
|
prevAccept.inputMarker = input.mark();
|
||||||
firstAcceptInputMarker = prevAcceptInputMarker;
|
if ( firstAccept==null ) firstAccept = prevAccept;
|
||||||
}
|
}
|
||||||
else if ( tokenLastCharIndex == prevAcceptLastCharIndex ) {
|
else if ( tokenLastCharIndex == prevAccept.inputIndex ) {
|
||||||
// choose first rule matched if match is of same length
|
// choose first rule matched if match is of same length
|
||||||
if ( ip-1 < prevAcceptAddr ) { // it will see both accepts for ambig rules
|
if ( ip-1 < prevAccept.addr ) { // it will see both accepts for ambig rules
|
||||||
System.out.println("replacing old best match @ "+prevAcceptAddr);
|
System.out.println("replacing old best match @ "+prevAccept.addr);
|
||||||
prevAcceptAddr = ip-1;
|
prevAccept.addr = ip-1;
|
||||||
prevAcceptInputMarker = input.mark();
|
prevAccept.inputMarker = input.mark();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if we reach accept state, toss out any addresses in rest
|
// if we reach accept state, toss out any addresses in rest
|
||||||
|
@ -263,8 +153,8 @@ processOneChar:
|
||||||
System.out.println("!!!!! no match for char "+(char)c+" at "+input.index());
|
System.out.println("!!!!! no match for char "+(char)c+" at "+input.index());
|
||||||
input.consume();
|
input.consume();
|
||||||
}
|
}
|
||||||
// else reach.size==0 && matched, don't consume: accepted and
|
// else reach.size==0 && matched, don't consume: accepted
|
||||||
|
|
||||||
// swap to avoid reallocating space
|
// swap to avoid reallocating space
|
||||||
List<ThreadState> tmp = reach;
|
List<ThreadState> tmp = reach;
|
||||||
reach = closure;
|
reach = closure;
|
||||||
|
@ -272,20 +162,22 @@ processOneChar:
|
||||||
reach.clear();
|
reach.clear();
|
||||||
} while ( closure.size()>0 );
|
} while ( closure.size()>0 );
|
||||||
|
|
||||||
if ( prevAcceptAddr >= code.length ) return Token.INVALID_TOKEN_TYPE;
|
if ( prevAccept.addr >= code.length ) return Token.INVALID_TOKEN_TYPE;
|
||||||
int ttype = getShort(code, prevAcceptAddr+1);
|
int ttype = getShort(code, prevAccept.addr+1);
|
||||||
System.out.println("done at index "+input.index());
|
input.rewind(prevAccept.inputMarker); // does nothing if we accept'd at input.index() but might need to rewind
|
||||||
System.out.println("accept marker="+prevAcceptInputMarker);
|
if ( firstAccept.inputMarker < prevAccept.inputMarker ) {
|
||||||
input.rewind(prevAcceptInputMarker); // does nothing if we accept'd at input.index() but might need to rewind
|
System.out.println("done at index "+input.index());
|
||||||
input.release(firstAcceptInputMarker); // kill any other markers in stream we made
|
System.out.println("accept marker="+prevAccept.inputMarker);
|
||||||
System.out.println("leaving with index "+input.index());
|
input.release(firstAccept.inputMarker); // kill any other markers in stream we made
|
||||||
|
System.out.println("leaving with index "+input.index());
|
||||||
|
}
|
||||||
return ttype;
|
return ttype;
|
||||||
}
|
}
|
||||||
|
|
||||||
void addToClosure(List<ThreadState> closure, int ip, int alt, NFAStack context) {
|
void addToClosure(List<ThreadState> closure, int ip, int alt, NFAStack context) {
|
||||||
ThreadState t = new ThreadState(ip, alt, context);
|
ThreadState t = new ThreadState(ip, alt, context);
|
||||||
//System.out.println("add to closure "+ip+" "+closure);
|
//System.out.println("add to closure "+ip+" "+closure);
|
||||||
if ( closure.contains(t) ) return; // TODO: VERY INEFFICIENT! use int[num-states] as set test
|
if ( closure.contains(t) ) return;
|
||||||
closure.add(t);
|
closure.add(t);
|
||||||
short opcode = code[ip];
|
short opcode = code[ip];
|
||||||
ip++; // move to next instruction or first byte of operand
|
ip++; // move to next instruction or first byte of operand
|
||||||
|
@ -293,11 +185,12 @@ processOneChar:
|
||||||
case Bytecode.JMP :
|
case Bytecode.JMP :
|
||||||
addToClosure(closure, getShort(code, ip), alt, context);
|
addToClosure(closure, getShort(code, ip), alt, context);
|
||||||
break;
|
break;
|
||||||
|
case Bytecode.LABEL :
|
||||||
case Bytecode.SAVE :
|
case Bytecode.SAVE :
|
||||||
|
case Bytecode.ACTION :
|
||||||
int labelIndex = getShort(code, ip);
|
int labelIndex = getShort(code, ip);
|
||||||
ip += 2;
|
ip += 2;
|
||||||
addToClosure(closure, ip, alt, context); // do closure past SAVE
|
addToClosure(closure, ip, alt, context); // do closure past SAVE
|
||||||
// TODO: impl
|
|
||||||
break;
|
break;
|
||||||
case Bytecode.SPLIT :
|
case Bytecode.SPLIT :
|
||||||
int nopnds = getShort(code, ip);
|
int nopnds = getShort(code, ip);
|
||||||
|
@ -323,8 +216,14 @@ processOneChar:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
List<ThreadState> computeStartState(int ip) { // assume SPLIT at ip
|
List<ThreadState> computeStartState(int ip) {
|
||||||
|
// if we're starting at a SPLIT, add closure of all SPLIT targets
|
||||||
|
// else just add closure of ip
|
||||||
List<ThreadState> closure = new ArrayList<ThreadState>();
|
List<ThreadState> closure = new ArrayList<ThreadState>();
|
||||||
|
if ( code[ip]!=Bytecode.SPLIT ) {
|
||||||
|
addToClosure(closure, ip, 1, NFAStack.EMPTY);
|
||||||
|
return closure;
|
||||||
|
}
|
||||||
ip++;
|
ip++;
|
||||||
int nalts = getShort(code, ip);
|
int nalts = getShort(code, ip);
|
||||||
ip += 2;
|
ip += 2;
|
||||||
|
@ -336,6 +235,10 @@ processOneChar:
|
||||||
return closure;
|
return closure;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------
|
||||||
|
|
||||||
|
// this stuff below can't do SAVE nor CALL/RET but faster.
|
||||||
|
|
||||||
public int execThompson_no_stack(CharStream input, int ip) {
|
public int execThompson_no_stack(CharStream input, int ip) {
|
||||||
int c = input.LA(1);
|
int c = input.LA(1);
|
||||||
if ( c==Token.EOF ) return Token.EOF;
|
if ( c==Token.EOF ) return Token.EOF;
|
||||||
|
@ -484,4 +387,148 @@ processOneChar:
|
||||||
public static int getShort(byte[] memory, int index) {
|
public static int getShort(byte[] memory, int index) {
|
||||||
return (memory[index]&0xFF) <<(8*1) | (memory[index+1]&0xFF); // prevent sign extension with mask
|
return (memory[index]&0xFF) <<(8*1) | (memory[index+1]&0xFF); // prevent sign extension with mask
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
public int exec(CharStream input, String ruleName) {
|
||||||
|
return exec(input, ruleToAddr.get(ruleName));
|
||||||
|
}
|
||||||
|
|
||||||
|
public int exec(CharStream input) { return exec(input, 0); }
|
||||||
|
|
||||||
|
public int exec(CharStream input, int ip) {
|
||||||
|
while ( ip < code.length ) {
|
||||||
|
int c = input.LA(1);
|
||||||
|
trace(ip);
|
||||||
|
short opcode = code[ip];
|
||||||
|
ip++; // move to next instruction or first byte of operand
|
||||||
|
switch (opcode) {
|
||||||
|
case Bytecode.MATCH8 :
|
||||||
|
if ( c != code[ip] ) return 0;
|
||||||
|
ip++;
|
||||||
|
input.consume();
|
||||||
|
break;
|
||||||
|
case Bytecode.MATCH16 :
|
||||||
|
if ( c != getShort(code, ip) ) return 0;
|
||||||
|
ip += 2;
|
||||||
|
input.consume();
|
||||||
|
break;
|
||||||
|
case Bytecode.RANGE8 :
|
||||||
|
if ( c<code[ip] || c>code[ip+1] ) return 0;
|
||||||
|
ip += 2;
|
||||||
|
input.consume();
|
||||||
|
break;
|
||||||
|
case Bytecode.RANGE16 :
|
||||||
|
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) return 0;
|
||||||
|
ip += 4;
|
||||||
|
input.consume();
|
||||||
|
break;
|
||||||
|
case Bytecode.ACCEPT :
|
||||||
|
int ruleIndex = getShort(code, ip);
|
||||||
|
ip += 2;
|
||||||
|
System.out.println("accept "+ruleIndex);
|
||||||
|
return ruleIndex;
|
||||||
|
case Bytecode.JMP :
|
||||||
|
int target = getShort(code, ip);
|
||||||
|
ip = target;
|
||||||
|
continue;
|
||||||
|
case Bytecode.SPLIT :
|
||||||
|
int nopnds = getShort(code, ip);
|
||||||
|
ip += 2;
|
||||||
|
for (int i=1; i<=nopnds-1; i++) {
|
||||||
|
int addr = getShort(code, ip);
|
||||||
|
ip += 2;
|
||||||
|
//System.out.println("try alt "+i+" at "+addr);
|
||||||
|
int m = input.mark();
|
||||||
|
int r = exec(input, addr);
|
||||||
|
if ( r>0 ) { input.release(m); return r; }
|
||||||
|
input.rewind(m);
|
||||||
|
}
|
||||||
|
// try final alternative (w/o recursion)
|
||||||
|
int addr = getShort(code, ip);
|
||||||
|
ip = addr;
|
||||||
|
//System.out.println("try alt "+nopnds+" at "+addr);
|
||||||
|
continue;
|
||||||
|
default :
|
||||||
|
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class Context {
|
||||||
|
public int ip;
|
||||||
|
public int inputMarker;
|
||||||
|
public Context(int ip, int inputMarker) {
|
||||||
|
this.ip = ip;
|
||||||
|
this.inputMarker = inputMarker;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int execNoRecursion(CharStream input, int ip) {
|
||||||
|
List<Context> work = new ArrayList<Context>();
|
||||||
|
work.add(new Context(ip, input.mark()));
|
||||||
|
workLoop:
|
||||||
|
while ( work.size()>0 ) {
|
||||||
|
Context ctx = work.remove(work.size()-1); // treat like stack
|
||||||
|
ip = ctx.ip;
|
||||||
|
input.rewind(ctx.inputMarker);
|
||||||
|
while ( ip < code.length ) {
|
||||||
|
int c = input.LA(1);
|
||||||
|
trace(ip);
|
||||||
|
short opcode = code[ip];
|
||||||
|
ip++; // move to next instruction or first byte of operand
|
||||||
|
switch (opcode) {
|
||||||
|
case Bytecode.MATCH8 :
|
||||||
|
if ( c != code[ip] ) continue workLoop;
|
||||||
|
ip++;
|
||||||
|
input.consume();
|
||||||
|
break;
|
||||||
|
case Bytecode.MATCH16 :
|
||||||
|
if ( c != getShort(code, ip) ) continue workLoop;
|
||||||
|
ip += 2;
|
||||||
|
input.consume();
|
||||||
|
break;
|
||||||
|
case Bytecode.RANGE8 :
|
||||||
|
if ( c<code[ip] || c>code[ip+1] ) continue workLoop;
|
||||||
|
ip += 2;
|
||||||
|
input.consume();
|
||||||
|
break;
|
||||||
|
case Bytecode.RANGE16 :
|
||||||
|
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) continue workLoop;
|
||||||
|
ip += 4;
|
||||||
|
input.consume();
|
||||||
|
break;
|
||||||
|
case Bytecode.ACCEPT :
|
||||||
|
int ruleIndex = getShort(code, ip);
|
||||||
|
ip += 2;
|
||||||
|
System.out.println("accept "+ruleIndex);
|
||||||
|
// returning gives first match not longest; i.e., like PEG
|
||||||
|
return ruleIndex;
|
||||||
|
case Bytecode.JMP :
|
||||||
|
int target = getShort(code, ip);
|
||||||
|
ip = target;
|
||||||
|
continue;
|
||||||
|
case Bytecode.SPLIT :
|
||||||
|
int nopnds = getShort(code, ip);
|
||||||
|
ip += 2;
|
||||||
|
// add split addresses to work queue in reverse order ('cept first one)
|
||||||
|
for (int i=nopnds-1; i>=1; i--) {
|
||||||
|
int addr = getShort(code, ip+i*2);
|
||||||
|
//System.out.println("try alt "+i+" at "+addr);
|
||||||
|
work.add(new Context(addr, input.mark()));
|
||||||
|
}
|
||||||
|
// try first alternative (w/o adding to work list)
|
||||||
|
int addr = getShort(code, ip);
|
||||||
|
ip = addr;
|
||||||
|
//System.out.println("try alt "+nopnds+" at "+addr);
|
||||||
|
continue;
|
||||||
|
default :
|
||||||
|
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,12 +5,22 @@ public class ThreadState {
|
||||||
public int addr;
|
public int addr;
|
||||||
public int alt; // or speculatively matched token type for lexers
|
public int alt; // or speculatively matched token type for lexers
|
||||||
public NFAStack context;
|
public NFAStack context;
|
||||||
|
public int inputIndex = -1; // char (or token?) index from 0
|
||||||
|
public int inputMarker = -1; // accept states track input markers in case we need to rewind
|
||||||
|
|
||||||
public ThreadState(int addr, int alt, NFAStack context) {
|
public ThreadState(int addr, int alt, NFAStack context) {
|
||||||
this.addr = addr;
|
this.addr = addr;
|
||||||
this.alt = alt;
|
this.alt = alt;
|
||||||
this.context = context;
|
this.context = context;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ThreadState(ThreadState t) {
|
||||||
|
this.addr = t.addr;
|
||||||
|
this.alt = t.alt;
|
||||||
|
this.context = t.context;
|
||||||
|
this.inputIndex = t.inputIndex;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean equals(Object o) {
|
public boolean equals(Object o) {
|
||||||
if ( o==null ) return false;
|
if ( o==null ) return false;
|
||||||
if ( this==o ) return true;
|
if ( this==o ) return true;
|
||||||
|
|
|
@ -24,7 +24,8 @@ public class CodeGenPipeline {
|
||||||
//ANTLRStringStream input = new ANTLRStringStream("abc32ab");
|
//ANTLRStringStream input = new ANTLRStringStream("abc32ab");
|
||||||
int ttype = 0;
|
int ttype = 0;
|
||||||
while ( ttype!= Token.EOF ) {
|
while ( ttype!= Token.EOF ) {
|
||||||
ttype = nfa.execThompson(input); System.out.println("ttype="+ttype);
|
ttype = nfa.execThompson(input);
|
||||||
|
System.out.println("ttype="+ttype);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@ import org.antlr.runtime.RecognizerSharedState;
|
||||||
import org.antlr.runtime.Token;
|
import org.antlr.runtime.Token;
|
||||||
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
||||||
import org.antlr.runtime.tree.TreeNodeStream;
|
import org.antlr.runtime.tree.TreeNodeStream;
|
||||||
|
import org.antlr.v4.codegen.nfa.*;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
import org.antlr.v4.parse.GrammarASTAdaptor;
|
import org.antlr.v4.parse.GrammarASTAdaptor;
|
||||||
import org.antlr.v4.runtime.nfa.Bytecode;
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
@ -20,160 +21,17 @@ import java.util.Map;
|
||||||
|
|
||||||
/** http://swtch.com/~rsc/regexp/regexp2.html */
|
/** http://swtch.com/~rsc/regexp/regexp2.html */
|
||||||
public class NFABytecodeGenerator extends TreeParser {
|
public class NFABytecodeGenerator extends TreeParser {
|
||||||
public abstract static class Instr {
|
LexerGrammar lg;
|
||||||
public int addr;
|
|
||||||
public abstract short opcode();
|
|
||||||
public abstract int nBytes();
|
|
||||||
public int charSize(int a, int b) { return Math.max(charSize(a), charSize(b)); }
|
|
||||||
public int charSize(int c) {
|
|
||||||
if ( c<=0xFF ) return 1;
|
|
||||||
if ( c<=0xFFFF ) return 2;
|
|
||||||
return 4;
|
|
||||||
}
|
|
||||||
public void write(byte[] code) { code[addr] = (byte)opcode(); }
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class WildcardInstr extends Instr {
|
|
||||||
Token token;
|
|
||||||
public WildcardInstr(Token t) { super(); this.token = t; }
|
|
||||||
public short opcode() { return Bytecode.WILDCARD; }
|
|
||||||
public int nBytes() { return 1; }
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class MatchInstr extends Instr {
|
|
||||||
Token token;
|
|
||||||
int c;
|
|
||||||
public MatchInstr(Token t, int c) { super(); this.token = t; this.c = c; }
|
|
||||||
public short opcode() { return charSize(c)==1?Bytecode.MATCH8:Bytecode.MATCH16; };
|
|
||||||
public int nBytes() { return 1+charSize(c); }
|
|
||||||
public void write(byte[] code) {
|
|
||||||
super.write(code);
|
|
||||||
if ( charSize(c)==1 ) code[addr+1] = (byte)(c&0xFF);
|
|
||||||
else writeShort(code, addr+1, (short)c);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return addr+":MatchInstr{" +
|
|
||||||
"c=" + c +
|
|
||||||
'}';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class RangeInstr extends Instr {
|
|
||||||
Token start, stop;
|
|
||||||
int a, b;
|
|
||||||
public RangeInstr(Token start, Token stop) {
|
|
||||||
this.start = start;
|
|
||||||
this.stop = stop;
|
|
||||||
a = (char)Target.getCharValueFromGrammarCharLiteral(start.getText());
|
|
||||||
b = (char)Target.getCharValueFromGrammarCharLiteral(stop.getText());
|
|
||||||
}
|
|
||||||
public short opcode() { return charSize(a, b)==1?Bytecode.RANGE8:Bytecode.RANGE16; };
|
|
||||||
public int nBytes() { return 1+2*charSize(a, b); }
|
|
||||||
public void write(byte[] code) {
|
|
||||||
super.write(code);
|
|
||||||
if ( charSize(a,b)==1 ) {
|
|
||||||
code[addr+1] = (byte)(a&0xFF);
|
|
||||||
code[addr+2] = (byte)(b&0xFF);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
writeShort(code, addr+1, (short)a);
|
|
||||||
writeShort(code, addr+1+charSize(a,b), (short)b);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return addr+":RangeInstr{"+ a +".."+ b +"}";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class AcceptInstr extends Instr {
|
|
||||||
int ruleIndex;
|
|
||||||
public AcceptInstr(int ruleIndex) {
|
|
||||||
this.ruleIndex = ruleIndex;
|
|
||||||
}
|
|
||||||
public short opcode() { return Bytecode.ACCEPT; };
|
|
||||||
public int nBytes() { return 1+2; }
|
|
||||||
public void write(byte[] code) {
|
|
||||||
super.write(code);
|
|
||||||
writeShort(code, addr+1, (short)ruleIndex);
|
|
||||||
}
|
|
||||||
public String toString() { return addr+":AcceptInstr "+ruleIndex; }
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class RetInstr extends Instr {
|
|
||||||
public short opcode() { return Bytecode.RET; }
|
|
||||||
public int nBytes() { return 1; }
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class JumpInstr extends Instr {
|
|
||||||
int target;
|
|
||||||
public short opcode() { return Bytecode.JMP; };
|
|
||||||
public int nBytes() { return 1+Bytecode.ADDR_SIZE; }
|
|
||||||
public void write(byte[] code) {
|
|
||||||
super.write(code);
|
|
||||||
writeShort(code, addr+1, (short)target);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return addr+":JumpInstr{" +
|
|
||||||
"target=" + target +
|
|
||||||
'}';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class CallInstr extends Instr {
|
|
||||||
Token token;
|
|
||||||
int target;
|
|
||||||
public CallInstr(Token token) { this.token = token; }
|
|
||||||
public short opcode() { return Bytecode.CALL; };
|
|
||||||
public int nBytes() { return 1+Bytecode.ADDR_SIZE; }
|
|
||||||
public void write(byte[] code) {
|
|
||||||
super.write(code);
|
|
||||||
writeShort(code, addr+1, (short)target);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return addr+":CallInstr{" +
|
|
||||||
"target=" + target +
|
|
||||||
'}';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class SplitInstr extends Instr {
|
|
||||||
List<Integer> addrs = new ArrayList<Integer>();
|
|
||||||
int nAlts;
|
|
||||||
public SplitInstr(int nAlts) { this.nAlts = nAlts; }
|
|
||||||
public short opcode() { return Bytecode.SPLIT; };
|
|
||||||
public int nBytes() { return 1+2+nAlts*Bytecode.ADDR_SIZE; }
|
|
||||||
public void write(byte[] code) {
|
|
||||||
super.write(code);
|
|
||||||
int a = addr + 1;
|
|
||||||
writeShort(code, a, (short)addrs.size());
|
|
||||||
a += 2;
|
|
||||||
for (int x : addrs) {
|
|
||||||
writeShort(code, a, (short)x);
|
|
||||||
a += Bytecode.ADDR_SIZE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return addr+":SplitInstr{" +
|
|
||||||
"addrs=" + addrs +
|
|
||||||
'}';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<Instr> instrs = new ArrayList<Instr>();
|
public List<Instr> instrs = new ArrayList<Instr>();
|
||||||
public int ip = 0; // where to write next
|
public int ip = 0; // where to write next
|
||||||
|
Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
|
||||||
|
int[] tokenTypeToAddr;
|
||||||
|
List<String> labels = new ArrayList<String>();
|
||||||
|
|
||||||
public NFABytecodeGenerator(TreeNodeStream input) {
|
public NFABytecodeGenerator(LexerGrammar lg, TreeNodeStream input) {
|
||||||
super(input);
|
super(input);
|
||||||
|
this.lg = lg;
|
||||||
|
tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
|
||||||
}
|
}
|
||||||
|
|
||||||
public NFABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
|
public NFABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
|
||||||
|
@ -193,32 +51,26 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Given any block of alts, return list of instruction objects */
|
public byte[] getBytecode() {
|
||||||
// public static List<Instr> getInstructions(GrammarAST blk, int acceptValue) {
|
|
||||||
// GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
|
||||||
// CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
|
|
||||||
// NFABytecodeTriggers gen = new NFABytecodeTriggers(nodes);
|
|
||||||
// try {
|
|
||||||
// gen.block();
|
|
||||||
// gen.emit(new NFABytecodeGenerator.AcceptInstr(acceptValue));
|
|
||||||
// }
|
|
||||||
// catch (Exception e){
|
|
||||||
// e.printStackTrace(System.err);
|
|
||||||
// }
|
|
||||||
// return gen.instrs;
|
|
||||||
// }
|
|
||||||
|
|
||||||
public static byte[] getByteCode(Map<String, Integer> ruleToAddr, List<Instr> instrs) {
|
|
||||||
Instr last = instrs.get(instrs.size() - 1);
|
Instr last = instrs.get(instrs.size() - 1);
|
||||||
int size = last.addr + last.nBytes();
|
int size = last.addr + last.nBytes();
|
||||||
byte[] code = new byte[size];
|
byte[] code = new byte[size];
|
||||||
// resolve CALL instruction targets before generating code
|
// resolve CALL instruction targets and index labels before generating code
|
||||||
for (Instr I : instrs) {
|
for (Instr I : instrs) {
|
||||||
if ( I instanceof CallInstr ) {
|
if ( I instanceof CallInstr ) {
|
||||||
CallInstr C = (CallInstr) I;
|
CallInstr C = (CallInstr) I;
|
||||||
String ruleName = C.token.getText();
|
String ruleName = C.token.getText();
|
||||||
C.target = ruleToAddr.get(ruleName);
|
C.target = ruleToAddr.get(ruleName);
|
||||||
}
|
}
|
||||||
|
else if ( I instanceof LabelInstr ) {
|
||||||
|
LabelInstr L = (LabelInstr)I;
|
||||||
|
L.labelIndex = labels.size();
|
||||||
|
labels.add(L.token.getText());
|
||||||
|
}
|
||||||
|
else if ( I instanceof SaveInstr ) {
|
||||||
|
SaveInstr S = (SaveInstr)I;
|
||||||
|
S.labelIndex = labels.size()-1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for (Instr I : instrs) {
|
for (Instr I : instrs) {
|
||||||
I.write(code);
|
I.write(code);
|
||||||
|
@ -228,7 +80,7 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
|
|
||||||
public static NFA getBytecode(LexerGrammar lg, String modeName) {
|
public static NFA getBytecode(LexerGrammar lg, String modeName) {
|
||||||
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
||||||
NFABytecodeTriggers gen = new NFABytecodeTriggers(null);
|
NFABytecodeTriggers gen = new NFABytecodeTriggers(lg, null);
|
||||||
|
|
||||||
// add split for s0 to hook up rules (fill in operands as we gen rules)
|
// add split for s0 to hook up rules (fill in operands as we gen rules)
|
||||||
int numRules = lg.modes.get(modeName).size();
|
int numRules = lg.modes.get(modeName).size();
|
||||||
|
@ -237,37 +89,35 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
SplitInstr s0 = new SplitInstr(numRules - numFragmentRules);
|
SplitInstr s0 = new SplitInstr(numRules - numFragmentRules);
|
||||||
gen.emit(s0);
|
gen.emit(s0);
|
||||||
|
|
||||||
Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
|
|
||||||
int[] tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
|
|
||||||
for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
|
for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
|
||||||
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
|
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
|
||||||
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
|
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
|
||||||
gen.setTreeNodeStream(nodes);
|
gen.setTreeNodeStream(nodes);
|
||||||
int ttype = lg.getTokenType(r.name);
|
int ttype = lg.getTokenType(r.name);
|
||||||
ruleToAddr.put(r.name, gen.ip);
|
gen.ruleToAddr.put(r.name, gen.ip);
|
||||||
if ( !r.isFragment() ) {
|
if ( !r.isFragment() ) {
|
||||||
s0.addrs.add(gen.ip);
|
s0.addrs.add(gen.ip);
|
||||||
tokenTypeToAddr[ttype] = gen.ip;
|
gen.tokenTypeToAddr[ttype] = gen.ip;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
gen.block();
|
((NFABytecodeTriggers)gen).block();
|
||||||
int ruleTokenType = lg.getTokenType(r.name);
|
int ruleTokenType = lg.getTokenType(r.name);
|
||||||
if ( !r.isFragment() ) {
|
if ( !r.isFragment() ) {
|
||||||
gen.emit(new NFABytecodeGenerator.AcceptInstr(ruleTokenType));
|
gen.emit(new AcceptInstr(ruleTokenType));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
gen.emit(new NFABytecodeGenerator.RetInstr());
|
gen.emit(new RetInstr());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (Exception e){
|
catch (Exception e){
|
||||||
e.printStackTrace(System.err);
|
e.printStackTrace(System.err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
byte[] code = NFABytecodeGenerator.getByteCode(ruleToAddr, gen.instrs);
|
byte[] code = gen.getBytecode();
|
||||||
System.out.println(Bytecode.disassemble(code));
|
System.out.println(Bytecode.disassemble(code));
|
||||||
System.out.println("rule addrs="+ruleToAddr);
|
System.out.println("rule addrs="+gen.ruleToAddr);
|
||||||
|
|
||||||
return new NFA(code, ruleToAddr, tokenTypeToAddr);
|
return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.labels.toArray(new String[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Write value at index into a byte array highest to lowest byte,
|
/** Write value at index into a byte array highest to lowest byte,
|
||||||
|
@ -277,5 +127,4 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
memory[index+0] = (byte)((value>>(8*1))&0xFF);
|
memory[index+0] = (byte)((value>>(8*1))&0xFF);
|
||||||
memory[index+1] = (byte)(value&0xFF);
|
memory[index+1] = (byte)(value&0xFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,13 +8,21 @@ options {
|
||||||
|
|
||||||
@header {
|
@header {
|
||||||
package org.antlr.v4.codegen;
|
package org.antlr.v4.codegen;
|
||||||
|
import org.antlr.v4.codegen.nfa.*;
|
||||||
import org.antlr.v4.tool.GrammarAST;
|
import org.antlr.v4.tool.GrammarAST;
|
||||||
import org.antlr.v4.tool.GrammarASTWithOptions;
|
import org.antlr.v4.tool.GrammarASTWithOptions;
|
||||||
|
import org.antlr.v4.tool.LexerGrammar;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@members {
|
||||||
|
public NFABytecodeTriggers(LexerGrammar lg, TreeNodeStream input) {
|
||||||
|
super(lg, input);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
e1 | e2 | e3:
|
e1 | e2 | e3:
|
||||||
split 3, L1, L2, L3
|
split 3, L1, L2, L3
|
||||||
|
@ -69,14 +77,14 @@ element
|
||||||
: labeledElement
|
: labeledElement
|
||||||
| atom
|
| atom
|
||||||
| ebnf
|
| ebnf
|
||||||
| ACTION
|
| ACTION {emit(new ActionInstr($ACTION.token));}
|
||||||
| SEMPRED
|
| SEMPRED {emit(new SemPredInstr($SEMPRED.token));}
|
||||||
| GATED_SEMPRED
|
| GATED_SEMPRED {emit(new SemPredInstr($GATED_SEMPRED.token));}
|
||||||
| treeSpec
|
| treeSpec
|
||||||
;
|
;
|
||||||
|
|
||||||
labeledElement
|
labeledElement
|
||||||
: ^(ASSIGN ID atom)
|
: ^(ASSIGN ID {emit(new LabelInstr($ID.token));} atom {emit(new SaveInstr($ID.token));} )
|
||||||
| ^(ASSIGN ID block)
|
| ^(ASSIGN ID block)
|
||||||
| ^(PLUS_ASSIGN ID atom)
|
| ^(PLUS_ASSIGN ID atom)
|
||||||
| ^(PLUS_ASSIGN ID block)
|
| ^(PLUS_ASSIGN ID block)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,19 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class AcceptInstr extends Instr {
|
||||||
|
public int ruleIndex;
|
||||||
|
public AcceptInstr(int ruleIndex) {
|
||||||
|
this.ruleIndex = ruleIndex;
|
||||||
|
}
|
||||||
|
public short opcode() { return Bytecode.ACCEPT; };
|
||||||
|
public int nBytes() { return 1+2; }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
NFABytecodeGenerator.writeShort(code, addr+1, (short)ruleIndex);
|
||||||
|
}
|
||||||
|
public String toString() { return addr+":AcceptInstr "+ruleIndex; }
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class ActionInstr extends Instr {
|
||||||
|
public int actionIndex;
|
||||||
|
public Token token;
|
||||||
|
public ActionInstr(Token token) {
|
||||||
|
this.token = token;
|
||||||
|
}
|
||||||
|
public short opcode() { return Bytecode.ACTION; };
|
||||||
|
public int nBytes() { return 1+2; }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
NFABytecodeGenerator.writeShort(code, addr+1, (short)actionIndex);
|
||||||
|
}
|
||||||
|
public String toString() { return addr+":ActionInstr "+actionIndex; }
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class CallInstr extends Instr {
|
||||||
|
public Token token;
|
||||||
|
public int target;
|
||||||
|
public CallInstr(Token token) { this.token = token; }
|
||||||
|
public short opcode() { return Bytecode.CALL; };
|
||||||
|
public int nBytes() { return 1+Bytecode.ADDR_SIZE; }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
NFABytecodeGenerator.writeShort(code, addr+1, (short)target);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return addr+":CallInstr{" +
|
||||||
|
"target=" + target +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public abstract class Instr {
|
||||||
|
public int addr;
|
||||||
|
public abstract short opcode();
|
||||||
|
public abstract int nBytes();
|
||||||
|
public int charSize(int a, int b) { return Math.max(charSize(a), charSize(b)); }
|
||||||
|
public int charSize(int c) {
|
||||||
|
if ( c<=0xFF ) return 1;
|
||||||
|
if ( c<=0xFFFF ) return 2;
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
public void write(byte[] code) { code[addr] = (byte)opcode(); }
|
||||||
|
}
|
|
@ -0,0 +1,22 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class JumpInstr extends Instr {
|
||||||
|
public int target;
|
||||||
|
public short opcode() { return Bytecode.JMP; };
|
||||||
|
public int nBytes() { return 1+Bytecode.ADDR_SIZE; }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
NFABytecodeGenerator.writeShort(code, addr+1, (short)target);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return addr+":JumpInstr{" +
|
||||||
|
"target=" + target +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class LabelInstr extends Instr {
|
||||||
|
public int labelIndex;
|
||||||
|
public Token token;
|
||||||
|
public LabelInstr(Token token) {
|
||||||
|
this.token = token;
|
||||||
|
}
|
||||||
|
public short opcode() { return Bytecode.LABEL; };
|
||||||
|
public int nBytes() { return 1+2; }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
NFABytecodeGenerator.writeShort(code, addr+1, (short)labelIndex);
|
||||||
|
}
|
||||||
|
public String toString() { return addr+":LabelInstr "+ labelIndex; }
|
||||||
|
}
|
|
@ -0,0 +1,26 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class MatchInstr extends Instr {
|
||||||
|
public Token token;
|
||||||
|
public int c;
|
||||||
|
public MatchInstr(Token t, int c) { super(); this.token = t; this.c = c; }
|
||||||
|
public short opcode() { return charSize(c)==1? Bytecode.MATCH8:Bytecode.MATCH16; };
|
||||||
|
public int nBytes() { return 1+charSize(c); }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
if ( charSize(c)==1 ) code[addr+1] = (byte)(c&0xFF);
|
||||||
|
else NFABytecodeGenerator.writeShort(code, addr+1, (short)c);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return addr+":MatchInstr{" +
|
||||||
|
"c=" + c +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.codegen.Target;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class RangeInstr extends Instr {
|
||||||
|
public Token start, stop;
|
||||||
|
public int a, b;
|
||||||
|
public RangeInstr(Token start, Token stop) {
|
||||||
|
this.start = start;
|
||||||
|
this.stop = stop;
|
||||||
|
a = (char) Target.getCharValueFromGrammarCharLiteral(start.getText());
|
||||||
|
b = (char)Target.getCharValueFromGrammarCharLiteral(stop.getText());
|
||||||
|
}
|
||||||
|
public short opcode() { return charSize(a, b)==1? Bytecode.RANGE8:Bytecode.RANGE16; };
|
||||||
|
public int nBytes() { return 1+2*charSize(a, b); }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
if ( charSize(a,b)==1 ) {
|
||||||
|
code[addr+1] = (byte)(a&0xFF);
|
||||||
|
code[addr+2] = (byte)(b&0xFF);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
NFABytecodeGenerator.writeShort(code, addr+1, (short)a);
|
||||||
|
NFABytecodeGenerator.writeShort(code, addr+1+charSize(a,b), (short)b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return addr+":RangeInstr{"+ a +".."+ b +"}";
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,9 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class RetInstr extends Instr {
|
||||||
|
public short opcode() { return Bytecode.RET; }
|
||||||
|
public int nBytes() { return 1; }
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class SaveInstr extends Instr {
|
||||||
|
public int labelIndex;
|
||||||
|
public Token token;
|
||||||
|
public SaveInstr(Token token) {
|
||||||
|
this.token = token;
|
||||||
|
}
|
||||||
|
public short opcode() { return Bytecode.SAVE; };
|
||||||
|
public int nBytes() { return 1+2; }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
NFABytecodeGenerator.writeShort(code, addr+1, (short) labelIndex);
|
||||||
|
}
|
||||||
|
public String toString() { return addr+":SaveInstr "+ labelIndex; }
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class SemPredInstr extends Instr {
|
||||||
|
public int predIndex;
|
||||||
|
public Token token;
|
||||||
|
public SemPredInstr(Token token) {
|
||||||
|
this.token = token;
|
||||||
|
}
|
||||||
|
public short opcode() { return Bytecode.SEMPRED; };
|
||||||
|
public int nBytes() { return 1+2; }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
NFABytecodeGenerator.writeShort(code, addr+1, (short) predIndex);
|
||||||
|
}
|
||||||
|
public String toString() { return addr+":SemPredInstr "+ predIndex; }
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class SplitInstr extends Instr {
|
||||||
|
public List<Integer> addrs = new ArrayList<Integer>();
|
||||||
|
public int nAlts;
|
||||||
|
public SplitInstr(int nAlts) { this.nAlts = nAlts; }
|
||||||
|
public short opcode() { return Bytecode.SPLIT; };
|
||||||
|
public int nBytes() { return 1+2+nAlts*Bytecode.ADDR_SIZE; }
|
||||||
|
public void write(byte[] code) {
|
||||||
|
super.write(code);
|
||||||
|
int a = addr + 1;
|
||||||
|
NFABytecodeGenerator.writeShort(code, a, (short)addrs.size());
|
||||||
|
a += 2;
|
||||||
|
for (int x : addrs) {
|
||||||
|
NFABytecodeGenerator.writeShort(code, a, (short)x);
|
||||||
|
a += Bytecode.ADDR_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return addr+":SplitInstr{" +
|
||||||
|
"addrs=" + addrs +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,12 @@
|
||||||
|
package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class WildcardInstr extends Instr {
|
||||||
|
public Token token;
|
||||||
|
public WildcardInstr(Token t) { super(); this.token = t; }
|
||||||
|
public short opcode() { return Bytecode.WILDCARD; }
|
||||||
|
public int nBytes() { return 1; }
|
||||||
|
}
|
|
@ -80,6 +80,71 @@ public class TestNFABytecodeGeneration extends BaseTest {
|
||||||
checkBytecode(g, expecting);
|
checkBytecode(g, expecting);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test public void testLabeledChar() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"A : a='a' ;\n");
|
||||||
|
String expecting =
|
||||||
|
"0000:\tsplit 5\n" +
|
||||||
|
"0005:\tlabel 0\n" +
|
||||||
|
"0008:\tmatch8 'a'\n" +
|
||||||
|
"0010:\tsave 0\n" +
|
||||||
|
"0013:\taccept 4\n";
|
||||||
|
checkBytecode(g, expecting);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testLabeledString() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"A : a='aa' ;\n");
|
||||||
|
String expecting =
|
||||||
|
"0000:\tsplit 5\n" +
|
||||||
|
"0005:\tlabel 0\n" +
|
||||||
|
"0008:\tmatch8 'a'\n" +
|
||||||
|
"0010:\tmatch8 'a'\n" +
|
||||||
|
"0012:\tsave 0\n" +
|
||||||
|
"0015:\taccept 4\n";
|
||||||
|
checkBytecode(g, expecting);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testLabeledToken() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"I : d=D ;\n" +
|
||||||
|
"fragment D : '0'..'9'+ ;\n");
|
||||||
|
String expecting =
|
||||||
|
"0000:\tsplit 5\n" +
|
||||||
|
"0005:\tlabel 0\n" +
|
||||||
|
"0008:\tcall 17\n" +
|
||||||
|
"0011:\tsave 0\n" +
|
||||||
|
"0014:\taccept 4\n" +
|
||||||
|
"0017:\trange8 '0', '9'\n" +
|
||||||
|
"0020:\tsplit 17, 27\n" +
|
||||||
|
"0027:\tret \n";
|
||||||
|
checkBytecode(g, expecting);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testLabelIndexes() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"A : a='a' ;\n" +
|
||||||
|
"B : a='b' b='c' ;\n");
|
||||||
|
String expecting =
|
||||||
|
"0000:\tsplit 7, 18\n" +
|
||||||
|
"0007:\tlabel 0\n" +
|
||||||
|
"0010:\tmatch8 'a'\n" +
|
||||||
|
"0012:\tsave 0\n" +
|
||||||
|
"0015:\taccept 4\n" +
|
||||||
|
"0018:\tlabel 1\n" +
|
||||||
|
"0021:\tmatch8 'b'\n" +
|
||||||
|
"0023:\tsave 1\n" +
|
||||||
|
"0026:\tlabel 2\n" +
|
||||||
|
"0029:\tmatch8 'c'\n" +
|
||||||
|
"0031:\tsave 2\n" +
|
||||||
|
"0034:\taccept 5\n";
|
||||||
|
checkBytecode(g, expecting);
|
||||||
|
}
|
||||||
|
|
||||||
public void _template() throws Exception {
|
public void _template() throws Exception {
|
||||||
LexerGrammar g = new LexerGrammar(
|
LexerGrammar g = new LexerGrammar(
|
||||||
"\n");
|
"\n");
|
||||||
|
|
|
@ -4,6 +4,7 @@ import org.antlr.runtime.ANTLRStringStream;
|
||||||
import org.antlr.runtime.Token;
|
import org.antlr.runtime.Token;
|
||||||
import org.antlr.v4.Tool;
|
import org.antlr.v4.Tool;
|
||||||
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
|
import org.antlr.v4.runtime.CommonToken;
|
||||||
import org.antlr.v4.runtime.nfa.NFA;
|
import org.antlr.v4.runtime.nfa.NFA;
|
||||||
import org.antlr.v4.semantics.SemanticPipeline;
|
import org.antlr.v4.semantics.SemanticPipeline;
|
||||||
import org.antlr.v4.tool.Grammar;
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
@ -11,6 +12,7 @@ import org.antlr.v4.tool.LexerGrammar;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
|
@ -118,6 +120,44 @@ public class TestNFABytecodeInterp extends BaseTest {
|
||||||
checkMatches(g, "1.", "NUM, DOT, EOF");
|
checkMatches(g, "1.", "NUM, DOT, EOF");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test public void testLabeledChar() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"A : a='a' ;\n");
|
||||||
|
checkMatches(g, "a", "A, EOF", "[[@-1,0:0='a',<0>,1:0]]");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testLabeledString() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"A : a='abc' ;\n");
|
||||||
|
checkMatches(g, "abc", "A, EOF", "[[@-1,0:2='abc',<0>,1:0]]");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testLabeledToken() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"I : d=D ;\n" +
|
||||||
|
"fragment D : '0'..'9'+ ;\n");
|
||||||
|
checkMatches(g, "901", "I, EOF", "[[@-1,0:2='901',<0>,1:0]]");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testLabelInLoopIsLastElement() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"I : d=D+ ;\n" +
|
||||||
|
"fragment D : '0'..'9' ;\n");
|
||||||
|
checkMatches(g, "901", "I, EOF", "[[@-1,2:2='1',<0>,1:2]]");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testLabelIndexes() throws Exception {
|
||||||
|
LexerGrammar g = new LexerGrammar(
|
||||||
|
"lexer grammar L;\n" +
|
||||||
|
"A : a='a' ;\n" +
|
||||||
|
"B : a='b' b='c' ;\n");
|
||||||
|
checkMatches(g, "bc", "B, EOF", "[[@-1,0:-1='',<0>,1:0], [@-1,0:0='b',<0>,1:0], [@-1,1:1='c',<0>,1:1]]");
|
||||||
|
}
|
||||||
|
|
||||||
public void _template() throws Exception {
|
public void _template() throws Exception {
|
||||||
LexerGrammar g = new LexerGrammar(
|
LexerGrammar g = new LexerGrammar(
|
||||||
"\n");
|
"\n");
|
||||||
|
@ -126,6 +166,12 @@ public class TestNFABytecodeInterp extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
void checkMatches(LexerGrammar g, String input, String expecting) {
|
void checkMatches(LexerGrammar g, String input, String expecting) {
|
||||||
|
checkMatches(g, input, expecting, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
void checkMatches(LexerGrammar g, String input, String expecting,
|
||||||
|
String expectingTokens)
|
||||||
|
{
|
||||||
if ( g.ast!=null && !g.ast.hasErrors ) {
|
if ( g.ast!=null && !g.ast.hasErrors ) {
|
||||||
System.out.println(g.ast.toStringTree());
|
System.out.println(g.ast.toStringTree());
|
||||||
Tool antlr = new Tool();
|
Tool antlr = new Tool();
|
||||||
|
@ -138,22 +184,27 @@ public class TestNFABytecodeInterp extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Integer> expectingTokens = new ArrayList<Integer>();
|
List<Integer> expectingTokenTypes = new ArrayList<Integer>();
|
||||||
if ( expecting!=null && !expecting.trim().equals("") ) {
|
if ( expecting!=null && !expecting.trim().equals("") ) {
|
||||||
for (String tname : expecting.replace(" ", "").split(",")) {
|
for (String tname : expecting.replace(" ", "").split(",")) {
|
||||||
int ttype = g.getTokenType(tname);
|
int ttype = g.getTokenType(tname);
|
||||||
expectingTokens.add(ttype);
|
expectingTokenTypes.add(ttype);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
|
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
|
||||||
ANTLRStringStream in = new ANTLRStringStream(input);
|
ANTLRStringStream in = new ANTLRStringStream(input);
|
||||||
List<Integer> tokens = new ArrayList<Integer>();
|
List<Integer> tokenTypes = new ArrayList<Integer>();
|
||||||
|
CommonToken[] tokens = new CommonToken[nfa.labels.length];
|
||||||
int ttype = 0;
|
int ttype = 0;
|
||||||
do {
|
do {
|
||||||
ttype = nfa.execThompson(in);
|
ttype = nfa.execThompson(in, 0, true, tokens);
|
||||||
tokens.add(ttype);
|
tokenTypes.add(ttype);
|
||||||
} while ( ttype!= Token.EOF );
|
} while ( ttype!= Token.EOF );
|
||||||
assertEquals(expectingTokens, tokens);
|
assertEquals(expectingTokenTypes, tokenTypes);
|
||||||
|
|
||||||
|
if ( expectingTokens!=null ) {
|
||||||
|
assertEquals(expectingTokens, Arrays.toString(tokens));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue