forked from jasder/antlr
added exec version that handles stack and new ThreadState / context. unit tests
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6829]
This commit is contained in:
parent
541350bbe3
commit
9dbc6a43fd
|
@ -48,7 +48,9 @@ public class Bytecode {
|
|||
public static final short RANGE16 = 7;
|
||||
public static final short WILDCARD = 8;
|
||||
//public static final short NOT = 8; ???
|
||||
public static final short SAVE = 9;
|
||||
public static final short CALL = 9; // JMP with a push
|
||||
public static final short RET = 10; // an accept instr for fragment rules
|
||||
public static final short SAVE = 11;
|
||||
|
||||
/** Used for disassembly; describes instruction set */
|
||||
public static Instruction[] instructions = new Instruction[] {
|
||||
|
@ -61,6 +63,8 @@ public class Bytecode {
|
|||
new Instruction("range8", OperandType.BYTE, OperandType.BYTE),
|
||||
new Instruction("range16", OperandType.CHAR, OperandType.CHAR),
|
||||
new Instruction("wildcard"),
|
||||
new Instruction("call", OperandType.ADDR),
|
||||
new Instruction("ret"),
|
||||
new Instruction("save", OperandType.SHORT),
|
||||
};
|
||||
|
||||
|
|
|
@ -185,7 +185,159 @@ workLoop:
|
|||
return 0;
|
||||
}
|
||||
|
||||
public int execThompson(CharStream input, int ip) {
|
||||
public int execThompson(CharStream input) {
|
||||
int ip = 0; // always start at SPLIT instr at address 0
|
||||
int c = input.LA(1);
|
||||
if ( c==Token.EOF ) return Token.EOF;
|
||||
|
||||
List<ThreadState> closure = computeStartState(ip);
|
||||
List<ThreadState> reach = new ArrayList<ThreadState>();
|
||||
int prevAcceptAddr = Integer.MAX_VALUE;
|
||||
int prevAcceptLastCharIndex = -1;
|
||||
int prevAcceptInputMarker = -1;
|
||||
int firstAcceptInputMarker = -1;
|
||||
do { // while more work
|
||||
c = input.LA(1);
|
||||
int i = 0;
|
||||
processOneChar:
|
||||
while ( i<closure.size() ) {
|
||||
//for (int i=0; i<closure.size(); i++) {
|
||||
System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
|
||||
ThreadState t = closure.get(i);
|
||||
ip = t.addr;
|
||||
NFAStack context = t.context;
|
||||
int alt = t.alt;
|
||||
trace(ip);
|
||||
short opcode = code[ip];
|
||||
ip++; // move to next instruction or first byte of operand
|
||||
switch (opcode) {
|
||||
case Bytecode.MATCH8 :
|
||||
if ( c == code[ip] ) {
|
||||
addToClosure(reach, ip+1, alt, context);
|
||||
}
|
||||
break;
|
||||
case Bytecode.MATCH16 :
|
||||
if ( c == getShort(code, ip) ) {
|
||||
addToClosure(reach, ip+2, alt, context);
|
||||
}
|
||||
break;
|
||||
case Bytecode.RANGE8 :
|
||||
if ( c>=code[ip] && c<=code[ip+1] ) {
|
||||
addToClosure(reach, ip+2, alt, context);
|
||||
}
|
||||
break;
|
||||
case Bytecode.RANGE16 :
|
||||
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) {
|
||||
addToClosure(reach, ip+4, alt, context);
|
||||
}
|
||||
break;
|
||||
case Bytecode.WILDCARD :
|
||||
if ( c!=Token.EOF ) addToClosure(reach, ip, alt, context);
|
||||
break;
|
||||
case Bytecode.ACCEPT :
|
||||
int tokenLastCharIndex = input.index() - 1;
|
||||
int ttype = getShort(code, ip);
|
||||
System.out.println("ACCEPT "+ ttype +" with last char position "+ tokenLastCharIndex);
|
||||
if ( tokenLastCharIndex > prevAcceptLastCharIndex ) {
|
||||
prevAcceptLastCharIndex = tokenLastCharIndex;
|
||||
// choose longest match so far regardless of rule priority
|
||||
System.out.println("replacing old best match @ "+prevAcceptAddr);
|
||||
prevAcceptAddr = ip-1;
|
||||
prevAcceptInputMarker = input.mark();
|
||||
firstAcceptInputMarker = prevAcceptInputMarker;
|
||||
}
|
||||
else if ( tokenLastCharIndex == prevAcceptLastCharIndex ) {
|
||||
// choose first rule matched if match is of same length
|
||||
if ( ip-1 < prevAcceptAddr ) { // it will see both accepts for ambig rules
|
||||
System.out.println("replacing old best match @ "+prevAcceptAddr);
|
||||
prevAcceptAddr = ip-1;
|
||||
prevAcceptInputMarker = input.mark();
|
||||
}
|
||||
}
|
||||
// if we reach accept state, toss out any addresses in rest
|
||||
// of work list associated with accept's rule; that rule is done
|
||||
int j=i+1;
|
||||
while ( j<closure.size() ) {
|
||||
ThreadState cl = closure.get(j);
|
||||
System.out.println("remaining "+ cl);
|
||||
if ( cl.alt==alt ) closure.remove(j);
|
||||
else j++;
|
||||
}
|
||||
// then, move to next char, looking for longer match
|
||||
// (we continue processing if there are states in reach)
|
||||
break;
|
||||
case Bytecode.JMP : // ignore
|
||||
case Bytecode.SPLIT :
|
||||
case Bytecode.CALL :
|
||||
case Bytecode.RET :
|
||||
break;
|
||||
default :
|
||||
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
if ( reach.size()>0 ) { // if we reached other states, consume and process them
|
||||
input.consume();
|
||||
}
|
||||
// swap to avoid reallocating space
|
||||
List<ThreadState> tmp = reach;
|
||||
reach = closure;
|
||||
closure = tmp;
|
||||
reach.clear();
|
||||
} while ( closure.size()>0 );
|
||||
|
||||
if ( prevAcceptAddr >= code.length ) return Token.INVALID_TOKEN_TYPE;
|
||||
int ttype = getShort(code, prevAcceptAddr+1);
|
||||
System.out.println("done at index "+input.index());
|
||||
System.out.println("accept marker="+prevAcceptInputMarker);
|
||||
input.rewind(prevAcceptInputMarker); // does nothing if we accept'd at input.index() but might need to rewind
|
||||
input.release(firstAcceptInputMarker); // kill any other markers in stream we made
|
||||
System.out.println("leaving with index "+input.index());
|
||||
return ttype;
|
||||
}
|
||||
|
||||
void addToClosure(List<ThreadState> closure, int ip, int alt, NFAStack context) {
|
||||
ThreadState t = new ThreadState(ip, alt, context);
|
||||
//System.out.println("add to closure "+ip+" "+closure);
|
||||
if ( closure.contains(t) ) return; // TODO: VERY INEFFICIENT! use int[num-states] as set test
|
||||
closure.add(t);
|
||||
short opcode = code[ip];
|
||||
ip++; // move to next instruction or first byte of operand
|
||||
switch (opcode) {
|
||||
case Bytecode.JMP :
|
||||
addToClosure(closure, getShort(code, ip), alt, context);
|
||||
break;
|
||||
case Bytecode.SAVE :
|
||||
int labelIndex = getShort(code, ip);
|
||||
ip += 2;
|
||||
addToClosure(closure, ip, alt, context); // do closure pass SAVE
|
||||
// TODO: impl
|
||||
break;
|
||||
case Bytecode.SPLIT :
|
||||
int nopnds = getShort(code, ip);
|
||||
ip += 2;
|
||||
// add split addresses to work queue in reverse order ('cept first one)
|
||||
for (int i=0; i<nopnds; i++) {
|
||||
addToClosure(closure, getShort(code, ip+i*2), alt, context);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
List<ThreadState> computeStartState(int ip) { // assume SPLIT at ip
|
||||
List<ThreadState> closure = new ArrayList<ThreadState>();
|
||||
ip++;
|
||||
int nalts = getShort(code, ip);
|
||||
ip += 2;
|
||||
// add split addresses to work queue in reverse order ('cept first one)
|
||||
for (int i=1; i<=nalts; i++) {
|
||||
addToClosure(closure, getShort(code, ip), i, NFAStack.EMPTY);
|
||||
ip += Bytecode.ADDR_SIZE;
|
||||
}
|
||||
return closure;
|
||||
}
|
||||
|
||||
public int execThompson_no_stack(CharStream input, int ip) {
|
||||
int c = input.LA(1);
|
||||
if ( c==Token.EOF ) return Token.EOF;
|
||||
|
||||
|
@ -195,7 +347,7 @@ workLoop:
|
|||
int prevAcceptLastCharIndex = -1;
|
||||
int prevAcceptInputMarker = -1;
|
||||
int firstAcceptInputMarker = -1;
|
||||
addToClosure(closure, ip);
|
||||
addToClosure_no_stack(closure, ip);
|
||||
do { // while more work
|
||||
c = input.LA(1);
|
||||
int i = 0;
|
||||
|
@ -210,26 +362,26 @@ processOneChar:
|
|||
switch (opcode) {
|
||||
case Bytecode.MATCH8 :
|
||||
if ( c == code[ip] ) {
|
||||
addToClosure(reach, ip+1);
|
||||
addToClosure_no_stack(reach, ip+1);
|
||||
}
|
||||
break;
|
||||
case Bytecode.MATCH16 :
|
||||
if ( c == getShort(code, ip) ) {
|
||||
addToClosure(reach, ip+2);
|
||||
addToClosure_no_stack(reach, ip+2);
|
||||
}
|
||||
break;
|
||||
case Bytecode.RANGE8 :
|
||||
if ( c>=code[ip] && c<=code[ip+1] ) {
|
||||
addToClosure(reach, ip+2);
|
||||
addToClosure_no_stack(reach, ip+2);
|
||||
}
|
||||
break;
|
||||
case Bytecode.RANGE16 :
|
||||
if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) {
|
||||
addToClosure(reach, ip+4);
|
||||
addToClosure_no_stack(reach, ip+4);
|
||||
}
|
||||
break;
|
||||
case Bytecode.WILDCARD :
|
||||
if ( c!=Token.EOF ) addToClosure(reach, ip);
|
||||
if ( c!=Token.EOF ) addToClosure_no_stack(reach, ip);
|
||||
break;
|
||||
case Bytecode.ACCEPT :
|
||||
int tokenLastCharIndex = input.index() - 1;
|
||||
|
@ -298,7 +450,7 @@ processOneChar:
|
|||
return ttype;
|
||||
}
|
||||
|
||||
void addToClosure(List<Integer> closure, int ip) {
|
||||
void addToClosure_no_stack(List<Integer> closure, int ip) {
|
||||
//System.out.println("add to closure "+ip+" "+closure);
|
||||
if ( closure.contains(ip) ) return; // TODO: VERY INEFFICIENT! use int[num-states] as set test
|
||||
closure.add(ip);
|
||||
|
@ -306,12 +458,12 @@ processOneChar:
|
|||
ip++; // move to next instruction or first byte of operand
|
||||
switch (opcode) {
|
||||
case Bytecode.JMP :
|
||||
addToClosure(closure, getShort(code, ip));
|
||||
addToClosure_no_stack(closure, getShort(code, ip));
|
||||
break;
|
||||
case Bytecode.SAVE :
|
||||
int labelIndex = getShort(code, ip);
|
||||
ip += 2;
|
||||
addToClosure(closure, ip); // do closure pass SAVE
|
||||
addToClosure_no_stack(closure, ip); // do closure pass SAVE
|
||||
// TODO: impl
|
||||
break;
|
||||
case Bytecode.SPLIT :
|
||||
|
@ -319,7 +471,7 @@ processOneChar:
|
|||
ip += 2;
|
||||
// add split addresses to work queue in reverse order ('cept first one)
|
||||
for (int i=0; i<nopnds; i++) {
|
||||
addToClosure(closure, getShort(code, ip+i*2));
|
||||
addToClosure_no_stack(closure, getShort(code, ip+i*2));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
package org.antlr.v4.runtime.nfa;
|
||||
|
||||
/** Identical to ANTLR's static grammar analysis NFAContext object */
|
||||
public class NFAStack {
|
||||
public static final NFAStack EMPTY = new NFAStack(null, -1);
|
||||
|
||||
public NFAStack parent;
|
||||
|
||||
/** The NFA state following state that invoked another rule's start state
|
||||
* is recorded on the rule invocation context stack.
|
||||
*/
|
||||
public int returnAddr;
|
||||
|
||||
/** Computing the hashCode is very expensive and NFA.addToClosure()
|
||||
* uses it to track when it's seen a state|ctx before to avoid
|
||||
* infinite loops. As we add new contexts, record the hash code
|
||||
* as this + parent.cachedHashCode. Avoids walking
|
||||
* up the tree for every hashCode(). Note that this caching works
|
||||
* because a context is a monotonically growing tree of context nodes
|
||||
* and nothing on the stack is ever modified...ctx just grows
|
||||
* or shrinks.
|
||||
*/
|
||||
protected int cachedHashCode;
|
||||
|
||||
public NFAStack(NFAStack parent, int returnAddr) {
|
||||
this.parent = parent;
|
||||
this.returnAddr = returnAddr;
|
||||
if ( returnAddr >= 0 ) {
|
||||
this.cachedHashCode = returnAddr;
|
||||
}
|
||||
if ( parent!=null ) {
|
||||
this.cachedHashCode += parent.cachedHashCode;
|
||||
}
|
||||
}
|
||||
|
||||
public int hashCode() { return cachedHashCode; }
|
||||
|
||||
/** Two contexts are equals() if both have
|
||||
* same call stack; walk upwards to the root.
|
||||
* Recall that the root sentinel node has no parent.
|
||||
* Note that you may be comparing contextsv in different alt trees.
|
||||
*/
|
||||
public boolean equals(Object o) {
|
||||
NFAStack other = ((NFAStack)o);
|
||||
if ( this.cachedHashCode != other.cachedHashCode ) {
|
||||
return false; // can't be same if hash is different
|
||||
}
|
||||
if ( this==other ) return true;
|
||||
|
||||
// System.out.println("comparing "+this+" with "+other);
|
||||
NFAStack sp = this;
|
||||
while ( sp.parent!=null && other.parent!=null ) {
|
||||
if ( sp.returnAddr != other.returnAddr) return false;
|
||||
sp = sp.parent;
|
||||
other = other.parent;
|
||||
}
|
||||
if ( !(sp.parent==null && other.parent==null) ) {
|
||||
return false; // both pointers must be at their roots after walk
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
NFAStack sp = this;
|
||||
buf.append("[");
|
||||
while ( sp.parent!=null ) {
|
||||
buf.append(sp.returnAddr);
|
||||
buf.append(" ");
|
||||
sp = sp.parent;
|
||||
}
|
||||
buf.append("$]");
|
||||
return buf.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
package org.antlr.v4.runtime.nfa;
|
||||
|
||||
/** NFA simulation thread state */
|
||||
public class ThreadState {
|
||||
public int addr;
|
||||
public int alt; // or speculatively matched token type for lexers
|
||||
public NFAStack context;
|
||||
public ThreadState(int addr, int alt, NFAStack context) {
|
||||
this.addr = addr;
|
||||
this.alt = alt;
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( o==null ) return false;
|
||||
if ( this==o ) return true;
|
||||
ThreadState other = (ThreadState)o;
|
||||
return this.addr==other.addr &&
|
||||
this.alt==other.alt &&
|
||||
this.context.equals(other.context);
|
||||
}
|
||||
|
||||
public int hashCode() { return addr + context.hashCode(); }
|
||||
|
||||
public String toString() {
|
||||
if ( context.parent==null ) {
|
||||
return "("+addr+","+alt+")";
|
||||
}
|
||||
return "("+addr+","+alt+","+context+")";
|
||||
}
|
||||
}
|
|
@ -178,13 +178,14 @@ public class NFAContext {
|
|||
}
|
||||
|
||||
public int hashCode() {
|
||||
int h = 0;
|
||||
NFAContext sp = this;
|
||||
while ( sp.parent!=null ) {
|
||||
h += sp.returnState.stateNumber;
|
||||
sp = sp.parent;
|
||||
}
|
||||
return h;
|
||||
return cachedHashCode; // works with tests; don't recompute.
|
||||
// int h = 0;
|
||||
// NFAContext sp = this;
|
||||
// while ( sp.parent!=null ) {
|
||||
// h += sp.returnState.stateNumber;
|
||||
// sp = sp.parent;
|
||||
// }
|
||||
// return h;
|
||||
}
|
||||
|
||||
/** How many rule invocations in this context? I.e., how many
|
||||
|
|
|
@ -19,11 +19,12 @@ public class CodeGenPipeline {
|
|||
LexerGrammar lg = (LexerGrammar)g;
|
||||
for (String modeName : lg.modes.keySet()) { // for each mode
|
||||
NFA nfa = NFABytecodeGenerator.getBytecode(lg, modeName);
|
||||
ANTLRStringStream input = new ANTLRStringStream("/*x*/ab");
|
||||
//ANTLRStringStream input = new ANTLRStringStream("32");
|
||||
ANTLRStringStream input = new ANTLRStringStream("/*x*/!ab");
|
||||
//ANTLRStringStream input = new ANTLRStringStream("abc32ab");
|
||||
int ttype = 0;
|
||||
while ( ttype!= Token.EOF ) {
|
||||
ttype = nfa.execThompson(input, 0); System.out.println("ttype="+ttype);
|
||||
ttype = nfa.execThompson(input); System.out.println("ttype="+ttype);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -103,6 +103,11 @@ public class NFABytecodeGenerator extends TreeParser {
|
|||
public String toString() { return addr+":AcceptInstr "+ruleIndex; }
|
||||
}
|
||||
|
||||
public static class RetInstr extends Instr {
|
||||
public short opcode() { return Bytecode.RET; }
|
||||
public int nBytes() { return 1; }
|
||||
}
|
||||
|
||||
public static class JumpInstr extends Instr {
|
||||
int target;
|
||||
public short opcode() { return Bytecode.JMP; };
|
||||
|
@ -120,6 +125,25 @@ public class NFABytecodeGenerator extends TreeParser {
|
|||
}
|
||||
}
|
||||
|
||||
public static class CallInstr extends Instr {
|
||||
Token token;
|
||||
int target;
|
||||
public CallInstr(Token token) { this.token = token; }
|
||||
public short opcode() { return Bytecode.CALL; };
|
||||
public int nBytes() { return 1+Bytecode.ADDR_SIZE; }
|
||||
public void write(byte[] code) {
|
||||
super.write(code);
|
||||
writeShort(code, addr+1, (short)target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return addr+":CallInstr{" +
|
||||
"target=" + target +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
||||
public static class SplitInstr extends Instr {
|
||||
List<Integer> addrs = new ArrayList<Integer>();
|
||||
int nAlts;
|
||||
|
@ -212,21 +236,26 @@ public class NFABytecodeGenerator extends TreeParser {
|
|||
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
|
||||
gen.setTreeNodeStream(nodes);
|
||||
int ttype = lg.getTokenType(r.name);
|
||||
tokenTypeToAddr[ttype] = gen.ip;
|
||||
ruleToAddr.put(r.name, gen.ip);
|
||||
if ( !r.isFragment() ) s0.addrs.add(gen.ip);
|
||||
if ( !r.isFragment() ) {
|
||||
s0.addrs.add(gen.ip);
|
||||
tokenTypeToAddr[ttype] = gen.ip;
|
||||
}
|
||||
try {
|
||||
gen.block();
|
||||
int ruleTokenType = lg.getTokenType(r.name);
|
||||
gen.emit(new NFABytecodeGenerator.AcceptInstr(ruleTokenType));
|
||||
if ( !r.isFragment() ) {
|
||||
gen.emit(new NFABytecodeGenerator.AcceptInstr(ruleTokenType));
|
||||
}
|
||||
else {
|
||||
gen.emit(new NFABytecodeGenerator.RetInstr());
|
||||
}
|
||||
}
|
||||
catch (Exception e){
|
||||
e.printStackTrace(System.err);
|
||||
}
|
||||
}
|
||||
byte[] code = NFABytecodeGenerator.getByteCode(gen.instrs);
|
||||
System.out.println("all:");
|
||||
System.out.println(Bytecode.disassemble(code));
|
||||
System.out.println("rule addrs="+ruleToAddr);
|
||||
|
||||
NFA nfa = new NFA(code, ruleToAddr);
|
||||
|
|
|
@ -168,8 +168,8 @@ range
|
|||
terminal
|
||||
: ^(STRING_LITERAL .) {emitString($STRING_LITERAL.token);}
|
||||
| STRING_LITERAL {emitString($STRING_LITERAL.token);}
|
||||
| ^(TOKEN_REF ARG_ACTION .)
|
||||
| ^(TOKEN_REF .)
|
||||
| ^(TOKEN_REF ARG_ACTION .) {emit(new CallInstr($TOKEN_REF.token));}
|
||||
| ^(TOKEN_REF .) {emit(new CallInstr($TOKEN_REF.token));}
|
||||
| TOKEN_REF
|
||||
| ^(ROOT terminal)
|
||||
| ^(BANG terminal)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// $ANTLR ${project.version} ${buildNumber} NFABytecodeTriggers.g 2010-04-30 18:19:35
|
||||
// $ANTLR ${project.version} ${buildNumber} NFABytecodeTriggers.g 2010-05-01 11:23:08
|
||||
|
||||
package org.antlr.v4.codegen;
|
||||
|
||||
|
@ -1319,6 +1319,8 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
|
|||
public final void terminal() throws RecognitionException {
|
||||
GrammarAST STRING_LITERAL3=null;
|
||||
GrammarAST STRING_LITERAL4=null;
|
||||
GrammarAST TOKEN_REF5=null;
|
||||
GrammarAST TOKEN_REF6=null;
|
||||
|
||||
try {
|
||||
// NFABytecodeTriggers.g:169:5: ( ^( STRING_LITERAL . ) | STRING_LITERAL | ^( TOKEN_REF ARG_ACTION . ) | ^( TOKEN_REF . ) | TOKEN_REF | ^( ROOT terminal ) | ^( BANG terminal ) )
|
||||
|
@ -1349,42 +1351,44 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
|
|||
case 3 :
|
||||
// NFABytecodeTriggers.g:171:7: ^( TOKEN_REF ARG_ACTION . )
|
||||
{
|
||||
match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal847);
|
||||
TOKEN_REF5=(GrammarAST)match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal847);
|
||||
|
||||
match(input, Token.DOWN, null);
|
||||
match(input,ARG_ACTION,FOLLOW_ARG_ACTION_in_terminal849);
|
||||
matchAny(input);
|
||||
|
||||
match(input, Token.UP, null);
|
||||
emit(new CallInstr(TOKEN_REF5.token));
|
||||
|
||||
}
|
||||
break;
|
||||
case 4 :
|
||||
// NFABytecodeTriggers.g:172:7: ^( TOKEN_REF . )
|
||||
{
|
||||
match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal862);
|
||||
TOKEN_REF6=(GrammarAST)match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal863);
|
||||
|
||||
match(input, Token.DOWN, null);
|
||||
matchAny(input);
|
||||
|
||||
match(input, Token.UP, null);
|
||||
emit(new CallInstr(TOKEN_REF6.token));
|
||||
|
||||
}
|
||||
break;
|
||||
case 5 :
|
||||
// NFABytecodeTriggers.g:173:7: TOKEN_REF
|
||||
{
|
||||
match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal877);
|
||||
match(input,TOKEN_REF,FOLLOW_TOKEN_REF_in_terminal879);
|
||||
|
||||
}
|
||||
break;
|
||||
case 6 :
|
||||
// NFABytecodeTriggers.g:174:7: ^( ROOT terminal )
|
||||
{
|
||||
match(input,ROOT,FOLLOW_ROOT_in_terminal891);
|
||||
match(input,ROOT,FOLLOW_ROOT_in_terminal893);
|
||||
|
||||
match(input, Token.DOWN, null);
|
||||
pushFollow(FOLLOW_terminal_in_terminal893);
|
||||
pushFollow(FOLLOW_terminal_in_terminal895);
|
||||
terminal();
|
||||
|
||||
state._fsp--;
|
||||
|
@ -1397,10 +1401,10 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
|
|||
case 7 :
|
||||
// NFABytecodeTriggers.g:175:7: ^( BANG terminal )
|
||||
{
|
||||
match(input,BANG,FOLLOW_BANG_in_terminal906);
|
||||
match(input,BANG,FOLLOW_BANG_in_terminal908);
|
||||
|
||||
match(input, Token.DOWN, null);
|
||||
pushFollow(FOLLOW_terminal_in_terminal908);
|
||||
pushFollow(FOLLOW_terminal_in_terminal910);
|
||||
terminal();
|
||||
|
||||
state._fsp--;
|
||||
|
@ -1785,11 +1789,11 @@ public class NFABytecodeTriggers extends NFABytecodeGenerator {
|
|||
public static final BitSet FOLLOW_STRING_LITERAL_in_terminal833 = new BitSet(new long[]{0x0000000000000002L});
|
||||
public static final BitSet FOLLOW_TOKEN_REF_in_terminal847 = new BitSet(new long[]{0x0000000000000004L});
|
||||
public static final BitSet FOLLOW_ARG_ACTION_in_terminal849 = new BitSet(new long[]{0xFFFFFFFFFFFFFFF0L,0x0000007FFFFFFFFFL});
|
||||
public static final BitSet FOLLOW_TOKEN_REF_in_terminal862 = new BitSet(new long[]{0x0000000000000004L});
|
||||
public static final BitSet FOLLOW_TOKEN_REF_in_terminal877 = new BitSet(new long[]{0x0000000000000002L});
|
||||
public static final BitSet FOLLOW_ROOT_in_terminal891 = new BitSet(new long[]{0x0000000000000004L});
|
||||
public static final BitSet FOLLOW_terminal_in_terminal893 = new BitSet(new long[]{0x0000000000000008L});
|
||||
public static final BitSet FOLLOW_BANG_in_terminal906 = new BitSet(new long[]{0x0000000000000004L});
|
||||
public static final BitSet FOLLOW_terminal_in_terminal908 = new BitSet(new long[]{0x0000000000000008L});
|
||||
public static final BitSet FOLLOW_TOKEN_REF_in_terminal863 = new BitSet(new long[]{0x0000000000000004L});
|
||||
public static final BitSet FOLLOW_TOKEN_REF_in_terminal879 = new BitSet(new long[]{0x0000000000000002L});
|
||||
public static final BitSet FOLLOW_ROOT_in_terminal893 = new BitSet(new long[]{0x0000000000000004L});
|
||||
public static final BitSet FOLLOW_terminal_in_terminal895 = new BitSet(new long[]{0x0000000000000008L});
|
||||
public static final BitSet FOLLOW_BANG_in_terminal908 = new BitSet(new long[]{0x0000000000000004L});
|
||||
public static final BitSet FOLLOW_terminal_in_terminal910 = new BitSet(new long[]{0x0000000000000008L});
|
||||
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
package org.antlr.v4.test;
|
||||
|
||||
import org.antlr.v4.Tool;
|
||||
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||
import org.antlr.v4.runtime.nfa.NFA;
|
||||
import org.antlr.v4.semantics.SemanticPipeline;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
import org.antlr.v4.tool.LexerGrammar;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestNFABytecodeGeneration extends BaseTest {
|
||||
@Test public void testString() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
"A : 'ab' ;");
|
||||
String expecting =
|
||||
"0000:\tsplit 5\n" +
|
||||
"0005:\tmatch8 'a'\n" +
|
||||
"0007:\tmatch8 'b'\n" +
|
||||
"0009:\taccept 4\n";
|
||||
checkBytecode(g, expecting);
|
||||
}
|
||||
|
||||
@Test public void testIDandIntandKeyword() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"lexer grammar L;\n" +
|
||||
"A : 'ab';\n" +
|
||||
"B : 'a'..'z'+ ;\n" +
|
||||
"I : '0'..'9'+ ;\n");
|
||||
String expecting =
|
||||
"0000:\tsplit 9, 16, 29\n" +
|
||||
"0009:\tmatch8 'a'\n" +
|
||||
"0011:\tmatch8 'b'\n" +
|
||||
"0013:\taccept 4\n" +
|
||||
"0016:\trange8 'a', 'z'\n" +
|
||||
"0019:\tsplit 16, 26\n" +
|
||||
"0026:\taccept 5\n" +
|
||||
"0029:\trange8 '0', '9'\n" +
|
||||
"0032:\tsplit 29, 39\n" +
|
||||
"0039:\taccept 6\n";
|
||||
checkBytecode(g, expecting);
|
||||
}
|
||||
|
||||
@Test public void testNonGreedy() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"lexer grammar L;\n" +
|
||||
"\n" +
|
||||
"CMT : '/*' (options {greedy=false;}:.)* '*/' ;\n" +
|
||||
"ID : 'ab' ;\n");
|
||||
String expecting =
|
||||
"0000:\tsplit 7, 29\n" +
|
||||
"0007:\tmatch8 '/'\n" +
|
||||
"0009:\tmatch8 '*'\n" +
|
||||
"0011:\tsplit 22, 18\n" +
|
||||
"0018:\twildcard \n" +
|
||||
"0019:\tjmp 11\n" +
|
||||
"0022:\tmatch8 '*'\n" +
|
||||
"0024:\tmatch8 '/'\n" +
|
||||
"0026:\taccept 4\n" +
|
||||
"0029:\tmatch8 'a'\n" +
|
||||
"0031:\tmatch8 'b'\n" +
|
||||
"0033:\taccept 5\n";
|
||||
checkBytecode(g, expecting);
|
||||
}
|
||||
|
||||
@Test public void testCallFragment() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"lexer grammar L;\n" +
|
||||
"I : D+ ;\n" +
|
||||
"fragment D : '0'..'9'+ ;\n");
|
||||
String expecting =
|
||||
"0000:\tsplit 5\n" +
|
||||
"0005:\tsplit 5, 12\n" +
|
||||
"0012:\taccept 4\n" +
|
||||
"0015:\trange8 '0', '9'\n" +
|
||||
"0018:\tsplit 15, 25\n" +
|
||||
"0025:\tret \n";
|
||||
checkBytecode(g, expecting);
|
||||
}
|
||||
|
||||
public void _template() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"\n");
|
||||
String expecting =
|
||||
"\n";
|
||||
checkBytecode(g, expecting);
|
||||
}
|
||||
|
||||
void checkBytecode(LexerGrammar g, String expecting) {
|
||||
if ( g.ast!=null && !g.ast.hasErrors ) {
|
||||
System.out.println(g.ast.toStringTree());
|
||||
Tool antlr = new Tool();
|
||||
SemanticPipeline sem = new SemanticPipeline(g);
|
||||
sem.process();
|
||||
if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
|
||||
for (Grammar imp : g.getImportedGrammars()) {
|
||||
antlr.process(imp);
|
||||
}
|
||||
}
|
||||
}
|
||||
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
|
||||
assertEquals(expecting, Bytecode.disassemble(nfa.code));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
package org.antlr.v4.test;
|
||||
|
||||
import org.antlr.runtime.ANTLRStringStream;
|
||||
import org.antlr.runtime.Token;
|
||||
import org.antlr.v4.Tool;
|
||||
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||
import org.antlr.v4.runtime.nfa.NFA;
|
||||
import org.antlr.v4.semantics.SemanticPipeline;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
import org.antlr.v4.tool.LexerGrammar;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/** */
|
||||
public class TestNFABytecodeInterp extends BaseTest {
|
||||
@Test public void testString() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
"A : 'ab' ;");
|
||||
String expecting = "A, A, EOF";
|
||||
checkMatches(g, "abab", expecting);
|
||||
}
|
||||
|
||||
@Test public void testIDandIntandKeyword() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"lexer grammar L;\n" +
|
||||
"A : 'ab';\n" +
|
||||
"B : 'a'..'z'+ ;\n" +
|
||||
"I : '0'..'9'+ ;\n");
|
||||
String expecting = "A, I, B, EOF";
|
||||
checkMatches(g, "ab32abc", expecting);
|
||||
}
|
||||
|
||||
@Test public void testNonGreedy() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"lexer grammar L;\n" +
|
||||
"\n" +
|
||||
"CMT : '/*' (options {greedy=false;}:.)* '*/' ;\n" +
|
||||
"ID : 'ab' ;\n");
|
||||
String expecting = "ID, CMT, EOF";
|
||||
checkMatches(g, "ab/* x */", expecting);
|
||||
}
|
||||
|
||||
@Test public void testNonGreedyAndCommonLeftPrefix() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"lexer grammar L;\n" +
|
||||
"\n" +
|
||||
"CMT : '/*' (options {greedy=false;}:.)* '*/' ;\n" +
|
||||
"CMT2: '/*' (options {greedy=false;}:.)* '*/' '!' ;\n" +
|
||||
"ID : 'ab' ;\n");
|
||||
String expecting = "ID, CMT2, CMT, EOF";
|
||||
checkMatches(g, "ab/* x */!/* foo */", expecting);
|
||||
}
|
||||
|
||||
@Test public void testCallFragment() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"lexer grammar L;\n" +
|
||||
"I : D+ ;\n" +
|
||||
"fragment D : '0'..'9'+ ;\n");
|
||||
String expecting = "";
|
||||
checkMatches(g, "a", expecting);
|
||||
}
|
||||
|
||||
public void _template() throws Exception {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"\n");
|
||||
String expecting = "";
|
||||
checkMatches(g, "input", expecting);
|
||||
}
|
||||
|
||||
void checkMatches(LexerGrammar g, String input, String expecting) {
|
||||
if ( g.ast!=null && !g.ast.hasErrors ) {
|
||||
System.out.println(g.ast.toStringTree());
|
||||
Tool antlr = new Tool();
|
||||
SemanticPipeline sem = new SemanticPipeline(g);
|
||||
sem.process();
|
||||
if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
|
||||
for (Grammar imp : g.getImportedGrammars()) {
|
||||
antlr.process(imp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<Integer> expectingTokens = new ArrayList<Integer>();
|
||||
if ( expecting!=null && !expecting.trim().equals("") ) {
|
||||
for (String tname : expecting.replace(" ", "").split(",")) {
|
||||
int ttype = g.getTokenType(tname);
|
||||
expectingTokens.add(ttype);
|
||||
}
|
||||
}
|
||||
|
||||
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
|
||||
ANTLRStringStream in = new ANTLRStringStream(input);
|
||||
List<Integer> tokens = new ArrayList<Integer>();
|
||||
int ttype = 0;
|
||||
do {
|
||||
ttype = nfa.execThompson(in);
|
||||
tokens.add(ttype);
|
||||
} while ( ttype!= Token.EOF );
|
||||
assertEquals(expectingTokens, tokens);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue