reorg a bit
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6825]
This commit is contained in:
parent
a576c16905
commit
7cf42fe4dd
|
@ -183,17 +183,17 @@ workLoop:
|
||||||
}
|
}
|
||||||
|
|
||||||
public int execThompson(CharStream input, int ip) {
|
public int execThompson(CharStream input, int ip) {
|
||||||
|
int c = input.LA(1);
|
||||||
|
if ( c==Token.EOF ) return Token.EOF;
|
||||||
|
|
||||||
List<Integer> closure = new ArrayList<Integer>();
|
List<Integer> closure = new ArrayList<Integer>();
|
||||||
List<Integer> reach = new ArrayList<Integer>();
|
List<Integer> reach = new ArrayList<Integer>();
|
||||||
int lastAcceptAddr = Integer.MAX_VALUE;
|
int prevAcceptAddr = Integer.MAX_VALUE;
|
||||||
int lastAcceptLastCharIndex = -1;
|
int prevAcceptLastCharIndex = -1;
|
||||||
addToClosure(closure, ip);
|
addToClosure(closure, ip);
|
||||||
//while ( input.LA(1) != Token.EOF ) {
|
do { // while more work
|
||||||
while ( true ) {
|
c = input.LA(1);
|
||||||
if ( closure.size()==0 ) break; // no more work
|
|
||||||
int c = input.LA(1);
|
|
||||||
processOneChar:
|
processOneChar:
|
||||||
//while ( closure.size()>0 ) {
|
|
||||||
for (int i=0; i<closure.size(); i++) {
|
for (int i=0; i<closure.size(); i++) {
|
||||||
System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
|
System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
|
||||||
ip = closure.get(i);
|
ip = closure.get(i);
|
||||||
|
@ -223,18 +223,22 @@ processOneChar:
|
||||||
break;
|
break;
|
||||||
case Bytecode.ACCEPT :
|
case Bytecode.ACCEPT :
|
||||||
int ttype = getShort(code, ip);
|
int ttype = getShort(code, ip);
|
||||||
System.out.println("ACCEPT "+ttype+" with last char position "+(input.index()-1));
|
int tokenLastCharIndex = input.index() - 1;
|
||||||
if ( input.index()-1 >= lastAcceptLastCharIndex ) {
|
System.out.println("ACCEPT "+ttype+" with last char position "+ tokenLastCharIndex);
|
||||||
lastAcceptLastCharIndex = input.index()-1;
|
if ( tokenLastCharIndex > prevAcceptLastCharIndex ) {
|
||||||
// choose first rule mentioned if match of same length
|
prevAcceptLastCharIndex = tokenLastCharIndex;
|
||||||
if ( ip-1 < lastAcceptAddr ) { // it will see both accepts of ambig rules
|
// choose longest match so far regardless of rule priority
|
||||||
System.out.println("replacing old best match @ "+lastAcceptAddr);
|
System.out.println("replacing old best match @ "+prevAcceptAddr);
|
||||||
lastAcceptAddr = ip-1;
|
prevAcceptAddr = ip-1;
|
||||||
|
}
|
||||||
|
else if ( tokenLastCharIndex == prevAcceptLastCharIndex ) {
|
||||||
|
// choose first rule matched if match is of same length
|
||||||
|
if ( ip-1 < prevAcceptAddr ) { // it will see both accepts for ambig rules
|
||||||
|
System.out.println("replacing old best match @ "+prevAcceptAddr);
|
||||||
|
prevAcceptAddr = ip-1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// returning gives first match not longest; i.e., like PEG
|
// keep trying for more to get longest match
|
||||||
//break processOneChar;
|
|
||||||
// keep trying for more to get longest (like we expect)
|
|
||||||
break;
|
break;
|
||||||
case Bytecode.JMP :
|
case Bytecode.JMP :
|
||||||
case Bytecode.SPLIT :
|
case Bytecode.SPLIT :
|
||||||
|
@ -243,7 +247,7 @@ processOneChar:
|
||||||
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
|
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( reach.size()>0 ) { // if we reached other code, consume and process reach list
|
if ( reach.size()>0 ) { // if we reached other states, consume and process them
|
||||||
System.out.println("CONSUME");
|
System.out.println("CONSUME");
|
||||||
input.consume();
|
input.consume();
|
||||||
}
|
}
|
||||||
|
@ -252,9 +256,10 @@ processOneChar:
|
||||||
reach = closure;
|
reach = closure;
|
||||||
closure = tmp;
|
closure = tmp;
|
||||||
reach.clear();
|
reach.clear();
|
||||||
}
|
} while ( closure.size()>0 );
|
||||||
if ( lastAcceptAddr<0 ) return Token.INVALID_TOKEN_TYPE;
|
|
||||||
int ttype = getShort(code, lastAcceptAddr+1);
|
if ( prevAcceptAddr<0 ) return Token.INVALID_TOKEN_TYPE;
|
||||||
|
int ttype = getShort(code, prevAcceptAddr+1);
|
||||||
return ttype;
|
return ttype;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package org.antlr.v4.codegen;
|
package org.antlr.v4.codegen;
|
||||||
|
|
||||||
import org.antlr.runtime.ANTLRStringStream;
|
import org.antlr.runtime.ANTLRStringStream;
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
import org.antlr.v4.runtime.nfa.NFA;
|
import org.antlr.v4.runtime.nfa.NFA;
|
||||||
import org.antlr.v4.tool.Grammar;
|
import org.antlr.v4.tool.Grammar;
|
||||||
import org.antlr.v4.tool.LexerGrammar;
|
import org.antlr.v4.tool.LexerGrammar;
|
||||||
|
@ -19,9 +20,10 @@ public class CodeGenPipeline {
|
||||||
for (String modeName : lg.modes.keySet()) { // for each mode
|
for (String modeName : lg.modes.keySet()) { // for each mode
|
||||||
NFA nfa = NFABytecodeGenerator.getBytecode(lg, modeName);
|
NFA nfa = NFABytecodeGenerator.getBytecode(lg, modeName);
|
||||||
ANTLRStringStream input = new ANTLRStringStream("abc32ab");
|
ANTLRStringStream input = new ANTLRStringStream("abc32ab");
|
||||||
int ttype = nfa.execThompson(input, 0); System.out.println("ttype="+ttype);
|
int ttype = 0;
|
||||||
ttype = nfa.execThompson(input,0); System.out.println("ttype="+ttype);
|
while ( ttype!= Token.EOF ) {
|
||||||
ttype = nfa.execThompson(input,0); System.out.println("ttype="+ttype);
|
ttype = nfa.execThompson(input, 0); System.out.println("ttype="+ttype);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue