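Reorganize execThompson a bit: rename lastAccept* to prevAccept*, turn the work loop into a do-while, separate the longest-match case from the same-length tie-break, and loop the test driver until EOF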

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6825]
This commit is contained in:
parrt 2010-04-30 13:54:10 -08:00
parent a576c16905
commit 7cf42fe4dd
2 changed files with 31 additions and 24 deletions

View File

@ -183,17 +183,17 @@ workLoop:
} }
public int execThompson(CharStream input, int ip) { public int execThompson(CharStream input, int ip) {
int c = input.LA(1);
if ( c==Token.EOF ) return Token.EOF;
List<Integer> closure = new ArrayList<Integer>(); List<Integer> closure = new ArrayList<Integer>();
List<Integer> reach = new ArrayList<Integer>(); List<Integer> reach = new ArrayList<Integer>();
int lastAcceptAddr = Integer.MAX_VALUE; int prevAcceptAddr = Integer.MAX_VALUE;
int lastAcceptLastCharIndex = -1; int prevAcceptLastCharIndex = -1;
addToClosure(closure, ip); addToClosure(closure, ip);
//while ( input.LA(1) != Token.EOF ) { do { // while more work
while ( true ) { c = input.LA(1);
if ( closure.size()==0 ) break; // no more work
int c = input.LA(1);
processOneChar: processOneChar:
//while ( closure.size()>0 ) {
for (int i=0; i<closure.size(); i++) { for (int i=0; i<closure.size(); i++) {
System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach); System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
ip = closure.get(i); ip = closure.get(i);
@ -223,18 +223,22 @@ processOneChar:
break; break;
case Bytecode.ACCEPT : case Bytecode.ACCEPT :
int ttype = getShort(code, ip); int ttype = getShort(code, ip);
System.out.println("ACCEPT "+ttype+" with last char position "+(input.index()-1)); int tokenLastCharIndex = input.index() - 1;
if ( input.index()-1 >= lastAcceptLastCharIndex ) { System.out.println("ACCEPT "+ttype+" with last char position "+ tokenLastCharIndex);
lastAcceptLastCharIndex = input.index()-1; if ( tokenLastCharIndex > prevAcceptLastCharIndex ) {
// choose first rule mentioned if match of same length prevAcceptLastCharIndex = tokenLastCharIndex;
if ( ip-1 < lastAcceptAddr ) { // it will see both accepts of ambig rules // choose longest match so far regardless of rule priority
System.out.println("replacing old best match @ "+lastAcceptAddr); System.out.println("replacing old best match @ "+prevAcceptAddr);
lastAcceptAddr = ip-1; prevAcceptAddr = ip-1;
}
else if ( tokenLastCharIndex == prevAcceptLastCharIndex ) {
// choose first rule matched if match is of same length
if ( ip-1 < prevAcceptAddr ) { // it will see both accepts for ambig rules
System.out.println("replacing old best match @ "+prevAcceptAddr);
prevAcceptAddr = ip-1;
} }
} }
// returning gives first match not longest; i.e., like PEG // keep trying for more to get longest match
//break processOneChar;
// keep trying for more to get longest (like we expect)
break; break;
case Bytecode.JMP : case Bytecode.JMP :
case Bytecode.SPLIT : case Bytecode.SPLIT :
@ -243,7 +247,7 @@ processOneChar:
throw new RuntimeException("invalid instruction @ "+ip+": "+opcode); throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
} }
} }
if ( reach.size()>0 ) { // if we reached other code, consume and process reach list if ( reach.size()>0 ) { // if we reached other states, consume and process them
System.out.println("CONSUME"); System.out.println("CONSUME");
input.consume(); input.consume();
} }
@ -252,9 +256,10 @@ processOneChar:
reach = closure; reach = closure;
closure = tmp; closure = tmp;
reach.clear(); reach.clear();
} } while ( closure.size()>0 );
if ( lastAcceptAddr<0 ) return Token.INVALID_TOKEN_TYPE;
int ttype = getShort(code, lastAcceptAddr+1); if ( prevAcceptAddr<0 ) return Token.INVALID_TOKEN_TYPE;
int ttype = getShort(code, prevAcceptAddr+1);
return ttype; return ttype;
} }

View File

@ -1,6 +1,7 @@
package org.antlr.v4.codegen; package org.antlr.v4.codegen;
import org.antlr.runtime.ANTLRStringStream; import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token;
import org.antlr.v4.runtime.nfa.NFA; import org.antlr.v4.runtime.nfa.NFA;
import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar; import org.antlr.v4.tool.LexerGrammar;
@ -19,9 +20,10 @@ public class CodeGenPipeline {
for (String modeName : lg.modes.keySet()) { // for each mode for (String modeName : lg.modes.keySet()) { // for each mode
NFA nfa = NFABytecodeGenerator.getBytecode(lg, modeName); NFA nfa = NFABytecodeGenerator.getBytecode(lg, modeName);
ANTLRStringStream input = new ANTLRStringStream("abc32ab"); ANTLRStringStream input = new ANTLRStringStream("abc32ab");
int ttype = nfa.execThompson(input, 0); System.out.println("ttype="+ttype); int ttype = 0;
ttype = nfa.execThompson(input,0); System.out.println("ttype="+ttype); while ( ttype!= Token.EOF ) {
ttype = nfa.execThompson(input,0); System.out.println("ttype="+ttype); ttype = nfa.execThompson(input, 0); System.out.println("ttype="+ttype);
}
} }
} }
} }