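Reorganize a bit: rename lastAccept* to prevAccept* in NFA.execThompson, separate the longer-match case from the same-length tie-break, and loop the CodeGenPipeline test driver until Token.EOF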

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6825]
2010-04-30 13:54:10 -08:00 · 2010-04-30 13:54:10 -08:00 · 7cf42fe4dd
parent a576c16905
commit 7cf42fe4dd
2 changed files with 31 additions and 24 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/nfa/NFA.java
@@ -183,17 +183,17 @@ workLoop:
 	}
 	public int execThompson(CharStream input, int ip) {
 		int c = input.LA(1);
 		if ( c==Token.EOF ) return Token.EOF;
 		List<Integer> closure = new ArrayList<Integer>();
 		List<Integer> reach = new ArrayList<Integer>();
-		int lastAcceptAddr = Integer.MAX_VALUE;
+		int prevAcceptAddr = Integer.MAX_VALUE;
-		int lastAcceptLastCharIndex = -1;
+		int prevAcceptLastCharIndex = -1;
 		addToClosure(closure, ip);
-		//while ( input.LA(1) != Token.EOF ) {
+		do { // while more work
-		while ( true ) {
+			c = input.LA(1);
 			if ( closure.size()==0 ) break; // no more work
 			int c = input.LA(1);
 processOneChar:
 			//while ( closure.size()>0 ) {
 			for (int i=0; i<closure.size(); i++) {
 				System.out.println("input["+input.index()+"]=="+(char)c+" closure="+closure+", i="+i+", reach="+ reach);
 				ip = closure.get(i); 
@@ -223,18 +223,22 @@ processOneChar:
 						break;
 					case Bytecode.ACCEPT :
 						int ttype = getShort(code, ip);
-						System.out.println("ACCEPT "+ttype+" with last char position "+(input.index()-1));
+						int tokenLastCharIndex = input.index() - 1;
-						if ( input.index()-1 >= lastAcceptLastCharIndex ) {
+						System.out.println("ACCEPT "+ttype+" with last char position "+ tokenLastCharIndex);
-							lastAcceptLastCharIndex = input.index()-1;
+						if ( tokenLastCharIndex > prevAcceptLastCharIndex ) {
-							// choose first rule mentioned if match of same length
+							prevAcceptLastCharIndex = tokenLastCharIndex;
-							if ( ip-1 < lastAcceptAddr ) { // it will see both accepts of ambig rules
+							// choose longest match so far regardless of rule priority
-								System.out.println("replacing old best match @ "+lastAcceptAddr);
+							System.out.println("replacing old best match @ "+prevAcceptAddr);
-								lastAcceptAddr = ip-1;
+							prevAcceptAddr = ip-1;
 						}
 						else if ( tokenLastCharIndex == prevAcceptLastCharIndex ) {
 							// choose first rule matched if match is of same length
 							if ( ip-1 < prevAcceptAddr ) { // it will see both accepts for ambig rules
 								System.out.println("replacing old best match @ "+prevAcceptAddr);
 								prevAcceptAddr = ip-1;
 							}
 						}
-						// returning gives first match not longest; i.e., like PEG
+						// keep trying for more to get longest match
 						//break processOneChar;
 						// keep trying for more to get longest (like we expect)
 						break;
 					case Bytecode.JMP :
 					case Bytecode.SPLIT :
@@ -243,7 +247,7 @@ processOneChar:
 						throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
 				}
 			}
-			if ( reach.size()>0 ) { // if we reached other code, consume and process reach list
+			if ( reach.size()>0 ) { // if we reached other states, consume and process them
 				System.out.println("CONSUME");
 				input.consume();
 			}
@@ -252,9 +256,10 @@ processOneChar:
 			reach = closure;
 			closure = tmp;
 			reach.clear();
-		}
+		} while ( closure.size()>0 );
-		if ( lastAcceptAddr<0 ) return Token.INVALID_TOKEN_TYPE;
+
-		int ttype = getShort(code, lastAcceptAddr+1);
+		if ( prevAcceptAddr<0 ) return Token.INVALID_TOKEN_TYPE;
 		int ttype = getShort(code, prevAcceptAddr+1);
 		return ttype;
 	}
--- a/tool/src/org/antlr/v4/codegen/CodeGenPipeline.java
+++ b/tool/src/org/antlr/v4/codegen/CodeGenPipeline.java
@@ -1,6 +1,7 @@
 package org.antlr.v4.codegen;
 import org.antlr.runtime.ANTLRStringStream;
 import org.antlr.runtime.Token;
 import org.antlr.v4.runtime.nfa.NFA;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.LexerGrammar;
@@ -19,9 +20,10 @@ public class CodeGenPipeline {
 		for (String modeName : lg.modes.keySet()) { // for each mode
 			NFA nfa = NFABytecodeGenerator.getBytecode(lg, modeName);
 			ANTLRStringStream input = new ANTLRStringStream("abc32ab");
-			int ttype = nfa.execThompson(input, 0); System.out.println("ttype="+ttype);
+			int ttype = 0;
-			ttype = nfa.execThompson(input,0); System.out.println("ttype="+ttype);
+			while ( ttype!= Token.EOF ) {
-			ttype = nfa.execThompson(input,0); System.out.println("ttype="+ttype);
+				ttype = nfa.execThompson(input, 0); System.out.println("ttype="+ttype);
 			}
 		}
 	}
 }