simplified delete configs algorithm.

2012-09-29 12:48:20 -07:00 · 2012-09-29 12:48:20 -07:00 · b56cd0f587
parent 84c786da50
commit b56cd0f587
3 changed files with 24 additions and 31 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
@ -42,8 +42,6 @@ import org.antlr.v4.runtime.misc.Nullable;

 import java.io.IOException;
 import java.io.OutputStream;
-import java.util.HashSet;
-import java.util.Set;

 /** "dup" of ParserInterpreter */
 public class LexerATNSimulator extends ATNSimulator {
@ -433,10 +431,11 @@ public class LexerATNSimulator extends ATNSimulator {
 					captureSimState(prevAccept, input, c);
 				}

-				// if we reach lexer accept state, toss out any configs pointing
-				// at wildcard edges in rest of configs work list associated
-				// with this rule (config.alt); that rule is done. this is how
-				// we cut off nongreedy .+ loops.
+				// if we reach lexer accept state with empty stack,
+				// toss out any configs pointing at wildcard edges
+				// in rest of configs work list associated with this
+				// rule (config.alt); that rule is done. this is how we
+				// cut off nongreedy .+ loops.
 				reach = deleteWildcardConfigsForAlt(reach, ci, c);

 			 	// move to next char, looking for longer match
@ -528,13 +527,16 @@ public class LexerATNSimulator extends ATNSimulator {
 	}

 	/** Delete configs for alt following ci that have a wildcard edge but
-	 *  only for configs with same stack. E.g., if we want to kill after
+	 *  only for configs with empty stack. E.g., if we want to kill after
 	 *  config (2,1,[$]), then we need to wack only configs with $ stack:
 	 *
 	 *  	[..., (2,1,[$]), ..., (7,1,[[$, 6 $]])]
 	 *
-	 *  That means wacking (7,1,[$]) but not (7,1,[6 $]).  If incoming config
-	 *  has multiple stacks, must look for each one in other configs.
+	 *  That means whacking (7,1,[$]) but not (7,1,[6 $]).
+	 *
+	 *  The incoming config could have multiple stacks, but we only care
+	 *  about the empty stack, since that means we reached the end of a
+	 *  lexer rule invoked directly from nextToken.
 	 *
 	 *  Closure is unmodified; copy returned.
 	 */
@ -547,13 +549,6 @@ public class LexerATNSimulator extends ATNSimulator {
 			System.out.printf("deleteWildcardConfigsForAlt for alt %d after config %d\n", alt, ci);
 		}

-		// collect ctxs from incoming config; must wack all of those.
-		Set<SingletonPredictionContext> contextsToKill =
-			new HashSet<SingletonPredictionContext>();
-		for (SingletonPredictionContext ctx : config.context) {
-			contextsToKill.add(ctx);
-		}
-
 		ATNConfigSet dup = new ATNConfigSet(); // build up as we go thru loop
 		for (int j=0; j<=ci; j++) dup.add(closure.get(j)); // add stuff up to ci
 		int j=ci+1;
@ -562,11 +557,10 @@ public class LexerATNSimulator extends ATNSimulator {
 			boolean isWildcard = c.state.getClass() == ATNState.class && // plain state only, not rulestop etc..
 				    c.state.transition(0) instanceof WildcardTransition;
 			if ( c.alt == alt && isWildcard ) {
-				// found config to kill but only if same stack.
-				// find c stacks that are in contextsToKill
+				// found config to kill but only if empty stack.
 				for (SingletonPredictionContext ctx : c.context) {
-					if ( contextsToKill.contains(ctx) ) {
-						// c.alt, c.ctx matches and j > ci => kill it
+					if ( ctx.isEmpty() ) {
+						// c.alt matches, empty stack, and j > ci => kill it
 						if ( debug ) {
 							System.out.format("delete config %s since alt %d and %d leads to wildcard\n",
 											  c, c.alt, c.state.stateNumber);
--- a/tool/playground/L.g
+++ b/tool/playground/L.g
@ -1,7 +1,6 @@
 lexer grammar L;
-STRING_START : '"' {pushMode(STRING_MODE); more();} ;
-WS : ' '|'
-' {skip();} ;
+STRING_START : '"' -> pushMode(STRING_MODE), more ;
+WS : (' '|'\n') -> skip ;
 mode STRING_MODE;
-STRING : '"' {popMode();} ;
-ANY : . {more();} ;
+STRING : '"' -> popMode ;
+ANY : . -> more ;
--- a/tool/test/org/antlr/v4/test/TestLexerExec.java
+++ b/tool/test/org/antlr/v4/test/TestLexerExec.java
@ -125,12 +125,12 @@ public class TestLexerExec extends BaseTest {
 			"STRING_START : '\"' -> pushMode(STRING_MODE), more ;\n" +
 			"WS : (' '|'\n') -> skip ;\n"+
 			"mode STRING_MODE;\n"+
-			"STRING : '\"' -> popMode ;\n"+
+			"STRING : '\"' -> popMode ;\n"+  // token type 2
 			"ANY : . -> more ;\n";
 		String found = execLexer("L.g4", grammar, "L", "\"abc\" \"ab\"");
 		String expecting =
-			"[@0,0:4='\"abc\"',<3>,1:0]\n" +
-			"[@1,6:9='\"ab\"',<3>,1:6]\n" +
+			"[@0,0:4='\"abc\"',<2>,1:0]\n" +
+			"[@1,6:9='\"ab\"',<2>,1:6]\n" +
 			"[@2,10:9='<EOF>',<-1>,1:10]\n";
 		assertEquals(expecting, found);
 	}
@ -141,12 +141,12 @@ public class TestLexerExec extends BaseTest {
 			"STRING_START : '\"' -> mode(STRING_MODE), more ;\n" +
 			"WS : (' '|'\n') -> skip ;\n"+
 			"mode STRING_MODE;\n"+
-			"STRING : '\"' -> mode(DEFAULT_MODE) ;\n"+
+			"STRING : '\"' -> mode(DEFAULT_MODE) ;\n"+ // ttype 2 since '"' ambiguity
 			"ANY : . -> more ;\n";
 		String found = execLexer("L.g4", grammar, "L", "\"abc\" \"ab\"");
 		String expecting =
-			"[@0,0:4='\"abc\"',<3>,1:0]\n" +
-			"[@1,6:9='\"ab\"',<3>,1:6]\n" +
+			"[@0,0:4='\"abc\"',<2>,1:0]\n" +
+			"[@1,6:9='\"ab\"',<2>,1:6]\n" +
 			"[@2,10:9='<EOF>',<-1>,1:10]\n";
 		assertEquals(expecting, found);
 	}