From b56cd0f587a896eee2bced803382d9d82e3ba2da Mon Sep 17 00:00:00 2001 From: Terence Parr Date: Sat, 29 Sep 2012 12:48:20 -0700 Subject: [PATCH] simplified delete configs algorithm. --- .../v4/runtime/atn/LexerATNSimulator.java | 34 ++++++++----------- tool/playground/L.g | 9 +++-- .../test/org/antlr/v4/test/TestLexerExec.java | 12 +++---- 3 files changed, 24 insertions(+), 31 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java index 5be7c973b..b441674ad 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java @@ -42,8 +42,6 @@ import org.antlr.v4.runtime.misc.Nullable; import java.io.IOException; import java.io.OutputStream; -import java.util.HashSet; -import java.util.Set; /** "dup" of ParserInterpreter */ public class LexerATNSimulator extends ATNSimulator { @@ -433,10 +431,11 @@ public class LexerATNSimulator extends ATNSimulator { captureSimState(prevAccept, input, c); } - // if we reach lexer accept state, toss out any configs pointing - // at wildcard edges in rest of configs work list associated - // with this rule (config.alt); that rule is done. this is how - // we cut off nongreedy .+ loops. + // if we reach lexer accept state with empty stack, + // toss out any configs pointing at wildcard edges + // in rest of configs work list associated with this + // rule (config.alt); that rule is done. this is how we + // cut off nongreedy .+ loops. reach = deleteWildcardConfigsForAlt(reach, ci, c); // move to next char, looking for longer match @@ -528,13 +527,16 @@ public class LexerATNSimulator extends ATNSimulator { } /** Delete configs for alt following ci that have a wildcard edge but - * only for configs with same stack. E.g., if we want to kill after + * only for configs with empty stack. E.g., if we want to kill after * config (2,1,[$]), then we need to wack only configs with $ stack: * * [..., (2,1,[$]), ..., (7,1,[[$, 6 $]])] * - * That means wacking (7,1,[$]) but not (7,1,[6 $]). If incoming config - * has multiple stacks, must look for each one in other configs. + * That means wacking (7,1,[$]) but not (7,1,[6 $]). + * + * Incoming config could have multiple stacks but we only care about + * empty stack since that means we reached end of a lexer rule from + * nextToken directly. * * Closure is unmodified; copy returned. */ @@ -547,13 +549,6 @@ public class LexerATNSimulator extends ATNSimulator { System.out.printf("deleteWildcardConfigsForAlt for alt %d after config %d\n", alt, ci); } - // collect ctxs from incoming config; must wack all of those. - Set contextsToKill = - new HashSet(); - for (SingletonPredictionContext ctx : config.context) { - contextsToKill.add(ctx); - } - ATNConfigSet dup = new ATNConfigSet(); // build up as we go thru loop for (int j=0; j<=ci; j++) dup.add(closure.get(j)); // add stuff up to ci int j=ci+1; @@ -562,11 +557,10 @@ public class LexerATNSimulator extends ATNSimulator { boolean isWildcard = c.state.getClass() == ATNState.class && // plain state only, not rulestop etc.. c.state.transition(0) instanceof WildcardTransition; if ( c.alt == alt && isWildcard ) { - // found config to kill but only if same stack. - // find c stacks that are in contextsToKill + // found config to kill but only if empty stack. for (SingletonPredictionContext ctx : c.context) { - if ( contextsToKill.contains(ctx) ) { - // c.alt, c.ctx matches and j > ci => kill it + if ( ctx.isEmpty() ) { + // c.alt matches, empty stack, and j > ci => kill it if ( debug ) { System.out.format("delete config %s since alt %d and %d leads to wildcard\n", c, c.alt, c.state.stateNumber); diff --git a/tool/playground/L.g b/tool/playground/L.g index 0ad9f25b9..40fd6e1bd 100644 --- a/tool/playground/L.g +++ b/tool/playground/L.g @@ -1,7 +1,6 @@ lexer grammar L; -STRING_START : '"' {pushMode(STRING_MODE); more();} ; -WS : ' '|' -' {skip();} ; +STRING_START : '"' -> pushMode(STRING_MODE), more ; +WS : (' '|'\n') -> skip ; mode STRING_MODE; -STRING : '"' {popMode();} ; -ANY : . {more();} ; +STRING : '"' -> popMode ; +ANY : . -> more ; diff --git a/tool/test/org/antlr/v4/test/TestLexerExec.java b/tool/test/org/antlr/v4/test/TestLexerExec.java index 933f1732c..fe0f2ba83 100644 --- a/tool/test/org/antlr/v4/test/TestLexerExec.java +++ b/tool/test/org/antlr/v4/test/TestLexerExec.java @@ -125,12 +125,12 @@ public class TestLexerExec extends BaseTest { "STRING_START : '\"' -> pushMode(STRING_MODE), more ;\n" + "WS : (' '|'\n') -> skip ;\n"+ "mode STRING_MODE;\n"+ - "STRING : '\"' -> popMode ;\n"+ + "STRING : '\"' -> popMode ;\n"+ // token type 2 "ANY : . -> more ;\n"; String found = execLexer("L.g4", grammar, "L", "\"abc\" \"ab\""); String expecting = - "[@0,0:4='\"abc\"',<3>,1:0]\n" + - "[@1,6:9='\"ab\"',<3>,1:6]\n" + + "[@0,0:4='\"abc\"',<2>,1:0]\n" + + "[@1,6:9='\"ab\"',<2>,1:6]\n" + "[@2,10:9='',<-1>,1:10]\n"; assertEquals(expecting, found); } @@ -141,12 +141,12 @@ public class TestLexerExec extends BaseTest { "STRING_START : '\"' -> mode(STRING_MODE), more ;\n" + "WS : (' '|'\n') -> skip ;\n"+ "mode STRING_MODE;\n"+ - "STRING : '\"' -> mode(DEFAULT_MODE) ;\n"+ + "STRING : '\"' -> mode(DEFAULT_MODE) ;\n"+ // ttype 2 since '"' ambiguity "ANY : . -> more ;\n"; String found = execLexer("L.g4", grammar, "L", "\"abc\" \"ab\""); String expecting = - "[@0,0:4='\"abc\"',<3>,1:0]\n" + - "[@1,6:9='\"ab\"',<3>,1:6]\n" + + "[@0,0:4='\"abc\"',<2>,1:0]\n" + + "[@1,6:9='\"ab\"',<2>,1:6]\n" + "[@2,10:9='',<-1>,1:10]\n"; assertEquals(expecting, found); }