simplified delete configs algorithm.
This commit is contained in:
parent
84c786da50
commit
b56cd0f587
|
@ -42,8 +42,6 @@ import org.antlr.v4.runtime.misc.Nullable;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/** "dup" of ParserInterpreter */
|
||||
public class LexerATNSimulator extends ATNSimulator {
|
||||
|
@ -433,10 +431,11 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
captureSimState(prevAccept, input, c);
|
||||
}
|
||||
|
||||
// if we reach lexer accept state, toss out any configs pointing
|
||||
// at wildcard edges in rest of configs work list associated
|
||||
// with this rule (config.alt); that rule is done. this is how
|
||||
// we cut off nongreedy .+ loops.
|
||||
// if we reach lexer accept state with empty stack,
|
||||
// toss out any configs pointing at wildcard edges
|
||||
// in rest of configs work list associated with this
|
||||
// rule (config.alt); that rule is done. this is how we
|
||||
// cut off nongreedy .+ loops.
|
||||
reach = deleteWildcardConfigsForAlt(reach, ci, c);
|
||||
|
||||
// move to next char, looking for longer match
|
||||
|
@ -528,13 +527,16 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
}
|
||||
|
||||
/** Delete configs for alt following ci that have a wildcard edge but
|
||||
* only for configs with same stack. E.g., if we want to kill after
|
||||
* only for configs with empty stack. E.g., if we want to kill after
|
||||
* config (2,1,[$]), then we need to whack only configs with $ stack:
|
||||
*
|
||||
* [..., (2,1,[$]), ..., (7,1,[[$, 6 $]])]
|
||||
*
|
||||
* That means whacking (7,1,[$]) but not (7,1,[6 $]). If incoming config
|
||||
* has multiple stacks, must look for each one in other configs.
|
||||
* That means whacking (7,1,[$]) but not (7,1,[6 $]).
|
||||
*
|
||||
* Incoming config could have multiple stacks but we only care about
|
||||
* empty stack since that means we reached end of a lexer rule from
|
||||
* nextToken directly.
|
||||
*
|
||||
* Closure is unmodified; copy returned.
|
||||
*/
|
||||
|
@ -547,13 +549,6 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
System.out.printf("deleteWildcardConfigsForAlt for alt %d after config %d\n", alt, ci);
|
||||
}
|
||||
|
||||
// collect ctxs from incoming config; must whack all of those.
|
||||
Set<SingletonPredictionContext> contextsToKill =
|
||||
new HashSet<SingletonPredictionContext>();
|
||||
for (SingletonPredictionContext ctx : config.context) {
|
||||
contextsToKill.add(ctx);
|
||||
}
|
||||
|
||||
ATNConfigSet dup = new ATNConfigSet(); // build up as we go thru loop
|
||||
for (int j=0; j<=ci; j++) dup.add(closure.get(j)); // add stuff up to ci
|
||||
int j=ci+1;
|
||||
|
@ -562,11 +557,10 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
boolean isWildcard = c.state.getClass() == ATNState.class && // plain state only, not rulestop etc..
|
||||
c.state.transition(0) instanceof WildcardTransition;
|
||||
if ( c.alt == alt && isWildcard ) {
|
||||
// found config to kill but only if same stack.
|
||||
// find c stacks that are in contextsToKill
|
||||
// found config to kill but only if empty stack.
|
||||
for (SingletonPredictionContext ctx : c.context) {
|
||||
if ( contextsToKill.contains(ctx) ) {
|
||||
// c.alt, c.ctx matches and j > ci => kill it
|
||||
if ( ctx.isEmpty() ) {
|
||||
// c.alt matches, empty stack, and j > ci => kill it
|
||||
if ( debug ) {
|
||||
System.out.format("delete config %s since alt %d and %d leads to wildcard\n",
|
||||
c, c.alt, c.state.stateNumber);
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
lexer grammar L;
|
||||
STRING_START : '"' {pushMode(STRING_MODE); more();} ;
|
||||
WS : ' '|'
|
||||
' {skip();} ;
|
||||
STRING_START : '"' -> pushMode(STRING_MODE), more ;
|
||||
WS : (' '|'\n') -> skip ;
|
||||
mode STRING_MODE;
|
||||
STRING : '"' {popMode();} ;
|
||||
ANY : . {more();} ;
|
||||
STRING : '"' -> popMode ;
|
||||
ANY : . -> more ;
|
||||
|
|
|
@ -125,12 +125,12 @@ public class TestLexerExec extends BaseTest {
|
|||
"STRING_START : '\"' -> pushMode(STRING_MODE), more ;\n" +
|
||||
"WS : (' '|'\n') -> skip ;\n"+
|
||||
"mode STRING_MODE;\n"+
|
||||
"STRING : '\"' -> popMode ;\n"+
|
||||
"STRING : '\"' -> popMode ;\n"+ // token type 2
|
||||
"ANY : . -> more ;\n";
|
||||
String found = execLexer("L.g4", grammar, "L", "\"abc\" \"ab\"");
|
||||
String expecting =
|
||||
"[@0,0:4='\"abc\"',<3>,1:0]\n" +
|
||||
"[@1,6:9='\"ab\"',<3>,1:6]\n" +
|
||||
"[@0,0:4='\"abc\"',<2>,1:0]\n" +
|
||||
"[@1,6:9='\"ab\"',<2>,1:6]\n" +
|
||||
"[@2,10:9='<EOF>',<-1>,1:10]\n";
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
@ -141,12 +141,12 @@ public class TestLexerExec extends BaseTest {
|
|||
"STRING_START : '\"' -> mode(STRING_MODE), more ;\n" +
|
||||
"WS : (' '|'\n') -> skip ;\n"+
|
||||
"mode STRING_MODE;\n"+
|
||||
"STRING : '\"' -> mode(DEFAULT_MODE) ;\n"+
|
||||
"STRING : '\"' -> mode(DEFAULT_MODE) ;\n"+ // ttype 2 since '"' ambiguity
|
||||
"ANY : . -> more ;\n";
|
||||
String found = execLexer("L.g4", grammar, "L", "\"abc\" \"ab\"");
|
||||
String expecting =
|
||||
"[@0,0:4='\"abc\"',<3>,1:0]\n" +
|
||||
"[@1,6:9='\"ab\"',<3>,1:6]\n" +
|
||||
"[@0,0:4='\"abc\"',<2>,1:0]\n" +
|
||||
"[@1,6:9='\"ab\"',<2>,1:6]\n" +
|
||||
"[@2,10:9='<EOF>',<-1>,1:10]\n";
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue