forked from jasder/antlr
First whack at fixing nongreedy (ACTION | .)* for recursive rules.
This commit is contained in:
parent
332c9f4452
commit
19782e6d77
|
@ -179,6 +179,10 @@ public class ArrayPredictionContext extends PredictionContext {
|
|||
buf.append("[");
|
||||
for (int i=0; i<invokingStates.length; i++) {
|
||||
if ( i>0 ) buf.append(", ");
|
||||
if ( invokingStates[i]==EMPTY_FULL_CTX_INVOKING_STATE ) {
|
||||
buf.append("$");
|
||||
continue;
|
||||
}
|
||||
buf.append(invokingStates[i]);
|
||||
if ( parents[i]!=null ) {
|
||||
buf.append(' ');
|
||||
|
|
|
@ -42,6 +42,8 @@ import org.antlr.v4.runtime.misc.Nullable;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/** "dup" of ParserInterpreter */
|
||||
public class LexerATNSimulator extends ATNSimulator {
|
||||
|
@ -431,10 +433,11 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
captureSimState(prevAccept, input, c);
|
||||
}
|
||||
|
||||
// if we reach lexer accept state, toss out any configs in rest
|
||||
// of configs work list associated with this rule (config.alt);
|
||||
// that rule is done. this is how we cut off nongreedy .+ loops.
|
||||
reach = deleteWildcardConfigsForAlt(reach, ci, c.alt);
|
||||
// if we reach lexer accept state, toss out any configs pointing
|
||||
// at wildcard edges in rest of configs work list associated
|
||||
// with this rule (config.alt); that rule is done. this is how
|
||||
// we cut off nongreedy .+ loops.
|
||||
reach = deleteWildcardConfigsForAlt(reach, ci, c);
|
||||
|
||||
// move to next char, looking for longer match
|
||||
// (we continue processing if there are states in reach)
|
||||
|
@ -524,22 +527,62 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
}
|
||||
}
|
||||
|
||||
/** Delete configs for alt following ci. Closure is unmodified; copy returned. */
|
||||
public ATNConfigSet deleteWildcardConfigsForAlt(@NotNull ATNConfigSet closure, int ci, int alt) {
|
||||
/** Delete configs for alt following ci that have a wildcard edge but
|
||||
* only for configs with same stack. E.g., if we want to kill after
|
||||
* config (2,1,[$]), then we need to wack only configs with $ stack:
|
||||
*
|
||||
* [..., (2,1,[$]), ..., (7,1,[[$, 6 $]])]
|
||||
*
|
||||
* That means wacking (7,1,[$]) but not (7,1,[6 $]). If incoming config
|
||||
* has multiple stacks, must look for each one in other configs. :(
|
||||
*
|
||||
* Closure is unmodified; copy returned.
|
||||
*/
|
||||
public ATNConfigSet deleteWildcardConfigsForAlt(@NotNull ATNConfigSet closure,
|
||||
int ci,
|
||||
ATNConfig config)
|
||||
{
|
||||
int alt = config.alt;
|
||||
if ( debug ) {
|
||||
System.out.printf("deleteWildcardConfigsForAlt for alt %d after config %d\n", alt, ci);
|
||||
}
|
||||
|
||||
// collect ctxs from incoming config; must wack all of those.
|
||||
Set<SingletonPredictionContext> contextsToKill =
|
||||
new HashSet<SingletonPredictionContext>();
|
||||
if ( config.context!=null && !config.context.isEmpty() ) {
|
||||
for (SingletonPredictionContext ctx : config.context) {
|
||||
contextsToKill.add(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
ATNConfigSet dup = new ATNConfigSet(closure);
|
||||
int j=ci+1;
|
||||
while ( j < dup.size() ) {
|
||||
ATNConfig c = dup.get(j);
|
||||
boolean isWildcard = c.state.getClass() == ATNState.class && // plain state only, not rulestop etc..
|
||||
c.state.transition(0) instanceof WildcardTransition;
|
||||
boolean killed = false;
|
||||
if ( c.alt == alt && isWildcard ) {
|
||||
// found config to kill but must check stack.
|
||||
// find c stacks that are in contextsToKill
|
||||
if ( c.context!=null && !c.context.isEmpty() ) {
|
||||
for (SingletonPredictionContext ctx : c.context) {
|
||||
if ( !ctx.isEmpty() ) {
|
||||
if ( contextsToKill.contains(ctx) ) {
|
||||
// c.alt, c.ctx matches and j > ci => kill it
|
||||
if ( debug ) {
|
||||
System.out.format("deleteWildcardConfigsForAlt %s\n", c);
|
||||
System.out.format("delete config %s since alt %d and %d leads to wildcard\n",
|
||||
c, c.alt, c.state.stateNumber);
|
||||
}
|
||||
|
||||
dup.remove(j);
|
||||
killed = true;
|
||||
}
|
||||
else j++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( !killed ) j++;
|
||||
}
|
||||
return dup;
|
||||
}
|
||||
|
@ -584,10 +627,13 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
if ( !ctx.isEmpty() ) {
|
||||
PredictionContext newContext = ctx.parent; // "pop" invoking state
|
||||
if ( ctx.invokingState==PredictionContext.EMPTY_FULL_CTX_INVOKING_STATE ) {
|
||||
// we have no context info. Don't pursue.
|
||||
// we have no context info. Don't pursue but
|
||||
// record a config that indicates how we hit end
|
||||
LexerATNConfig c = new LexerATNConfig(config, config.state, ctx);
|
||||
if ( debug ) System.out.println("FALLING off token "+
|
||||
recog.getRuleNames()[config.state.ruleIndex]);
|
||||
configs.add(config);
|
||||
recog.getRuleNames()[config.state.ruleIndex]+
|
||||
" record "+c);
|
||||
configs.add(c);
|
||||
continue;
|
||||
}
|
||||
ATNState invokingState = atn.states.get(ctx.invokingState);
|
||||
|
|
|
@ -76,13 +76,19 @@ public class SingletonPredictionContext extends PredictionContext {
|
|||
}
|
||||
|
||||
SingletonPredictionContext s = (SingletonPredictionContext)o;
|
||||
return invokingState == s.invokingState && parent.equals(s.parent);
|
||||
return invokingState == s.invokingState &&
|
||||
(parent!=null && parent.equals(s.parent));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
String up = parent!=null ? parent.toString() : "";
|
||||
if ( up.length()==0 ) return String.valueOf(invokingState);
|
||||
if ( up.length()==0 ) {
|
||||
if ( invokingState == EMPTY_FULL_CTX_INVOKING_STATE ) {
|
||||
return "$";
|
||||
}
|
||||
return String.valueOf(invokingState);
|
||||
}
|
||||
return String.valueOf(invokingState)+" "+up;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,2 +1 @@
|
|||
{{x}
|
||||
}
|
||||
{{x} }
|
||||
|
|
|
@ -21,5 +21,7 @@ wouldn't it get to the end of the rule also by the wild-card route?
|
|||
alternatives?
|
||||
|
||||
*/
|
||||
//STRING : '"' ( 'x' | . )* '"' ;
|
||||
|
||||
ACTION : '{' ( ACTION | . )* '}' ;
|
||||
WS : [ \r\t\n]+ -> skip ;
|
||||
|
|
|
@ -92,7 +92,7 @@ public class TestLexerExec extends BaseTest {
|
|||
String grammar =
|
||||
"lexer grammar L;\n"+
|
||||
"I : '0'..'9'+ {System.out.println(\"I\");} ;\n"+
|
||||
"HASH : '#' -> type(HASH), skip, more ;";
|
||||
"HASH : '#' -> type(100), skip, more ;";
|
||||
String found = execLexer("L.g4", grammar, "L", "34#11");
|
||||
String expecting =
|
||||
"I\n" +
|
||||
|
|
Loading…
Reference in New Issue