Fix handling of non-greedy blocks in the lexer (uses regex-style non-greedy with unordered alternatives)
This commit is contained in:
parent
28b243cda5
commit
025cc6187a
|
@ -71,6 +71,14 @@ public class ATNConfig {
|
|||
@NotNull
|
||||
public final SemanticContext semanticContext;
|
||||
|
||||
public boolean isGreedy() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public int getNonGreedyDepth() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public ATNConfig(ATNConfig old) { // dup
|
||||
this.state = old.state;
|
||||
this.alt = old.alt;
|
||||
|
|
|
@ -31,6 +31,7 @@ package org.antlr.v4.runtime.atn;
|
|||
|
||||
import org.antlr.v4.runtime.misc.Array2DHashSet;
|
||||
import org.antlr.v4.runtime.misc.DoubleKeyMap;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
|
@ -38,6 +39,7 @@ import java.util.Collection;
|
|||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/** Specialized OrderedHashSet that can track info about the set.
|
||||
|
@ -246,6 +248,7 @@ public class ATNConfigSet implements Set<ATNConfig> {
|
|||
int hashCode = 7;
|
||||
hashCode = 31 * hashCode + o.state.stateNumber;
|
||||
hashCode = 31 * hashCode + o.alt;
|
||||
hashCode = 31 * hashCode + o.getNonGreedyDepth();
|
||||
hashCode = 31 * hashCode + o.semanticContext.hashCode();
|
||||
return hashCode;
|
||||
}
|
||||
|
@ -257,6 +260,7 @@ public class ATNConfigSet implements Set<ATNConfig> {
|
|||
if ( hashCode(a) != hashCode(b) ) return false;
|
||||
return a.state.stateNumber==b.state.stateNumber
|
||||
&& a.alt==b.alt
|
||||
&& a.getNonGreedyDepth() == b.getNonGreedyDepth()
|
||||
&& b.semanticContext.equals(b.semanticContext);
|
||||
}
|
||||
}
|
||||
|
@ -435,6 +439,26 @@ public class ATNConfigSet implements Set<ATNConfig> {
|
|||
return configs.iterator();
|
||||
}
|
||||
|
||||
public void removeNonGreedyConfigsInAlts(@NotNull BitSet alts) {
|
||||
if ( readonly ) throw new IllegalStateException("This set is readonly");
|
||||
|
||||
if (this.configLookup != null) {
|
||||
for (Iterator<ATNConfig> it = this.configLookup.iterator(); it.hasNext(); ) {
|
||||
ATNConfig entry = it.next();
|
||||
if (!entry.isGreedy() && alts.get(entry.alt)) {
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (Iterator<ATNConfig> it = this.configs.iterator(); it.hasNext(); ) {
|
||||
ATNConfig value = it.next();
|
||||
if (!value.isGreedy() && alts.get(value.alt)) {
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
if ( readonly ) throw new IllegalStateException("This set is readonly");
|
||||
|
|
|
@ -117,6 +117,10 @@ public class ATNState {
|
|||
return false;
|
||||
}
|
||||
|
||||
public boolean isNonGreedyExitState() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.valueOf(stateNumber);
|
||||
|
|
|
@ -32,4 +32,9 @@ package org.antlr.v4.runtime.atn;
|
|||
/** Terminal node of a simple (a|b|c) block */
|
||||
public class BlockEndState extends ATNState {
|
||||
public BlockStartState startState;
|
||||
|
||||
@Override
|
||||
public boolean isNonGreedyExitState() {
|
||||
return startState != null && startState.nonGreedy;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,11 +7,14 @@ public class LexerATNConfig extends ATNConfig {
|
|||
/** Capture lexer action we traverse */
|
||||
public int lexerActionIndex = -1;
|
||||
|
||||
private final int nonGreedyDepth;
|
||||
|
||||
/**
 * Builds a configuration for {@code state}/{@code alt}/{@code context}
 * with {@link SemanticContext#NONE} and a non-greedy depth of 0.
 */
public LexerATNConfig(@NotNull ATNState state, int alt, @Nullable PredictionContext context) {
	super(state, alt, context, SemanticContext.NONE);
	// a freshly created configuration is not inside any non-greedy block
	this.nonGreedyDepth = 0;
}
|
||||
|
||||
public LexerATNConfig(@NotNull ATNState state,
|
||||
|
@ -21,17 +24,20 @@ public class LexerATNConfig extends ATNConfig {
|
|||
{
|
||||
super(state, alt, context, SemanticContext.NONE);
|
||||
this.lexerActionIndex = actionIndex;
|
||||
this.nonGreedyDepth = 0;
|
||||
}
|
||||
|
||||
/**
 * Copies {@code c} onto a different {@code state}, keeping its context,
 * semantic context, lexer action index and non-greedy depth.
 */
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state) {
	// The (c, state, semanticContext) overload performs exactly the same
	// assignments when handed c's own semantic context.
	this(c, state, c.semanticContext);
}
|
||||
|
||||
/**
 * Copies {@code c} onto a different {@code state} with a replacement
 * {@code semanticContext}; the context, lexer action index and non-greedy
 * depth are taken from {@code c}.
 */
public LexerATNConfig(
	@NotNull LexerATNConfig c,
	@NotNull ATNState state,
	@NotNull SemanticContext semanticContext)
{
	super(c, state, c.context, semanticContext);
	this.nonGreedyDepth = c.nonGreedyDepth;
	this.lexerActionIndex = c.lexerActionIndex;
}
|
||||
|
||||
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
|
||||
|
@ -39,12 +45,42 @@ public class LexerATNConfig extends ATNConfig {
|
|||
{
|
||||
super(c, state, c.context, c.semanticContext);
|
||||
this.lexerActionIndex = actionIndex;
|
||||
this.nonGreedyDepth = c.nonGreedyDepth;
|
||||
}
|
||||
|
||||
/**
 * Copies {@code c} onto a different {@code state} with a replacement
 * prediction {@code context}; the semantic context, lexer action index and
 * non-greedy depth are taken from {@code c}.
 */
public LexerATNConfig(
	@NotNull LexerATNConfig c,
	@NotNull ATNState state,
	@Nullable PredictionContext context)
{
	super(c, state, context, c.semanticContext);
	this.nonGreedyDepth = c.nonGreedyDepth;
	this.lexerActionIndex = c.lexerActionIndex;
}
|
||||
|
||||
/**
 * Clones {@code c} (same state, context, semantic context and lexer action
 * index) while assigning an explicit {@code nonGreedyDepth}.
 */
private LexerATNConfig(@NotNull LexerATNConfig c, int nonGreedyDepth) {
	super(c, c.state, c.context, c.semanticContext);
	this.nonGreedyDepth = nonGreedyDepth;
	this.lexerActionIndex = c.lexerActionIndex;
}
|
||||
|
||||
@Override
|
||||
public boolean isGreedy() {
|
||||
return nonGreedyDepth == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNonGreedyDepth() {
|
||||
return nonGreedyDepth;
|
||||
}
|
||||
|
||||
public LexerATNConfig enterNonGreedyBlock() {
|
||||
return new LexerATNConfig(this, nonGreedyDepth + 1);
|
||||
}
|
||||
|
||||
public LexerATNConfig exitNonGreedyBlock() {
|
||||
if (!isGreedy()) {
|
||||
return this;
|
||||
}
|
||||
|
||||
return new LexerATNConfig(this, nonGreedyDepth - 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ import org.antlr.v4.runtime.misc.Nullable;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.BitSet;
|
||||
|
||||
/** "dup" of ParserInterpreter */
|
||||
public class LexerATNSimulator extends ATNSimulator {
|
||||
|
@ -407,6 +408,24 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
System.out.format("processAcceptConfigs: reach=%s, prevAccept=%s, prevIndex=%d\n",
|
||||
reach, prevAccept.config, prevAccept.index);
|
||||
}
|
||||
|
||||
BitSet altsAtAcceptState = new BitSet();
|
||||
BitSet nonGreedyAlts = new BitSet();
|
||||
for (ATNConfig config : reach) {
|
||||
if (config.state instanceof RuleStopState) {
|
||||
altsAtAcceptState.set(config.alt);
|
||||
}
|
||||
|
||||
if (!((LexerATNConfig)config).isGreedy()) {
|
||||
nonGreedyAlts.set(config.alt);
|
||||
}
|
||||
}
|
||||
|
||||
nonGreedyAlts.and(altsAtAcceptState);
|
||||
if (!nonGreedyAlts.isEmpty()) {
|
||||
reach.removeNonGreedyConfigsInAlts(nonGreedyAlts);
|
||||
}
|
||||
|
||||
for (int ci=0; ci<reach.size(); ci++) {
|
||||
LexerATNConfig c = (LexerATNConfig)reach.get(ci);
|
||||
if ( c.state instanceof RuleStopState) {
|
||||
|
@ -431,13 +450,6 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
captureSimState(prevAccept, input, c);
|
||||
}
|
||||
|
||||
// if we reach lexer accept state with empty stack,
|
||||
// toss out any configs pointing at wildcard edges
|
||||
// in rest of configs work list associated with this
|
||||
// rule (config.alt); that rule is done. this is how we
|
||||
// cut off nongreedy .+ loops.
|
||||
reach = deleteWildcardConfigsForAlt(reach, ci, c);
|
||||
|
||||
// move to next char, looking for longer match
|
||||
// (we continue processing if there are states in reach)
|
||||
}
|
||||
|
@ -526,62 +538,6 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
}
|
||||
}
|
||||
|
||||
/** Delete configs for alt following ci that have a wildcard edge but
|
||||
* only for configs with empty stack. E.g., if we want to kill after
|
||||
* config (2,1,[$]), then we need to wack only configs with $ stack:
|
||||
*
|
||||
* [..., (2,1,[$]), ..., (7,1,[[$, 6 $]])]
|
||||
*
|
||||
* That means wacking (7,1,[$]) but not (7,1,[6 $]).
|
||||
*
|
||||
* Incoming config could have multiple stacks but we only care about
|
||||
* empty stack since that means we reached end of a lexer rule from
|
||||
* nextToken directly.
|
||||
*
|
||||
* Closure is unmodified; copy returned.
|
||||
*/
|
||||
public ATNConfigSet deleteWildcardConfigsForAlt(@NotNull ATNConfigSet closure,
|
||||
int ci,
|
||||
ATNConfig config)
|
||||
{
|
||||
int alt = config.alt;
|
||||
if ( debug ) {
|
||||
System.out.printf("deleteWildcardConfigsForAlt for alt %d after config %d\n", alt, ci);
|
||||
}
|
||||
|
||||
ATNConfigSet dup = new ATNConfigSet(); // build up as we go thru loop
|
||||
for (int j=0; j<=ci; j++) dup.add(closure.get(j)); // add stuff up to ci
|
||||
int j=ci+1;
|
||||
while ( j < closure.size() ) {
|
||||
LexerATNConfig c = (LexerATNConfig)closure.get(j);
|
||||
boolean isWildcard = c.state.getClass() == ATNState.class && // plain state only, not rulestop etc..
|
||||
c.state.transition(0) instanceof WildcardTransition;
|
||||
if ( c.alt == alt && isWildcard ) {
|
||||
// found config to kill but only if empty stack.
|
||||
for (SingletonPredictionContext ctx : c.context) {
|
||||
if ( ctx.isEmpty() ) {
|
||||
// c.alt matches, empty stack, and j > ci => kill it
|
||||
if ( debug ) {
|
||||
System.out.format("delete config %s since alt %d and %d leads to wildcard\n",
|
||||
c, c.alt, c.state.stateNumber);
|
||||
}
|
||||
// don't add
|
||||
}
|
||||
else {
|
||||
LexerATNConfig splitConfig =
|
||||
new LexerATNConfig(c.state, c.alt, ctx, c.lexerActionIndex);
|
||||
dup.add(splitConfig);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
dup.add(c); // add entire config
|
||||
}
|
||||
j++;
|
||||
}
|
||||
return dup;
|
||||
}
|
||||
|
||||
@NotNull
|
||||
protected ATNConfigSet computeStartState(@NotNull IntStream input,
|
||||
@NotNull ATNState p)
|
||||
|
@ -601,8 +557,6 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
System.out.println("closure("+config.toString(recog, true)+")");
|
||||
}
|
||||
|
||||
// TODO? if ( closure.contains(t) ) return;
|
||||
|
||||
if ( config.state instanceof RuleStopState ) {
|
||||
if ( debug ) {
|
||||
if ( recog!=null ) {
|
||||
|
@ -651,7 +605,15 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
for (int i=0; i<p.getNumberOfTransitions(); i++) {
|
||||
Transition t = p.transition(i);
|
||||
LexerATNConfig c = getEpsilonTarget(config, t, configs);
|
||||
if ( c!=null ) closure(c, configs);
|
||||
if ( c!=null ) {
|
||||
final int NON_GREEDY_ENTER_ALT = 2;
|
||||
if (i == NON_GREEDY_ENTER_ALT - 1 && ((DecisionState)p).nonGreedy) {
|
||||
assert p.getNumberOfTransitions() == 2;
|
||||
c = c.enterNonGreedyBlock();
|
||||
}
|
||||
|
||||
closure(c, configs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -662,6 +624,9 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
@NotNull ATNConfigSet configs)
|
||||
{
|
||||
ATNState p = config.state;
|
||||
if (p.isNonGreedyExitState()) {
|
||||
config = config.exitNonGreedyBlock();
|
||||
}
|
||||
|
||||
LexerATNConfig c = null;
|
||||
switch (t.getSerializationType()) {
|
||||
|
|
Loading…
Reference in New Issue