Fix handling of non-greedy blocks in the lexer (uses regex-style non-greedy with unordered alternatives)

This commit is contained in:
Sam Harwell 2012-10-14 16:40:53 -05:00
parent 28b243cda5
commit 025cc6187a
6 changed files with 108 additions and 66 deletions

View File

@ -71,6 +71,14 @@ public class ATNConfig {
@NotNull
public final SemanticContext semanticContext;
/**
 * Reports whether this configuration is greedy.
 * This base implementation always answers {@code true}.
 *
 * @return {@code true}
 */
public boolean isGreedy() {
return true;
}
/**
 * Returns the non-greedy depth of this configuration.
 * This base implementation always answers {@code 0}.
 *
 * @return {@code 0}
 */
public int getNonGreedyDepth() {
return 0;
}
public ATNConfig(ATNConfig old) { // dup
this.state = old.state;
this.alt = old.alt;

View File

@ -31,6 +31,7 @@ package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.Array2DHashSet;
import org.antlr.v4.runtime.misc.DoubleKeyMap;
import org.antlr.v4.runtime.misc.NotNull;
import java.util.ArrayList;
import java.util.BitSet;
@ -38,6 +39,7 @@ import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/** Specialized OrderedHashSet that can track info about the set.
@ -246,6 +248,7 @@ public class ATNConfigSet implements Set<ATNConfig> {
int hashCode = 7;
hashCode = 31 * hashCode + o.state.stateNumber;
hashCode = 31 * hashCode + o.alt;
hashCode = 31 * hashCode + o.getNonGreedyDepth();
hashCode = 31 * hashCode + o.semanticContext.hashCode();
return hashCode;
}
@ -257,6 +260,7 @@ public class ATNConfigSet implements Set<ATNConfig> {
if ( hashCode(a) != hashCode(b) ) return false;
// Two configs match when state, alt, non-greedy depth and semantic context
// all agree. The semantic contexts of a and b must be compared with each
// other; comparing b with itself would make this term always true.
return a.state.stateNumber==b.state.stateNumber
	&& a.alt==b.alt
	&& a.getNonGreedyDepth() == b.getNonGreedyDepth()
	&& a.semanticContext.equals(b.semanticContext);
}
}
@ -435,6 +439,26 @@ public class ATNConfigSet implements Set<ATNConfig> {
return configs.iterator();
}
/**
 * Drops every configuration that is not greedy and whose alternative
 * number is set in {@code alts}. The lookup collection (when present)
 * is purged first, followed by the {@code configs} collection.
 *
 * @param alts alternatives whose non-greedy configurations are removed
 * @throws IllegalStateException if this set is readonly
 */
public void removeNonGreedyConfigsInAlts(@NotNull BitSet alts) {
	if ( readonly ) throw new IllegalStateException("This set is readonly");

	if (this.configLookup != null) {
		Iterator<ATNConfig> lookupIt = this.configLookup.iterator();
		while (lookupIt.hasNext()) {
			ATNConfig candidate = lookupIt.next();
			// keep greedy configs and configs whose alt is not selected
			if (candidate.isGreedy() || !alts.get(candidate.alt)) {
				continue;
			}
			lookupIt.remove();
		}
	}

	Iterator<ATNConfig> configIt = this.configs.iterator();
	while (configIt.hasNext()) {
		ATNConfig candidate = configIt.next();
		if (candidate.isGreedy() || !alts.get(candidate.alt)) {
			continue;
		}
		configIt.remove();
	}
}
@Override
public void clear() {
if ( readonly ) throw new IllegalStateException("This set is readonly");

View File

@ -117,6 +117,10 @@ public class ATNState {
return false;
}
/**
 * Reports whether this state is the exit state of a non-greedy block.
 * This base implementation always answers {@code false}.
 *
 * @return {@code false}
 */
public boolean isNonGreedyExitState() {
return false;
}
@Override
public String toString() {
return String.valueOf(stateNumber);

View File

@ -32,4 +32,9 @@ package org.antlr.v4.runtime.atn;
/** End state that terminates a simple (a|b|c) block. */
public class BlockEndState extends ATNState {
	/** The start state this end state is paired with; may be unset. */
	public BlockStartState startState;

	/**
	 * This state is a non-greedy exit only when a start state has been
	 * attached and that start state is flagged as non-greedy.
	 */
	@Override
	public boolean isNonGreedyExitState() {
		if (startState == null) {
			return false;
		}
		return startState.nonGreedy;
	}
}

View File

@ -7,11 +7,14 @@ public class LexerATNConfig extends ATNConfig {
/** Capture lexer action we traverse */
public int lexerActionIndex = -1;
/**
 * Non-greedy depth carried by this configuration. Assigned exactly once
 * by each constructor; a value of 0 makes {@link #isGreedy()} answer true.
 */
private final int nonGreedyDepth;
/**
 * Creates a configuration for {@code state}, {@code alt} and
 * {@code context}, using {@code SemanticContext.NONE} and a non-greedy
 * depth of 0. {@code lexerActionIndex} keeps its field default of -1.
 *
 * @param state   ATN state of the configuration
 * @param alt     alternative number
 * @param context prediction context, may be null
 */
public LexerATNConfig(@NotNull ATNState state,
int alt,
@Nullable PredictionContext context)
{
super(state, alt, context, SemanticContext.NONE);
this.nonGreedyDepth = 0;
}
public LexerATNConfig(@NotNull ATNState state,
@ -21,17 +24,20 @@ public class LexerATNConfig extends ATNConfig {
{
super(state, alt, context, SemanticContext.NONE);
this.lexerActionIndex = actionIndex;
this.nonGreedyDepth = 0;
}
/**
 * Derives a configuration from {@code c} at a new {@code state}.
 * The context and semantic context of {@code c} are passed to the
 * superclass constructor; the lexer action index and non-greedy depth
 * are copied from {@code c}.
 *
 * @param c     configuration to derive from
 * @param state state of the new configuration
 */
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state) {
super(c, state, c.context, c.semanticContext);
this.lexerActionIndex = c.lexerActionIndex;
this.nonGreedyDepth = c.nonGreedyDepth;
}
/**
 * Derives a configuration from {@code c} at a new {@code state} with a
 * replacement semantic context. The context of {@code c} is passed to
 * the superclass constructor; the lexer action index and non-greedy
 * depth are copied from {@code c}.
 *
 * @param c               configuration to derive from
 * @param state           state of the new configuration
 * @param semanticContext semantic context of the new configuration
 */
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
@NotNull SemanticContext semanticContext) {
super(c, state, c.context, semanticContext);
this.lexerActionIndex = c.lexerActionIndex;
this.nonGreedyDepth = c.nonGreedyDepth;
}
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
@ -39,12 +45,42 @@ public class LexerATNConfig extends ATNConfig {
{
super(c, state, c.context, c.semanticContext);
this.lexerActionIndex = actionIndex;
this.nonGreedyDepth = c.nonGreedyDepth;
}
/**
 * Derives a configuration from {@code c} at a new {@code state} with a
 * replacement prediction context. The semantic context of {@code c} is
 * passed to the superclass constructor; the lexer action index and
 * non-greedy depth are copied from {@code c}.
 *
 * @param c       configuration to derive from
 * @param state   state of the new configuration
 * @param context prediction context of the new configuration, may be null
 */
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
@Nullable PredictionContext context) {
super(c, state, context, c.semanticContext);
this.lexerActionIndex = c.lexerActionIndex;
this.nonGreedyDepth = c.nonGreedyDepth;
}
/**
 * Derives a configuration from {@code c} that keeps its state, context,
 * semantic context and lexer action index but carries the given
 * non-greedy depth. Used by the enter/exit non-greedy block methods.
 *
 * @param c              configuration to derive from
 * @param nonGreedyDepth non-greedy depth of the new configuration
 */
private LexerATNConfig(@NotNull LexerATNConfig c, int nonGreedyDepth) {
super(c, c.state, c.context, c.semanticContext);
this.lexerActionIndex = c.lexerActionIndex;
this.nonGreedyDepth = nonGreedyDepth;
}
/**
 * A lexer configuration is greedy exactly when its non-greedy depth is 0.
 */
@Override
public boolean isGreedy() {
return nonGreedyDepth == 0;
}
/**
 * Returns the non-greedy depth stored in this configuration.
 */
@Override
public int getNonGreedyDepth() {
return nonGreedyDepth;
}
/**
 * Returns a new configuration derived from this one whose non-greedy
 * depth is one greater. This configuration is not modified.
 *
 * @return the derived configuration with depth {@code nonGreedyDepth + 1}
 */
public LexerATNConfig enterNonGreedyBlock() {
return new LexerATNConfig(this, nonGreedyDepth + 1);
}
/**
 * Returns the configuration to use after leaving a non-greedy block.
 * A greedy configuration (depth 0) has no non-greedy block to leave and is
 * returned unchanged, so the depth never becomes negative. Otherwise a new
 * configuration with the depth reduced by one is returned and this
 * configuration is left unmodified.
 *
 * @return this configuration when it is greedy, otherwise a derived
 *         configuration with depth {@code nonGreedyDepth - 1}
 */
public LexerATNConfig exitNonGreedyBlock() {
	if (isGreedy()) {
		// depth is already 0; nothing to unwind
		return this;
	}
	return new LexerATNConfig(this, nonGreedyDepth - 1);
}
}

View File

@ -42,6 +42,7 @@ import org.antlr.v4.runtime.misc.Nullable;
import java.io.IOException;
import java.io.OutputStream;
import java.util.BitSet;
/** "dup" of ParserInterpreter */
public class LexerATNSimulator extends ATNSimulator {
@ -407,6 +408,24 @@ public class LexerATNSimulator extends ATNSimulator {
System.out.format("processAcceptConfigs: reach=%s, prevAccept=%s, prevIndex=%d\n",
reach, prevAccept.config, prevAccept.index);
}
// Find the alts that have both a config sitting at a rule stop state and
// at least one non-greedy config, then drop the non-greedy configs of
// those alts from reach before the accept-state scan that follows.
BitSet altsAtAcceptState = new BitSet();
BitSet nonGreedyAlts = new BitSet();
for (ATNConfig config : reach) {
if (config.state instanceof RuleStopState) {
altsAtAcceptState.set(config.alt);
}
if (!((LexerATNConfig)config).isGreedy()) {
nonGreedyAlts.set(config.alt);
}
}
// keep only the non-greedy alts that also reached a rule stop state
nonGreedyAlts.and(altsAtAcceptState);
if (!nonGreedyAlts.isEmpty()) {
reach.removeNonGreedyConfigsInAlts(nonGreedyAlts);
}
for (int ci=0; ci<reach.size(); ci++) {
LexerATNConfig c = (LexerATNConfig)reach.get(ci);
if ( c.state instanceof RuleStopState) {
@ -431,13 +450,6 @@ public class LexerATNSimulator extends ATNSimulator {
captureSimState(prevAccept, input, c);
}
// if we reach lexer accept state with empty stack,
// toss out any configs pointing at wildcard edges
// in rest of configs work list associated with this
// rule (config.alt); that rule is done. this is how we
// cut off nongreedy .+ loops.
reach = deleteWildcardConfigsForAlt(reach, ci, c);
// move to next char, looking for longer match
// (we continue processing if there are states in reach)
}
@ -526,62 +538,6 @@ public class LexerATNSimulator extends ATNSimulator {
}
}
/** Builds a filtered copy of {@code closure} in which wildcard configs of
 *  {@code config.alt} located after index {@code ci} lose their
 *  empty-stack contexts. E.g., to kill after config (2,1,[$]) only the
 *  $ stack is whacked:
 *
 *  	[..., (2,1,[$]), ..., (7,1,[[$, 6 $]])]
 *
 *  This removes (7,1,[$]) but keeps (7,1,[6 $]).
 *
 *  A config may carry several stacks; only the empty one matters here,
 *  because it means the end of a lexer rule was reached directly from
 *  nextToken.
 *
 *  The closure argument is left unmodified; a new set is returned.
 */
public ATNConfigSet deleteWildcardConfigsForAlt(@NotNull ATNConfigSet closure,
												int ci,
												ATNConfig config)
{
	final int targetAlt = config.alt;
	if ( debug ) {
		System.out.printf("deleteWildcardConfigsForAlt for alt %d after config %d\n", targetAlt, ci);
	}

	ATNConfigSet result = new ATNConfigSet();

	// configs at positions 0..ci are carried over as they are
	for (int k = 0; k <= ci; k++) {
		result.add(closure.get(k));
	}

	for (int k = ci + 1; k < closure.size(); k++) {
		LexerATNConfig candidate = (LexerATNConfig)closure.get(k);
		// plain ATNState only (not rule stop etc.) whose first edge is a wildcard
		boolean wildcardEdge = candidate.state.getClass() == ATNState.class &&
			candidate.state.transition(0) instanceof WildcardTransition;

		if ( candidate.alt != targetAlt || !wildcardEdge ) {
			result.add(candidate); // unaffected config, keep it whole
			continue;
		}

		// matching alt on a wildcard edge: keep only the non-empty stacks
		for (SingletonPredictionContext ctx : candidate.context) {
			if ( !ctx.isEmpty() ) {
				LexerATNConfig survivor =
					new LexerATNConfig(candidate.state, candidate.alt, ctx, candidate.lexerActionIndex);
				result.add(survivor);
				continue;
			}
			// empty stack: this context is dropped
			if ( debug ) {
				System.out.format("delete config %s since alt %d and %d leads to wildcard\n",
								  candidate, candidate.alt, candidate.state.stateNumber);
			}
		}
	}
	return result;
}
@NotNull
protected ATNConfigSet computeStartState(@NotNull IntStream input,
@NotNull ATNState p)
@ -601,8 +557,6 @@ public class LexerATNSimulator extends ATNSimulator {
System.out.println("closure("+config.toString(recog, true)+")");
}
// TODO? if ( closure.contains(t) ) return;
if ( config.state instanceof RuleStopState ) {
if ( debug ) {
if ( recog!=null ) {
@ -651,7 +605,15 @@ public class LexerATNSimulator extends ATNSimulator {
for (int i=0; i<p.getNumberOfTransitions(); i++) {
Transition t = p.transition(i);
LexerATNConfig c = getEpsilonTarget(config, t, configs);
if ( c!=null ) closure(c, configs);
if ( c!=null ) {
final int NON_GREEDY_ENTER_ALT = 2;
if (i == NON_GREEDY_ENTER_ALT - 1 && ((DecisionState)p).nonGreedy) {
assert p.getNumberOfTransitions() == 2;
c = c.enterNonGreedyBlock();
}
closure(c, configs);
}
}
}
@ -662,6 +624,9 @@ public class LexerATNSimulator extends ATNSimulator {
@NotNull ATNConfigSet configs)
{
ATNState p = config.state;
if (p.isNonGreedyExitState()) {
config = config.exitNonGreedyBlock();
}
LexerATNConfig c = null;
switch (t.getSerializationType()) {