Fix handling of non-greedy blocks in the lexer (uses regex-style non-greedy with unordered alternatives)

2012-10-14 16:40:53 -05:00 · 2012-10-14 16:40:53 -05:00 · 025cc6187a
parent 28b243cda5
commit 025cc6187a
6 changed files with 108 additions and 66 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java
@ -71,6 +71,14 @@ public class ATNConfig {
    @NotNull
    public final SemanticContext semanticContext;
 	/**
 	 * Reports whether this configuration is greedy.
 	 *
 	 * The base implementation always answers {@code true}; LexerATNConfig
 	 * overrides it to answer {@code true} only while its non-greedy depth is 0.
 	 *
 	 * @return {@code true} for every plain ATNConfig
 	 */
 	public boolean isGreedy() {
 		return true;
 	}
 	/**
 	 * Returns the non-greedy depth recorded for this configuration.
 	 *
 	 * The base implementation always answers 0; LexerATNConfig overrides it to
 	 * return its own depth field. ATNConfigSet folds this value into its
 	 * config hash code and equality comparison.
 	 *
 	 * @return 0 for every plain ATNConfig
 	 */
 	public int getNonGreedyDepth() {
 		return 0;
 	}
 	public ATNConfig(ATNConfig old) { // dup
 		this.state = old.state;
 		this.alt = old.alt;
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java
@ -31,6 +31,7 @@ package org.antlr.v4.runtime.atn;
 import org.antlr.v4.runtime.misc.Array2DHashSet;
 import org.antlr.v4.runtime.misc.DoubleKeyMap;
 import org.antlr.v4.runtime.misc.NotNull;
 import java.util.ArrayList;
 import java.util.BitSet;
@ -38,6 +39,7 @@ import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 /** Specialized OrderedHashSet that can track info about the set.
@ -246,6 +248,7 @@ public class ATNConfigSet implements Set<ATNConfig> {
 			int hashCode = 7;
 			hashCode = 31 * hashCode + o.state.stateNumber;
 			hashCode = 31 * hashCode + o.alt;
 			hashCode = 31 * hashCode + o.getNonGreedyDepth();
 			hashCode = 31 * hashCode + o.semanticContext.hashCode();
 	        return hashCode;
 		}
@ -257,6 +260,7 @@ public class ATNConfigSet implements Set<ATNConfig> {
 			// Cheap rejection first: configs with different hashes cannot be equal.
 			if ( hashCode(a) != hashCode(b) ) return false;
 			// Field-by-field comparison over the same fields the hash is built from.
 			// The semantic contexts of BOTH configs must be compared; comparing
 			// b's context with itself is always true and would let two configs
 			// with different predicates collapse into one on a hash collision.
 			return a.state.stateNumber==b.state.stateNumber
 				&& a.alt==b.alt
 				&& a.getNonGreedyDepth() == b.getNonGreedyDepth()
 				&& a.semanticContext.equals(b.semanticContext);
 		}
 	}
@ -435,6 +439,26 @@ public class ATNConfigSet implements Set<ATNConfig> {
 		return configs.iterator();
 	}
 	/**
 	 * Drops every non-greedy configuration whose alternative is set in
 	 * {@code alts}. Both the lookup structure (when present) and the ordered
 	 * config list are purged, using their iterators' {@code remove()}.
 	 *
 	 * @param alts bit set of alternative numbers whose non-greedy configs go away
 	 * @throws IllegalStateException if this set has been marked readonly
 	 */
 	public void removeNonGreedyConfigsInAlts(@NotNull BitSet alts) {
 		if ( readonly ) {
 			throw new IllegalStateException("This set is readonly");
 		}
 		// Pass 1: the lookup structure, which may be absent.
 		if (configLookup != null) {
 			Iterator<ATNConfig> lookupIter = configLookup.iterator();
 			while (lookupIter.hasNext()) {
 				ATNConfig candidate = lookupIter.next();
 				if (candidate.isGreedy() || !alts.get(candidate.alt)) {
 					continue; // greedy, or alt not selected: keep it
 				}
 				lookupIter.remove();
 			}
 		}
 		// Pass 2: the ordered list of configs, same criterion.
 		Iterator<ATNConfig> listIter = configs.iterator();
 		while (listIter.hasNext()) {
 			ATNConfig candidate = listIter.next();
 			if (candidate.isGreedy() || !alts.get(candidate.alt)) {
 				continue;
 			}
 			listIter.remove();
 		}
 	}
 	@Override
 	public void clear() {
 		if ( readonly ) throw new IllegalStateException("This set is readonly");
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNState.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNState.java
@ -117,6 +117,10 @@ public class ATNState {
 		return false;
 	}
 	/**
 	 * Reports whether this state is the exit of a non-greedy block.
 	 *
 	 * The base implementation always answers {@code false}; BlockEndState
 	 * overrides it to answer {@code true} when its start state is flagged
 	 * non-greedy.
 	 *
 	 * @return {@code false} for a plain ATNState
 	 */
 	public boolean isNonGreedyExitState() {
 		return false;
 	}
 	@Override
 	public String toString() {
 		return String.valueOf(stateNumber);
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java
@ -32,4 +32,9 @@ package org.antlr.v4.runtime.atn;
 /** Terminal node of a simple (a|b|c) block */
 public class BlockEndState extends ATNState {
 	/** Start state of the block this state terminates; may be {@code null}. */
 	public BlockStartState startState;
 	/**
 	 * This end state leaves a non-greedy block exactly when a start state is
 	 * attached and that start state is marked non-greedy.
 	 */
 	@Override
 	public boolean isNonGreedyExitState() {
 		if (startState == null) {
 			return false;
 		}
 		return startState.nonGreedy;
 	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java
@ -7,11 +7,14 @@ public class LexerATNConfig extends ATNConfig {
 	/** Capture lexer action we traverse */
 	public int lexerActionIndex = -1;
 	private final int nonGreedyDepth;
 	/**
 	 * Creates a greedy configuration (non-greedy depth 0) for the given state,
 	 * alternative and context, using {@code SemanticContext.NONE} as the
 	 * semantic context. {@code lexerActionIndex} keeps its field default of -1.
 	 *
 	 * @param state   ATN state of the new configuration
 	 * @param alt     alternative number of the new configuration
 	 * @param context prediction context; may be {@code null}
 	 */
 	public LexerATNConfig(@NotNull ATNState state,
 						  int alt,
 						  @Nullable PredictionContext context)
 	{
 		super(state, alt, context, SemanticContext.NONE);
 		this.nonGreedyDepth = 0;
 	}
 	public LexerATNConfig(@NotNull ATNState state,
@ -21,17 +24,20 @@ public class LexerATNConfig extends ATNConfig {
 	{
 		super(state, alt, context, SemanticContext.NONE);
 		this.lexerActionIndex = actionIndex;
 		this.nonGreedyDepth = 0;
 	}
 	/**
 	 * Derives a configuration from {@code c} that sits at {@code state}.
 	 * The context, semantic context, lexer action index and non-greedy depth
 	 * are all taken from {@code c}.
 	 *
 	 * @param c     configuration to derive from
 	 * @param state ATN state of the new configuration
 	 */
 	public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state) {
 		super(c, state, c.context, c.semanticContext);
 		this.lexerActionIndex = c.lexerActionIndex;
 		this.nonGreedyDepth = c.nonGreedyDepth;
 	}
 	/**
 	 * Derives a configuration from {@code c} that sits at {@code state} and
 	 * carries the supplied semantic context. The context, lexer action index
 	 * and non-greedy depth are taken from {@code c}.
 	 *
 	 * @param c               configuration to derive from
 	 * @param state           ATN state of the new configuration
 	 * @param semanticContext semantic context passed to the superclass constructor
 	 */
 	public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
 						  @NotNull SemanticContext semanticContext) {
 		super(c, state, c.context, semanticContext);
 		this.lexerActionIndex = c.lexerActionIndex;
 		this.nonGreedyDepth = c.nonGreedyDepth;
 	}
 	public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
@ -39,12 +45,42 @@ public class LexerATNConfig extends ATNConfig {
 	{
 		super(c, state, c.context, c.semanticContext);
 		this.lexerActionIndex = actionIndex;
 		this.nonGreedyDepth = c.nonGreedyDepth;
 	}
 	/**
 	 * Derives a configuration from {@code c} that sits at {@code state} and
 	 * carries the supplied prediction context. The semantic context, lexer
 	 * action index and non-greedy depth are taken from {@code c}.
 	 *
 	 * @param c       configuration to derive from
 	 * @param state   ATN state of the new configuration
 	 * @param context prediction context passed to the superclass constructor;
 	 *                may be {@code null}
 	 */
 	public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
 						  @Nullable PredictionContext context) {
 		super(c, state, context, c.semanticContext);
 		this.lexerActionIndex = c.lexerActionIndex;
 		this.nonGreedyDepth = c.nonGreedyDepth;
 	}
 	/**
 	 * Copies {@code c} (same state, context, semantic context and lexer action
 	 * index) but with an explicitly chosen non-greedy depth. Private: it is
 	 * used by {@link #enterNonGreedyBlock()} and {@link #exitNonGreedyBlock()}.
 	 *
 	 * @param c              configuration to copy
 	 * @param nonGreedyDepth non-greedy depth of the copy
 	 */
 	private LexerATNConfig(@NotNull LexerATNConfig c, int nonGreedyDepth) {
 		super(c, c.state, c.context, c.semanticContext);
 		this.lexerActionIndex = c.lexerActionIndex;
 		this.nonGreedyDepth = nonGreedyDepth;
 	}
 	/**
 	 * A lexer configuration is greedy exactly when its non-greedy depth is 0.
 	 *
 	 * @return {@code true} when {@code nonGreedyDepth == 0}
 	 */
 	@Override
 	public boolean isGreedy() {
 		return nonGreedyDepth == 0;
 	}
 	/**
 	 * Returns the non-greedy depth stored in this configuration: 0 for configs
 	 * built by the public constructors that do not copy another config, and
 	 * one more than the source config for results of {@link #enterNonGreedyBlock()}.
 	 *
 	 * @return the stored non-greedy depth
 	 */
 	@Override
 	public int getNonGreedyDepth() {
 		return nonGreedyDepth;
 	}
 	/**
 	 * Produces the configuration to use once a non-greedy block has been
 	 * entered: a copy of this one whose non-greedy depth is one larger.
 	 * This configuration itself is left untouched.
 	 *
 	 * @return a new configuration one non-greedy level deeper
 	 */
 	public LexerATNConfig enterNonGreedyBlock() {
 		final int deeper = nonGreedyDepth + 1;
 		return new LexerATNConfig(this, deeper);
 	}
 	/**
 	 * Produces the configuration to use once a non-greedy block has been left.
 	 *
 	 * A configuration that is already greedy (depth 0) has no block to leave
 	 * and is returned as-is, so the depth can never become negative. Otherwise
 	 * a copy with the depth reduced by one is returned, undoing one
 	 * {@link #enterNonGreedyBlock()}.
 	 *
 	 * @return this configuration when it is greedy, otherwise a new
 	 *         configuration one non-greedy level shallower
 	 */
 	public LexerATNConfig exitNonGreedyBlock() {
 		// Guard on the greedy case: only configs with depth > 0 are decremented.
 		if (isGreedy()) {
 			return this;
 		}
 		return new LexerATNConfig(this, nonGreedyDepth - 1);
 	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
@ -42,6 +42,7 @@ import org.antlr.v4.runtime.misc.Nullable;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.BitSet;
 /** "dup" of ParserInterpreter */
 public class LexerATNSimulator extends ATNSimulator {
@ -407,6 +408,24 @@ public class LexerATNSimulator extends ATNSimulator {
 			System.out.format("processAcceptConfigs: reach=%s, prevAccept=%s, prevIndex=%d\n",
 						 	  reach, prevAccept.config, prevAccept.index);
 		}
 		BitSet altsAtAcceptState = new BitSet();
 		BitSet nonGreedyAlts = new BitSet();
 		for (ATNConfig config : reach) {
 			if (config.state instanceof RuleStopState) {
 				altsAtAcceptState.set(config.alt);
 			}
 			if (!((LexerATNConfig)config).isGreedy()) {
 				nonGreedyAlts.set(config.alt);
 			}
 		}
 		nonGreedyAlts.and(altsAtAcceptState);
 		if (!nonGreedyAlts.isEmpty()) {
 			reach.removeNonGreedyConfigsInAlts(nonGreedyAlts);
 		}
 		for (int ci=0; ci<reach.size(); ci++) {
 			LexerATNConfig c = (LexerATNConfig)reach.get(ci);
 			if ( c.state instanceof RuleStopState) {
@ -431,13 +450,6 @@ public class LexerATNSimulator extends ATNSimulator {
 					captureSimState(prevAccept, input, c);
 				}
 				// if we reach lexer accept state with empty stack,
 				// toss out any configs pointing at wildcard edges
 				// in rest of configs work list associated with this
 				// rule (config.alt); that rule is done. this is how we
 				// cut off nongreedy .+ loops.
 				reach = deleteWildcardConfigsForAlt(reach, ci, c);
 			 	// move to next char, looking for longer match
 				// (we continue processing if there are states in reach)
 			}
@ -526,62 +538,6 @@ public class LexerATNSimulator extends ATNSimulator {
 		}
 	}
 	/** Build a copy of {@code closure} in which the configs that come after
 	 *  index {@code ci}, belong to {@code config.alt}, and sit on a wildcard
 	 *  edge lose their empty-stack contexts.
 	 *
 	 *  Configs at indexes 0..ci are copied as they are. A later config counts
 	 *  as a wildcard config when its state is exactly an ATNState (not a
 	 *  subclass such as a rule stop state) whose first transition is a
 	 *  WildcardTransition. Such a config in the same alt is split per context:
 	 *  each non-empty context yields a fresh LexerATNConfig, each empty
 	 *  context is dropped. E.g., after (2,1,[$]) the config
 	 *  (7,1,[[$, 6 $]]) contributes (7,1,[6 $]) but not (7,1,[$]).
 	 *
 	 *  Every other config is copied unchanged. {@code closure} itself is not
 	 *  modified; the new set is returned.
 	 */
 	public ATNConfigSet deleteWildcardConfigsForAlt(@NotNull ATNConfigSet closure,
 													int ci,
 													ATNConfig config)
 	{
 		final int targetAlt = config.alt;
 		if ( debug ) {
 			System.out.printf("deleteWildcardConfigsForAlt for alt %d after config %d\n", targetAlt, ci);
 		}
 		ATNConfigSet result = new ATNConfigSet();
 		// everything up to and including ci survives untouched
 		int pos = 0;
 		while ( pos <= ci ) {
 			result.add(closure.get(pos));
 			pos++;
 		}
 		for (pos = ci + 1; pos < closure.size(); pos++) {
 			LexerATNConfig candidate = (LexerATNConfig)closure.get(pos);
 			// plain state only, not rulestop etc..
 			boolean plainState = candidate.state.getClass() == ATNState.class;
 			boolean onWildcard = plainState
 				&& candidate.state.transition(0) instanceof WildcardTransition;
 			if ( candidate.alt != targetAlt || !onWildcard ) {
 				result.add(candidate); // not a kill candidate: keep entire config
 				continue;
 			}
 			// kill candidate: keep only the parts with a non-empty stack
 			for (SingletonPredictionContext stack : candidate.context) {
 				if ( !stack.isEmpty() ) {
 					result.add(new LexerATNConfig(candidate.state, candidate.alt,
 												  stack, candidate.lexerActionIndex));
 					continue;
 				}
 				// same alt, empty stack, and after ci => dropped
 				if ( debug ) {
 					System.out.format("delete config %s since alt %d and %d leads to wildcard\n",
 									  candidate, candidate.alt, candidate.state.stateNumber);
 				}
 			}
 		}
 		return result;
 	}
 	@NotNull
 	protected ATNConfigSet computeStartState(@NotNull IntStream input,
 											 @NotNull ATNState p)
@ -601,8 +557,6 @@ public class LexerATNSimulator extends ATNSimulator {
 			System.out.println("closure("+config.toString(recog, true)+")");
 		}
 		// TODO? if ( closure.contains(t) ) return;
 		if ( config.state instanceof RuleStopState ) {
 			if ( debug ) {
 				if ( recog!=null ) {
@ -651,7 +605,15 @@ public class LexerATNSimulator extends ATNSimulator {
 		for (int i=0; i<p.getNumberOfTransitions(); i++) {
 			Transition t = p.transition(i);
 			LexerATNConfig c = getEpsilonTarget(config, t, configs);
-			if ( c!=null ) closure(c, configs);
+			if ( c!=null ) {
 				final int NON_GREEDY_ENTER_ALT = 2;
 				if (i == NON_GREEDY_ENTER_ALT - 1 && ((DecisionState)p).nonGreedy) {
 					assert p.getNumberOfTransitions() == 2;
 					c = c.enterNonGreedyBlock();
 				}
 				closure(c, configs);
 			}
 		}
 	}
@ -662,6 +624,9 @@ public class LexerATNSimulator extends ATNSimulator {
 									  @NotNull ATNConfigSet configs)
 	{
 		ATNState p = config.state;
 		if (p.isNonGreedyExitState()) {
 			config = config.exitNonGreedyBlock();
 		}
 		LexerATNConfig c = null;
 		switch (t.getSerializationType()) {