Fix handling of non-greedy blocks in the lexer (uses regex-style non-greedy with unordered alternatives)

2012-10-14 16:40:53 -05:00 · 2012-10-14 16:40:53 -05:00 · 025cc6187a
parent 28b243cda5
commit 025cc6187a
6 changed files with 108 additions and 66 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java
@ -71,6 +71,14 @@ public class ATNConfig {
    @NotNull
    public final SemanticContext semanticContext;

+	/**
+	 * Reports whether this configuration is greedy. This base implementation
+	 * always answers {@code true}; {@code LexerATNConfig} overrides it.
+	 */
+	public boolean isGreedy() {
+		return true;
+	}
+
+	/**
+	 * Returns the non-greedy nesting depth of this configuration. This base
+	 * implementation always answers {@code 0}; {@code LexerATNConfig} overrides it.
+	 */
+	public int getNonGreedyDepth() {
+		return 0;
+	}
+
 	public ATNConfig(ATNConfig old) { // dup
 		this.state = old.state;
 		this.alt = old.alt;
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java
@ -31,6 +31,7 @@ package org.antlr.v4.runtime.atn;

 import org.antlr.v4.runtime.misc.Array2DHashSet;
 import org.antlr.v4.runtime.misc.DoubleKeyMap;
+import org.antlr.v4.runtime.misc.NotNull;

 import java.util.ArrayList;
 import java.util.BitSet;
@ -38,6 +39,7 @@ import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;

 /** Specialized OrderedHashSet that can track info about the set.
@ -246,6 +248,7 @@ public class ATNConfigSet implements Set<ATNConfig> {
 			int hashCode = 7;
 			hashCode = 31 * hashCode + o.state.stateNumber;
 			hashCode = 31 * hashCode + o.alt;
+			hashCode = 31 * hashCode + o.getNonGreedyDepth();
 			hashCode = 31 * hashCode + o.semanticContext.hashCode();
 	        return hashCode;
 		}
@ -257,6 +260,7 @@ public class ATNConfigSet implements Set<ATNConfig> {
 			if ( hashCode(a) != hashCode(b) ) return false;
 			return a.state.stateNumber==b.state.stateNumber
 				&& a.alt==b.alt
+				&& a.getNonGreedyDepth() == b.getNonGreedyDepth()
-				&& b.semanticContext.equals(b.semanticContext);
+				// compare a's predicate with b's; comparing b with itself is always
+				// true and would let configs with different predicates collapse
+				&& a.semanticContext.equals(b.semanticContext);
 		}
 	}
@ -435,6 +439,26 @@ public class ATNConfigSet implements Set<ATNConfig> {
 		return configs.iterator();
 	}

+	/**
+	 * Drops every non-greedy configuration whose alternative is set in
+	 * {@code alts}. Matching entries are removed from {@code configLookup}
+	 * (when it is non-null) and then from {@code configs}.
+	 *
+	 * @param alts alternatives whose non-greedy configurations are discarded
+	 * @throws IllegalStateException if this set is read-only
+	 */
+	public void removeNonGreedyConfigsInAlts(@NotNull BitSet alts) {
+		if ( readonly ) throw new IllegalStateException("This set is readonly");
+
+		if (this.configLookup != null) {
+			Iterator<ATNConfig> lookupIt = this.configLookup.iterator();
+			while (lookupIt.hasNext()) {
+				ATNConfig candidate = lookupIt.next();
+				// keep greedy configs and configs of alternatives not listed in alts
+				if (candidate.isGreedy() || !alts.get(candidate.alt)) {
+					continue;
+				}
+				lookupIt.remove();
+			}
+		}
+
+		Iterator<ATNConfig> configIt = this.configs.iterator();
+		while (configIt.hasNext()) {
+			ATNConfig candidate = configIt.next();
+			if (candidate.isGreedy() || !alts.get(candidate.alt)) {
+				continue;
+			}
+			configIt.remove();
+		}
+	}
+
 	@Override
 	public void clear() {
 		if ( readonly ) throw new IllegalStateException("This set is readonly");
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNState.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNState.java
@ -117,6 +117,10 @@ public class ATNState {
 		return false;
 	}

+	/**
+	 * Reports whether this state is the exit of a non-greedy block. This base
+	 * implementation always answers {@code false}; {@code BlockEndState}
+	 * overrides it.
+	 */
+	public boolean isNonGreedyExitState() {
+		return false;
+	}
+
 	@Override
 	public String toString() {
 		return String.valueOf(stateNumber);
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java
@ -32,4 +32,9 @@ package org.antlr.v4.runtime.atn;
 /** Terminal node of a simple (a|b|c) block */
 public class BlockEndState extends ATNState {
 	public BlockStartState startState;
+
+	/**
+	 * Answers {@code true} only when a start state has been linked to this
+	 * end state and that start state is flagged {@code nonGreedy}.
+	 */
+	@Override
+	public boolean isNonGreedyExitState() {
+		if (startState == null) {
+			return false;
+		}
+		return startState.nonGreedy;
+	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java
@ -7,11 +7,14 @@ public class LexerATNConfig extends ATNConfig {
 	/** Capture lexer action we traverse */
 	public int lexerActionIndex = -1;

+	private final int nonGreedyDepth;
+
+	/**
+	 * Creates a configuration for {@code state}, {@code alt} and {@code context}
+	 * using {@code SemanticContext.NONE}; the non-greedy depth starts at 0.
+	 */
 	public LexerATNConfig(@NotNull ATNState state,
 						  int alt,
 						  @Nullable PredictionContext context)
 	{
 		super(state, alt, context, SemanticContext.NONE);
+		this.nonGreedyDepth = 0;
 	}

 	public LexerATNConfig(@NotNull ATNState state,
@ -21,17 +24,20 @@ public class LexerATNConfig extends ATNConfig {
 	{
 		super(state, alt, context, SemanticContext.NONE);
 		this.lexerActionIndex = actionIndex;
+		this.nonGreedyDepth = 0;
 	}

+	/**
+	 * Derives a configuration from {@code c} at a new {@code state}, reusing
+	 * {@code c}'s context and semantic context and copying its lexer action
+	 * index and non-greedy depth.
+	 */
 	public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state) {
 		super(c, state, c.context, c.semanticContext);
 		this.lexerActionIndex = c.lexerActionIndex;
+		this.nonGreedyDepth = c.nonGreedyDepth;
 	}

+	/**
+	 * Derives a configuration from {@code c} at a new {@code state} with the
+	 * given {@code semanticContext}, reusing {@code c}'s context and copying
+	 * its lexer action index and non-greedy depth.
+	 */
 	public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
 						  @NotNull SemanticContext semanticContext) {
 		super(c, state, c.context, semanticContext);
 		this.lexerActionIndex = c.lexerActionIndex;
+		this.nonGreedyDepth = c.nonGreedyDepth;
 	}

 	public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
@ -39,12 +45,42 @@ public class LexerATNConfig extends ATNConfig {
 	{
 		super(c, state, c.context, c.semanticContext);
 		this.lexerActionIndex = actionIndex;
+		this.nonGreedyDepth = c.nonGreedyDepth;
 	}

+	/**
+	 * Derives a configuration from {@code c} at a new {@code state} with the
+	 * given prediction {@code context}, reusing {@code c}'s semantic context
+	 * and copying its lexer action index and non-greedy depth.
+	 */
 	public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
 						  @Nullable PredictionContext context) {
 		super(c, state, context, c.semanticContext);
 		this.lexerActionIndex = c.lexerActionIndex;
+		this.nonGreedyDepth = c.nonGreedyDepth;
+	}
+
+	/**
+	 * Copies {@code c} (same state, context, semantic context and lexer action
+	 * index) but with the supplied {@code nonGreedyDepth}. Used by
+	 * {@code enterNonGreedyBlock()} and {@code exitNonGreedyBlock()}.
+	 */
+	private LexerATNConfig(@NotNull LexerATNConfig c, int nonGreedyDepth) {
+		super(c, c.state, c.context, c.semanticContext);
+		this.lexerActionIndex = c.lexerActionIndex;
+		this.nonGreedyDepth = nonGreedyDepth;
+	}
+
+	/** A lexer configuration is greedy exactly when its non-greedy depth is 0. */
+	@Override
+	public boolean isGreedy() {
+		return nonGreedyDepth == 0;
+	}
+
+	/** Returns the non-greedy depth stored in this configuration. */
+	@Override
+	public int getNonGreedyDepth() {
+		return nonGreedyDepth;
+	}
+
+	/**
+	 * Returns a copy of this configuration whose non-greedy depth is one
+	 * greater than this one's; this instance is not modified.
+	 */
+	public LexerATNConfig enterNonGreedyBlock() {
+		return new LexerATNConfig(this, nonGreedyDepth + 1);
+	}
+
+	/**
+	 * Returns the configuration to use after leaving a non-greedy block.
+	 * A greedy configuration (depth 0) is returned unchanged; otherwise a
+	 * copy with the non-greedy depth reduced by one is returned. This
+	 * instance is never modified.
+	 */
+	public LexerATNConfig exitNonGreedyBlock() {
+		// Depth 0 means we are not inside any non-greedy block, so there is
+		// nothing to unwind. Testing !isGreedy() here would leave nested
+		// configs stuck at their depth and push depth-0 configs to -1.
+		if (isGreedy()) {
+			return this;
+		}
+
+		return new LexerATNConfig(this, nonGreedyDepth - 1);
 	}

 }
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
@ -42,6 +42,7 @@ import org.antlr.v4.runtime.misc.Nullable;

 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.BitSet;

 /** "dup" of ParserInterpreter */
 public class LexerATNSimulator extends ATNSimulator {
@ -407,6 +408,24 @@ public class LexerATNSimulator extends ATNSimulator {
 			System.out.format("processAcceptConfigs: reach=%s, prevAccept=%s, prevIndex=%d\n",
 						 	  reach, prevAccept.config, prevAccept.index);
 		}
+
+		BitSet altsAtAcceptState = new BitSet();
+		BitSet nonGreedyAlts = new BitSet();
+		for (ATNConfig config : reach) {
+			if (config.state instanceof RuleStopState) {
+				altsAtAcceptState.set(config.alt);
+			}
+
+			if (!((LexerATNConfig)config).isGreedy()) {
+				nonGreedyAlts.set(config.alt);
+			}
+		}
+
+		nonGreedyAlts.and(altsAtAcceptState);
+		if (!nonGreedyAlts.isEmpty()) {
+			reach.removeNonGreedyConfigsInAlts(nonGreedyAlts);
+		}
+
 		for (int ci=0; ci<reach.size(); ci++) {
 			LexerATNConfig c = (LexerATNConfig)reach.get(ci);
 			if ( c.state instanceof RuleStopState) {
@ -431,13 +450,6 @@ public class LexerATNSimulator extends ATNSimulator {
 					captureSimState(prevAccept, input, c);
 				}

-				// if we reach lexer accept state with empty stack,
-				// toss out any configs pointing at wildcard edges
-				// in rest of configs work list associated with this
-				// rule (config.alt); that rule is done. this is how we
-				// cut off nongreedy .+ loops.
-				reach = deleteWildcardConfigsForAlt(reach, ci, c);
-
 			 	// move to next char, looking for longer match
 				// (we continue processing if there are states in reach)
 			}
@ -526,62 +538,6 @@ public class LexerATNSimulator extends ATNSimulator {
 		}
 	}

-	/** Delete configs for alt following ci that have a wildcard edge but
-	 *  only for configs with empty stack. E.g., if we want to kill after
-	 *  config (2,1,[$]), then we need to wack only configs with $ stack:
-	 *
-	 *  	[..., (2,1,[$]), ..., (7,1,[[$, 6 $]])]
-	 *
-	 *  That means wacking (7,1,[$]) but not (7,1,[6 $]).
-	 *
-	 *  Incoming config could have multiple stacks but we only care about
-	 *  empty stack since that means we reached end of a lexer rule from
-	 *  nextToken directly.
-	 *
-	 *  Closure is unmodified; copy returned.
-	 */
-	public ATNConfigSet deleteWildcardConfigsForAlt(@NotNull ATNConfigSet closure,
-													int ci,
-													ATNConfig config)
-	{
-		int alt = config.alt;
-		if ( debug ) {
-			System.out.printf("deleteWildcardConfigsForAlt for alt %d after config %d\n", alt, ci);
-		}
-
-		ATNConfigSet dup = new ATNConfigSet(); // build up as we go thru loop
-		for (int j=0; j<=ci; j++) dup.add(closure.get(j)); // add stuff up to ci
-		int j=ci+1;
-		while ( j < closure.size() ) {
-			LexerATNConfig c = (LexerATNConfig)closure.get(j);
-			boolean isWildcard = c.state.getClass() == ATNState.class && // plain state only, not rulestop etc..
-				    c.state.transition(0) instanceof WildcardTransition;
-			if ( c.alt == alt && isWildcard ) {
-				// found config to kill but only if empty stack.
-				for (SingletonPredictionContext ctx : c.context) {
-					if ( ctx.isEmpty() ) {
-						// c.alt matches, empty stack, and j > ci => kill it
-						if ( debug ) {
-							System.out.format("delete config %s since alt %d and %d leads to wildcard\n",
-											  c, c.alt, c.state.stateNumber);
-						}
-						// don't add
-					}
-					else {
-						LexerATNConfig splitConfig =
-							new LexerATNConfig(c.state, c.alt, ctx, c.lexerActionIndex);
-						dup.add(splitConfig);
-					}
-				}
-			}
-			else {
-				dup.add(c); // add entire config
-			}
-			j++;
-		}
-		return dup;
-	}
-
 	@NotNull
 	protected ATNConfigSet computeStartState(@NotNull IntStream input,
 											 @NotNull ATNState p)
@ -601,8 +557,6 @@ public class LexerATNSimulator extends ATNSimulator {
 			System.out.println("closure("+config.toString(recog, true)+")");
 		}

-		// TODO? if ( closure.contains(t) ) return;
-
 		if ( config.state instanceof RuleStopState ) {
 			if ( debug ) {
 				if ( recog!=null ) {
@ -651,7 +605,15 @@ public class LexerATNSimulator extends ATNSimulator {
 		for (int i=0; i<p.getNumberOfTransitions(); i++) {
 			Transition t = p.transition(i);
 			LexerATNConfig c = getEpsilonTarget(config, t, configs);
-			if ( c!=null ) closure(c, configs);
+			if ( c!=null ) {
+				final int NON_GREEDY_ENTER_ALT = 2;
+				if (i == NON_GREEDY_ENTER_ALT - 1 && ((DecisionState)p).nonGreedy) {
+					assert p.getNumberOfTransitions() == 2;
+					c = c.enterNonGreedyBlock();
+				}
+
+				closure(c, configs);
+			}
 		}
 	}

@ -662,6 +624,9 @@ public class LexerATNSimulator extends ATNSimulator {
 									  @NotNull ATNConfigSet configs)
 	{
 		ATNState p = config.state;
+		if (p.isNonGreedyExitState()) {
+			config = config.exitNonGreedyBlock();
+		}

 		LexerATNConfig c = null;
 		switch (t.getSerializationType()) {