forked from jasder/antlr
Fix handling of non-greedy blocks in the lexer (uses regex-style non-greedy with unordered alternatives)
This commit is contained in:
parent
28b243cda5
commit
025cc6187a
|
@ -71,6 +71,14 @@ public class ATNConfig {
|
||||||
@NotNull
|
@NotNull
|
||||||
public final SemanticContext semanticContext;
|
public final SemanticContext semanticContext;
|
||||||
|
|
||||||
|
public boolean isGreedy() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getNonGreedyDepth() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
public ATNConfig(ATNConfig old) { // dup
|
public ATNConfig(ATNConfig old) { // dup
|
||||||
this.state = old.state;
|
this.state = old.state;
|
||||||
this.alt = old.alt;
|
this.alt = old.alt;
|
||||||
|
|
|
@ -31,6 +31,7 @@ package org.antlr.v4.runtime.atn;
|
||||||
|
|
||||||
import org.antlr.v4.runtime.misc.Array2DHashSet;
|
import org.antlr.v4.runtime.misc.Array2DHashSet;
|
||||||
import org.antlr.v4.runtime.misc.DoubleKeyMap;
|
import org.antlr.v4.runtime.misc.DoubleKeyMap;
|
||||||
|
import org.antlr.v4.runtime.misc.NotNull;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
|
@ -38,6 +39,7 @@ import java.util.Collection;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
/** Specialized OrderedHashSet that can track info about the set.
|
/** Specialized OrderedHashSet that can track info about the set.
|
||||||
|
@ -246,6 +248,7 @@ public class ATNConfigSet implements Set<ATNConfig> {
|
||||||
int hashCode = 7;
|
int hashCode = 7;
|
||||||
hashCode = 31 * hashCode + o.state.stateNumber;
|
hashCode = 31 * hashCode + o.state.stateNumber;
|
||||||
hashCode = 31 * hashCode + o.alt;
|
hashCode = 31 * hashCode + o.alt;
|
||||||
|
hashCode = 31 * hashCode + o.getNonGreedyDepth();
|
||||||
hashCode = 31 * hashCode + o.semanticContext.hashCode();
|
hashCode = 31 * hashCode + o.semanticContext.hashCode();
|
||||||
return hashCode;
|
return hashCode;
|
||||||
}
|
}
|
||||||
|
@ -257,6 +260,7 @@ public class ATNConfigSet implements Set<ATNConfig> {
|
||||||
if ( hashCode(a) != hashCode(b) ) return false;
|
if ( hashCode(a) != hashCode(b) ) return false;
|
||||||
return a.state.stateNumber==b.state.stateNumber
|
return a.state.stateNumber==b.state.stateNumber
|
||||||
&& a.alt==b.alt
|
&& a.alt==b.alt
|
||||||
|
&& a.getNonGreedyDepth() == b.getNonGreedyDepth()
|
||||||
&& b.semanticContext.equals(b.semanticContext);
|
&& b.semanticContext.equals(b.semanticContext);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -435,6 +439,26 @@ public class ATNConfigSet implements Set<ATNConfig> {
|
||||||
return configs.iterator();
|
return configs.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void removeNonGreedyConfigsInAlts(@NotNull BitSet alts) {
|
||||||
|
if ( readonly ) throw new IllegalStateException("This set is readonly");
|
||||||
|
|
||||||
|
if (this.configLookup != null) {
|
||||||
|
for (Iterator<ATNConfig> it = this.configLookup.iterator(); it.hasNext(); ) {
|
||||||
|
ATNConfig entry = it.next();
|
||||||
|
if (!entry.isGreedy() && alts.get(entry.alt)) {
|
||||||
|
it.remove();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Iterator<ATNConfig> it = this.configs.iterator(); it.hasNext(); ) {
|
||||||
|
ATNConfig value = it.next();
|
||||||
|
if (!value.isGreedy() && alts.get(value.alt)) {
|
||||||
|
it.remove();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void clear() {
|
public void clear() {
|
||||||
if ( readonly ) throw new IllegalStateException("This set is readonly");
|
if ( readonly ) throw new IllegalStateException("This set is readonly");
|
||||||
|
|
|
@ -117,6 +117,10 @@ public class ATNState {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isNonGreedyExitState() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.valueOf(stateNumber);
|
return String.valueOf(stateNumber);
|
||||||
|
|
|
@ -32,4 +32,9 @@ package org.antlr.v4.runtime.atn;
|
||||||
/** Terminal node of a simple (a|b|c) block */
|
/** Terminal node of a simple (a|b|c) block */
|
||||||
public class BlockEndState extends ATNState {
|
public class BlockEndState extends ATNState {
|
||||||
public BlockStartState startState;
|
public BlockStartState startState;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isNonGreedyExitState() {
|
||||||
|
return startState != null && startState.nonGreedy;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,11 +7,14 @@ public class LexerATNConfig extends ATNConfig {
|
||||||
/** Capture lexer action we traverse */
|
/** Capture lexer action we traverse */
|
||||||
public int lexerActionIndex = -1;
|
public int lexerActionIndex = -1;
|
||||||
|
|
||||||
|
private final int nonGreedyDepth;
|
||||||
|
|
||||||
/**
 * Builds a configuration from a state, alternative and context, using
 * {@code SemanticContext.NONE} and a non-greedy depth of zero.
 */
public LexerATNConfig(@NotNull ATNState state,
                      int alt,
                      @Nullable PredictionContext context)
{
    super(state, alt, context, SemanticContext.NONE);
    nonGreedyDepth = 0; // a freshly built config starts at depth zero
}
|
||||||
|
|
||||||
public LexerATNConfig(@NotNull ATNState state,
|
public LexerATNConfig(@NotNull ATNState state,
|
||||||
|
@ -21,17 +24,20 @@ public class LexerATNConfig extends ATNConfig {
|
||||||
{
|
{
|
||||||
super(state, alt, context, SemanticContext.NONE);
|
super(state, alt, context, SemanticContext.NONE);
|
||||||
this.lexerActionIndex = actionIndex;
|
this.lexerActionIndex = actionIndex;
|
||||||
|
this.nonGreedyDepth = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Copies {@code c} onto a different state, keeping its context, semantic
 * context, lexer action index and non-greedy depth.
 */
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state) {
    super(c, state, c.context, c.semanticContext);
    // carry over the lexer-specific fields of the source config
    nonGreedyDepth = c.nonGreedyDepth;
    lexerActionIndex = c.lexerActionIndex;
}
|
||||||
|
|
||||||
/**
 * Copies {@code c} onto a different state with a replacement semantic
 * context, keeping its context, lexer action index and non-greedy depth.
 */
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
                      @NotNull SemanticContext semanticContext) {
    super(c, state, c.context, semanticContext);
    // carry over the lexer-specific fields of the source config
    nonGreedyDepth = c.nonGreedyDepth;
    lexerActionIndex = c.lexerActionIndex;
}
|
||||||
|
|
||||||
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
|
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
|
||||||
|
@ -39,12 +45,42 @@ public class LexerATNConfig extends ATNConfig {
|
||||||
{
|
{
|
||||||
super(c, state, c.context, c.semanticContext);
|
super(c, state, c.context, c.semanticContext);
|
||||||
this.lexerActionIndex = actionIndex;
|
this.lexerActionIndex = actionIndex;
|
||||||
|
this.nonGreedyDepth = c.nonGreedyDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Copies {@code c} onto a different state with a replacement prediction
 * context, keeping its semantic context, lexer action index and
 * non-greedy depth.
 */
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
                      @Nullable PredictionContext context) {
    super(c, state, context, c.semanticContext);
    // carry over the lexer-specific fields of the source config
    nonGreedyDepth = c.nonGreedyDepth;
    lexerActionIndex = c.lexerActionIndex;
}
|
||||||
|
|
||||||
|
/**
 * Copies {@code c} unchanged except for its non-greedy depth, which is
 * set to {@code nonGreedyDepth}.
 */
private LexerATNConfig(@NotNull LexerATNConfig c, int nonGreedyDepth) {
    super(c, c.state, c.context, c.semanticContext);
    // assign the new depth first, then carry over the action index
    this.nonGreedyDepth = nonGreedyDepth;
    this.lexerActionIndex = c.lexerActionIndex;
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isGreedy() {
|
||||||
|
return nonGreedyDepth == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getNonGreedyDepth() {
|
||||||
|
return nonGreedyDepth;
|
||||||
|
}
|
||||||
|
|
||||||
|
public LexerATNConfig enterNonGreedyBlock() {
|
||||||
|
return new LexerATNConfig(this, nonGreedyDepth + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public LexerATNConfig exitNonGreedyBlock() {
|
||||||
|
if (!isGreedy()) {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new LexerATNConfig(this, nonGreedyDepth - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,7 @@ import org.antlr.v4.runtime.misc.Nullable;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
import java.util.BitSet;
|
||||||
|
|
||||||
/** "dup" of ParserInterpreter */
|
/** "dup" of ParserInterpreter */
|
||||||
public class LexerATNSimulator extends ATNSimulator {
|
public class LexerATNSimulator extends ATNSimulator {
|
||||||
|
@ -407,6 +408,24 @@ public class LexerATNSimulator extends ATNSimulator {
|
||||||
System.out.format("processAcceptConfigs: reach=%s, prevAccept=%s, prevIndex=%d\n",
|
System.out.format("processAcceptConfigs: reach=%s, prevAccept=%s, prevIndex=%d\n",
|
||||||
reach, prevAccept.config, prevAccept.index);
|
reach, prevAccept.config, prevAccept.index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BitSet altsAtAcceptState = new BitSet();
|
||||||
|
BitSet nonGreedyAlts = new BitSet();
|
||||||
|
for (ATNConfig config : reach) {
|
||||||
|
if (config.state instanceof RuleStopState) {
|
||||||
|
altsAtAcceptState.set(config.alt);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!((LexerATNConfig)config).isGreedy()) {
|
||||||
|
nonGreedyAlts.set(config.alt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nonGreedyAlts.and(altsAtAcceptState);
|
||||||
|
if (!nonGreedyAlts.isEmpty()) {
|
||||||
|
reach.removeNonGreedyConfigsInAlts(nonGreedyAlts);
|
||||||
|
}
|
||||||
|
|
||||||
for (int ci=0; ci<reach.size(); ci++) {
|
for (int ci=0; ci<reach.size(); ci++) {
|
||||||
LexerATNConfig c = (LexerATNConfig)reach.get(ci);
|
LexerATNConfig c = (LexerATNConfig)reach.get(ci);
|
||||||
if ( c.state instanceof RuleStopState) {
|
if ( c.state instanceof RuleStopState) {
|
||||||
|
@ -431,13 +450,6 @@ public class LexerATNSimulator extends ATNSimulator {
|
||||||
captureSimState(prevAccept, input, c);
|
captureSimState(prevAccept, input, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we reach lexer accept state with empty stack,
|
|
||||||
// toss out any configs pointing at wildcard edges
|
|
||||||
// in rest of configs work list associated with this
|
|
||||||
// rule (config.alt); that rule is done. this is how we
|
|
||||||
// cut off nongreedy .+ loops.
|
|
||||||
reach = deleteWildcardConfigsForAlt(reach, ci, c);
|
|
||||||
|
|
||||||
// move to next char, looking for longer match
|
// move to next char, looking for longer match
|
||||||
// (we continue processing if there are states in reach)
|
// (we continue processing if there are states in reach)
|
||||||
}
|
}
|
||||||
|
@ -526,62 +538,6 @@ public class LexerATNSimulator extends ATNSimulator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Delete configs for alt following ci that have a wildcard edge but
|
|
||||||
* only for configs with empty stack. E.g., if we want to kill after
|
|
||||||
* config (2,1,[$]), then we need to wack only configs with $ stack:
|
|
||||||
*
|
|
||||||
* [..., (2,1,[$]), ..., (7,1,[[$, 6 $]])]
|
|
||||||
*
|
|
||||||
* That means wacking (7,1,[$]) but not (7,1,[6 $]).
|
|
||||||
*
|
|
||||||
* Incoming config could have multiple stacks but we only care about
|
|
||||||
* empty stack since that means we reached end of a lexer rule from
|
|
||||||
* nextToken directly.
|
|
||||||
*
|
|
||||||
* Closure is unmodified; copy returned.
|
|
||||||
*/
|
|
||||||
public ATNConfigSet deleteWildcardConfigsForAlt(@NotNull ATNConfigSet closure,
|
|
||||||
int ci,
|
|
||||||
ATNConfig config)
|
|
||||||
{
|
|
||||||
int alt = config.alt;
|
|
||||||
if ( debug ) {
|
|
||||||
System.out.printf("deleteWildcardConfigsForAlt for alt %d after config %d\n", alt, ci);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATNConfigSet dup = new ATNConfigSet(); // build up as we go thru loop
|
|
||||||
for (int j=0; j<=ci; j++) dup.add(closure.get(j)); // add stuff up to ci
|
|
||||||
int j=ci+1;
|
|
||||||
while ( j < closure.size() ) {
|
|
||||||
LexerATNConfig c = (LexerATNConfig)closure.get(j);
|
|
||||||
boolean isWildcard = c.state.getClass() == ATNState.class && // plain state only, not rulestop etc..
|
|
||||||
c.state.transition(0) instanceof WildcardTransition;
|
|
||||||
if ( c.alt == alt && isWildcard ) {
|
|
||||||
// found config to kill but only if empty stack.
|
|
||||||
for (SingletonPredictionContext ctx : c.context) {
|
|
||||||
if ( ctx.isEmpty() ) {
|
|
||||||
// c.alt matches, empty stack, and j > ci => kill it
|
|
||||||
if ( debug ) {
|
|
||||||
System.out.format("delete config %s since alt %d and %d leads to wildcard\n",
|
|
||||||
c, c.alt, c.state.stateNumber);
|
|
||||||
}
|
|
||||||
// don't add
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
LexerATNConfig splitConfig =
|
|
||||||
new LexerATNConfig(c.state, c.alt, ctx, c.lexerActionIndex);
|
|
||||||
dup.add(splitConfig);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
dup.add(c); // add entire config
|
|
||||||
}
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
return dup;
|
|
||||||
}
|
|
||||||
|
|
||||||
@NotNull
|
@NotNull
|
||||||
protected ATNConfigSet computeStartState(@NotNull IntStream input,
|
protected ATNConfigSet computeStartState(@NotNull IntStream input,
|
||||||
@NotNull ATNState p)
|
@NotNull ATNState p)
|
||||||
|
@ -601,8 +557,6 @@ public class LexerATNSimulator extends ATNSimulator {
|
||||||
System.out.println("closure("+config.toString(recog, true)+")");
|
System.out.println("closure("+config.toString(recog, true)+")");
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO? if ( closure.contains(t) ) return;
|
|
||||||
|
|
||||||
if ( config.state instanceof RuleStopState ) {
|
if ( config.state instanceof RuleStopState ) {
|
||||||
if ( debug ) {
|
if ( debug ) {
|
||||||
if ( recog!=null ) {
|
if ( recog!=null ) {
|
||||||
|
@ -651,7 +605,15 @@ public class LexerATNSimulator extends ATNSimulator {
|
||||||
for (int i=0; i<p.getNumberOfTransitions(); i++) {
|
for (int i=0; i<p.getNumberOfTransitions(); i++) {
|
||||||
Transition t = p.transition(i);
|
Transition t = p.transition(i);
|
||||||
LexerATNConfig c = getEpsilonTarget(config, t, configs);
|
LexerATNConfig c = getEpsilonTarget(config, t, configs);
|
||||||
if ( c!=null ) closure(c, configs);
|
if ( c!=null ) {
|
||||||
|
final int NON_GREEDY_ENTER_ALT = 2;
|
||||||
|
if (i == NON_GREEDY_ENTER_ALT - 1 && ((DecisionState)p).nonGreedy) {
|
||||||
|
assert p.getNumberOfTransitions() == 2;
|
||||||
|
c = c.enterNonGreedyBlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
closure(c, configs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -662,6 +624,9 @@ public class LexerATNSimulator extends ATNSimulator {
|
||||||
@NotNull ATNConfigSet configs)
|
@NotNull ATNConfigSet configs)
|
||||||
{
|
{
|
||||||
ATNState p = config.state;
|
ATNState p = config.state;
|
||||||
|
if (p.isNonGreedyExitState()) {
|
||||||
|
config = config.exitNonGreedyBlock();
|
||||||
|
}
|
||||||
|
|
||||||
LexerATNConfig c = null;
|
LexerATNConfig c = null;
|
||||||
switch (t.getSerializationType()) {
|
switch (t.getSerializationType()) {
|
||||||
|
|
Loading…
Reference in New Issue