The lexer now uses strictly ordered alternatives within a rule. This simplifies the code, increases performance when the non-terminal (lexer rule) depth is limited, and actually fixes non-greedy behavior.

This commit is contained in:
Sam Harwell 2012-10-21 18:50:24 -05:00
parent ed7d4b1dc1
commit 12b2c34946
12 changed files with 399 additions and 417 deletions

View File

@ -1,30 +1,31 @@
/* /*
[The "BSD license"] * [The "BSD license"]
Copyright (c) 2011 Terence Parr * Copyright (c) 2012 Terence Parr
All rights reserved. * Copyright (c) 2012 Sam Harwell
* All rights reserved.
Redistribution and use in source and binary forms, with or without *
modification, are permitted provided that the following conditions * Redistribution and use in source and binary forms, with or without
are met: * modification, are permitted provided that the following conditions
* are met:
1. Redistributions of source code must retain the above copyright *
notice, this list of conditions and the following disclaimer. * 1. Redistributions of source code must retain the above copyright
2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer.
notice, this list of conditions and the following disclaimer in the * 2. Redistributions in binary form must reproduce the above copyright
documentation and/or other materials provided with the distribution. * notice, this list of conditions and the following disclaimer in the
3. The name of the author may not be used to endorse or promote products * documentation and/or other materials provided with the distribution.
derived from this software without specific prior written permission. * 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR *
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
package org.antlr.v4.runtime.atn; package org.antlr.v4.runtime.atn;
@ -71,22 +72,6 @@ public class ATNConfig {
@NotNull @NotNull
public final SemanticContext semanticContext; public final SemanticContext semanticContext;
public boolean isGreedy() {
return true;
}
/** Lexer non-greedy implementations need to track information per
* ATNConfig. When the lexer reaches an accept state for a lexer
* rule, it needs to wipe out any configurations associated with
* that rule that are part of a non-greedy subrule. To do that it
* has to make sure that it tracks when a configuration was derived
* from an element within a non-greedy subrule. We use depth for
* that. We're greedy when the depth is 0.
*/
public int getNonGreedyDepth() {
return 0;
}
public ATNConfig(ATNConfig old) { // dup public ATNConfig(ATNConfig old) { // dup
this.state = old.state; this.state = old.state;
this.alt = old.alt; this.alt = old.alt;

View File

@ -1,37 +1,37 @@
/* /*
[The "BSD license"] * [The "BSD license"]
Copyright (c) 2011 Terence Parr * Copyright (c) 2012 Terence Parr
All rights reserved. * Copyright (c) 2012 Sam Harwell
* All rights reserved.
Redistribution and use in source and binary forms, with or without *
modification, are permitted provided that the following conditions * Redistribution and use in source and binary forms, with or without
are met: * modification, are permitted provided that the following conditions
* are met:
1. Redistributions of source code must retain the above copyright *
notice, this list of conditions and the following disclaimer. * 1. Redistributions of source code must retain the above copyright
2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer.
notice, this list of conditions and the following disclaimer in the * 2. Redistributions in binary form must reproduce the above copyright
documentation and/or other materials provided with the distribution. * notice, this list of conditions and the following disclaimer in the
3. The name of the author may not be used to endorse or promote products * documentation and/or other materials provided with the distribution.
derived from this software without specific prior written permission. * 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR *
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
package org.antlr.v4.runtime.atn; package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.Array2DHashSet; import org.antlr.v4.runtime.misc.Array2DHashSet;
import org.antlr.v4.runtime.misc.DoubleKeyMap; import org.antlr.v4.runtime.misc.DoubleKeyMap;
import org.antlr.v4.runtime.misc.NotNull;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.BitSet; import java.util.BitSet;
@ -39,7 +39,6 @@ import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
/** Specialized OrderedHashSet that can track info about the set. /** Specialized OrderedHashSet that can track info about the set.
@ -248,7 +247,6 @@ public class ATNConfigSet implements Set<ATNConfig> {
int hashCode = 7; int hashCode = 7;
hashCode = 31 * hashCode + o.state.stateNumber; hashCode = 31 * hashCode + o.state.stateNumber;
hashCode = 31 * hashCode + o.alt; hashCode = 31 * hashCode + o.alt;
hashCode = 31 * hashCode + o.getNonGreedyDepth();
hashCode = 31 * hashCode + o.semanticContext.hashCode(); hashCode = 31 * hashCode + o.semanticContext.hashCode();
return hashCode; return hashCode;
} }
@ -260,7 +258,6 @@ public class ATNConfigSet implements Set<ATNConfig> {
if ( hashCode(a) != hashCode(b) ) return false; if ( hashCode(a) != hashCode(b) ) return false;
return a.state.stateNumber==b.state.stateNumber return a.state.stateNumber==b.state.stateNumber
&& a.alt==b.alt && a.alt==b.alt
&& a.getNonGreedyDepth() == b.getNonGreedyDepth()
&& b.semanticContext.equals(b.semanticContext); && b.semanticContext.equals(b.semanticContext);
} }
} }
@ -439,26 +436,6 @@ public class ATNConfigSet implements Set<ATNConfig> {
return configs.iterator(); return configs.iterator();
} }
public void removeNonGreedyConfigsInAlts(@NotNull BitSet alts) {
if ( readonly ) throw new IllegalStateException("This set is readonly");
if (this.configLookup != null) {
for (Iterator<ATNConfig> it = this.configLookup.iterator(); it.hasNext(); ) {
ATNConfig entry = it.next();
if (!entry.isGreedy() && alts.get(entry.alt)) {
it.remove();
}
}
}
for (Iterator<ATNConfig> it = this.configs.iterator(); it.hasNext(); ) {
ATNConfig value = it.next();
if (!value.isGreedy() && alts.get(value.alt)) {
it.remove();
}
}
}
@Override @Override
public void clear() { public void clear() {
if ( readonly ) throw new IllegalStateException("This set is readonly"); if ( readonly ) throw new IllegalStateException("This set is readonly");

View File

@ -1,30 +1,31 @@
/* /*
[The "BSD license"] * [The "BSD license"]
Copyright (c) 2011 Terence Parr * Copyright (c) 2012 Terence Parr
All rights reserved. * Copyright (c) 2012 Sam Harwell
* All rights reserved.
Redistribution and use in source and binary forms, with or without *
modification, are permitted provided that the following conditions * Redistribution and use in source and binary forms, with or without
are met: * modification, are permitted provided that the following conditions
* are met:
1. Redistributions of source code must retain the above copyright *
notice, this list of conditions and the following disclaimer. * 1. Redistributions of source code must retain the above copyright
2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer.
notice, this list of conditions and the following disclaimer in the * 2. Redistributions in binary form must reproduce the above copyright
documentation and/or other materials provided with the distribution. * notice, this list of conditions and the following disclaimer in the
3. The name of the author may not be used to endorse or promote products * documentation and/or other materials provided with the distribution.
derived from this software without specific prior written permission. * 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR *
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
package org.antlr.v4.runtime.atn; package org.antlr.v4.runtime.atn;
@ -276,11 +277,9 @@ public abstract class ATNSimulator {
int ndecisions = toInt(data[p++]); int ndecisions = toInt(data[p++]);
for (int i=1; i<=ndecisions; i++) { for (int i=1; i<=ndecisions; i++) {
int s = toInt(data[p++]); int s = toInt(data[p++]);
int nonGreedy = toInt(data[p++]);
DecisionState decState = (DecisionState)atn.states.get(s); DecisionState decState = (DecisionState)atn.states.get(s);
atn.decisionToState.add(decState); atn.decisionToState.add(decState);
decState.decision = i-1; decState.decision = i-1;
decState.nonGreedy = nonGreedy != 0;
} }
verifyATN(atn); verifyATN(atn);

View File

@ -32,9 +32,4 @@ package org.antlr.v4.runtime.atn;
/** Terminal node of a simple (a|b|c) block */ /** Terminal node of a simple (a|b|c) block */
public class BlockEndState extends ATNState { public class BlockEndState extends ATNState {
public BlockStartState startState; public BlockStartState startState;
@Override
public boolean isNonGreedyExitState() {
return startState != null && startState.nonGreedy;
}
} }

View File

@ -31,5 +31,4 @@ package org.antlr.v4.runtime.atn;
public class DecisionState extends ATNState { public class DecisionState extends ATNState {
public int decision = -1; public int decision = -1;
public boolean nonGreedy;
} }

View File

@ -1,3 +1,33 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime.atn; package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.NotNull;
@ -7,14 +37,11 @@ public class LexerATNConfig extends ATNConfig {
/** Capture lexer action we traverse */ /** Capture lexer action we traverse */
public int lexerActionIndex = -1; public int lexerActionIndex = -1;
private final int nonGreedyDepth;
public LexerATNConfig(@NotNull ATNState state, public LexerATNConfig(@NotNull ATNState state,
int alt, int alt,
@Nullable PredictionContext context) @Nullable PredictionContext context)
{ {
super(state, alt, context, SemanticContext.NONE); super(state, alt, context, SemanticContext.NONE);
this.nonGreedyDepth = 0;
} }
public LexerATNConfig(@NotNull ATNState state, public LexerATNConfig(@NotNull ATNState state,
@ -24,20 +51,17 @@ public class LexerATNConfig extends ATNConfig {
{ {
super(state, alt, context, SemanticContext.NONE); super(state, alt, context, SemanticContext.NONE);
this.lexerActionIndex = actionIndex; this.lexerActionIndex = actionIndex;
this.nonGreedyDepth = 0;
} }
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state) { public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state) {
super(c, state, c.context, c.semanticContext); super(c, state, c.context, c.semanticContext);
this.lexerActionIndex = c.lexerActionIndex; this.lexerActionIndex = c.lexerActionIndex;
this.nonGreedyDepth = c.nonGreedyDepth;
} }
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state, public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
@NotNull SemanticContext semanticContext) { @NotNull SemanticContext semanticContext) {
super(c, state, c.context, semanticContext); super(c, state, c.context, semanticContext);
this.lexerActionIndex = c.lexerActionIndex; this.lexerActionIndex = c.lexerActionIndex;
this.nonGreedyDepth = c.nonGreedyDepth;
} }
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state, public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
@ -45,42 +69,11 @@ public class LexerATNConfig extends ATNConfig {
{ {
super(c, state, c.context, c.semanticContext); super(c, state, c.context, c.semanticContext);
this.lexerActionIndex = actionIndex; this.lexerActionIndex = actionIndex;
this.nonGreedyDepth = c.nonGreedyDepth;
} }
public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state, public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state,
@Nullable PredictionContext context) { @Nullable PredictionContext context) {
super(c, state, context, c.semanticContext); super(c, state, context, c.semanticContext);
this.lexerActionIndex = c.lexerActionIndex; this.lexerActionIndex = c.lexerActionIndex;
this.nonGreedyDepth = c.nonGreedyDepth;
} }
private LexerATNConfig(@NotNull LexerATNConfig c, int nonGreedyDepth) {
super(c, c.state, c.context, c.semanticContext);
this.lexerActionIndex = c.lexerActionIndex;
this.nonGreedyDepth = nonGreedyDepth;
}
@Override
public boolean isGreedy() {
return nonGreedyDepth == 0;
}
@Override
public int getNonGreedyDepth() {
return nonGreedyDepth;
}
public LexerATNConfig enterNonGreedyBlock() {
return new LexerATNConfig(this, nonGreedyDepth + 1);
}
public LexerATNConfig exitNonGreedyBlock() {
if (isGreedy()) {
return this;
}
return new LexerATNConfig(this, nonGreedyDepth - 1);
}
} }

View File

@ -1,30 +1,31 @@
/* /*
[The "BSD license"] * [The "BSD license"]
Copyright (c) 2011 Terence Parr * Copyright (c) 2012 Terence Parr
All rights reserved. * Copyright (c) 2012 Sam Harwell
* All rights reserved.
Redistribution and use in source and binary forms, with or without *
modification, are permitted provided that the following conditions * Redistribution and use in source and binary forms, with or without
are met: * modification, are permitted provided that the following conditions
* are met:
1. Redistributions of source code must retain the above copyright *
notice, this list of conditions and the following disclaimer. * 1. Redistributions of source code must retain the above copyright
2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer.
notice, this list of conditions and the following disclaimer in the * 2. Redistributions in binary form must reproduce the above copyright
documentation and/or other materials provided with the distribution. * notice, this list of conditions and the following disclaimer in the
3. The name of the author may not be used to endorse or promote products * documentation and/or other materials provided with the distribution.
derived from this software without specific prior written permission. * 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR *
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
package org.antlr.v4.runtime.atn; package org.antlr.v4.runtime.atn;
@ -42,7 +43,6 @@ import org.antlr.v4.runtime.misc.Nullable;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.BitSet;
/** "dup" of ParserInterpreter */ /** "dup" of ParserInterpreter */
public class LexerATNSimulator extends ATNSimulator { public class LexerATNSimulator extends ATNSimulator {
@ -315,7 +315,7 @@ public class LexerATNSimulator extends ATNSimulator {
} }
if (target == null) { if (target == null) {
reach = new ATNConfigSet(); reach = new OrderedATNConfigSet();
// if we don't find an existing DFA state // if we don't find an existing DFA state
// Fill reach starting from closure, following t transitions // Fill reach starting from closure, following t transitions
@ -387,7 +387,14 @@ public class LexerATNSimulator extends ATNSimulator {
* we can reach upon input t. Parameter reach is a return parameter. * we can reach upon input t. Parameter reach is a return parameter.
*/ */
protected void getReachableConfigSet(ATNConfigSet closure, ATNConfigSet reach, int t) { protected void getReachableConfigSet(ATNConfigSet closure, ATNConfigSet reach, int t) {
// this is used to skip processing for configs which have a lower priority
// than a config that already reached an accept state for the same rule
int skipAlt = ATN.INVALID_ALT_NUMBER;
for (ATNConfig c : closure) { for (ATNConfig c : closure) {
if (c.alt == skipAlt) {
continue;
}
if ( debug ) { if ( debug ) {
System.out.format("testing %s at %s\n", getTokenName(t), c.toString(recog, true)); System.out.format("testing %s at %s\n", getTokenName(t), c.toString(recog, true));
} }
@ -397,7 +404,12 @@ public class LexerATNSimulator extends ATNSimulator {
Transition trans = c.state.transition(ti); Transition trans = c.state.transition(ti);
ATNState target = getReachableTarget(trans, t); ATNState target = getReachableTarget(trans, t);
if ( target!=null ) { if ( target!=null ) {
closure(new LexerATNConfig((LexerATNConfig)c, target), reach); if (closure(new LexerATNConfig((LexerATNConfig)c, target), reach)) {
// any remaining configs for this alt have a lower priority than
// the one that just reached an accept state.
skipAlt = c.alt;
break;
}
} }
} }
} }
@ -409,51 +421,12 @@ public class LexerATNSimulator extends ATNSimulator {
reach, prevAccept.config, prevAccept.index); reach, prevAccept.config, prevAccept.index);
} }
/* Non-greedy handling works by removing all non-greedy configurations
* from reach when an accept state is reached for the same token. For
* example, consider the following two tokens:
*
* BLOCK : '{' .* '}';
* OPTIONAL_BLOCK : '{' .* '}' '?';
*
* With the following input:
*
* {stuff}?
*
* After matching '}', an accept state at the end of BLOCK is reached,
* so any configurations inside the non-greedy .* loop in BLOCK will be
* removed from reach. The configuration(s) inside the non-greedy .*
* loop in OPTIONAL_BLOCK are unaffected by this because no
* configuration is in an accept state for OPTIONAL_BLOCK at this input
* symbol.
*/
BitSet altsAtAcceptState = new BitSet();
BitSet nonGreedyAlts = new BitSet();
LexerATNConfig acceptConfig = null; LexerATNConfig acceptConfig = null;
for (ATNConfig config : reach) { for (ATNConfig config : reach) {
if (config.state instanceof RuleStopState) { if (config.state instanceof RuleStopState) {
altsAtAcceptState.set(config.alt); acceptConfig = (LexerATNConfig)config;
break;
if ( debug ) {
System.out.format("processAcceptConfigs: hit accept config %s index %d\n",
config, input.index());
}
if (acceptConfig == null) {
acceptConfig = (LexerATNConfig)config;
}
} }
if ( !config.isGreedy() ) {
assert !(config.state instanceof RuleStopState);
nonGreedyAlts.set(config.alt);
}
}
nonGreedyAlts.and(altsAtAcceptState);
// this is now "alts with at least one non-greedy config and one accept config"
if (!nonGreedyAlts.isEmpty()) {
reach.removeNonGreedyConfigsInAlts(nonGreedyAlts);
} }
// mark the new preferred accept state // mark the new preferred accept state
@ -463,6 +436,7 @@ public class LexerATNSimulator extends ATNSimulator {
System.out.println("processAcceptConfigs: found longer token"); System.out.println("processAcceptConfigs: found longer token");
} }
} }
// condition > not >= will favor prev accept at same index. // condition > not >= will favor prev accept at same index.
// This way, "int" is keyword not ID if listed first. // This way, "int" is keyword not ID if listed first.
traceAcceptState(acceptConfig.alt); traceAcceptState(acceptConfig.alt);
@ -558,7 +532,7 @@ public class LexerATNSimulator extends ATNSimulator {
@NotNull ATNState p) @NotNull ATNState p)
{ {
PredictionContext initialContext = PredictionContext.EMPTY; PredictionContext initialContext = PredictionContext.EMPTY;
ATNConfigSet configs = new ATNConfigSet(); ATNConfigSet configs = new OrderedATNConfigSet();
for (int i=0; i<p.getNumberOfTransitions(); i++) { for (int i=0; i<p.getNumberOfTransitions(); i++) {
ATNState target = p.transition(i).target; ATNState target = p.transition(i).target;
LexerATNConfig c = new LexerATNConfig(target, i+1, initialContext); LexerATNConfig c = new LexerATNConfig(target, i+1, initialContext);
@ -567,7 +541,17 @@ public class LexerATNSimulator extends ATNSimulator {
return configs; return configs;
} }
protected void closure(@NotNull LexerATNConfig config, @NotNull ATNConfigSet configs) { /**
* Since the alternatives within any lexer decision are ordered by
* preference, this method stops pursuing the closure as soon as an accept
* state is reached. After the first accept state is reached by depth-first
* search from {@code config}, all other (potentially reachable) states for
* this rule would have a lower priority.
*
* @return {@code true} if an accept state is reached, otherwise
* {@code false}.
*/
protected boolean closure(@NotNull LexerATNConfig config, @NotNull ATNConfigSet configs) {
if ( debug ) { if ( debug ) {
System.out.println("closure("+config.toString(recog, true)+")"); System.out.println("closure("+config.toString(recog, true)+")");
} }
@ -585,10 +569,12 @@ public class LexerATNSimulator extends ATNSimulator {
if ( config.context == null || config.context.hasEmptyPath() ) { if ( config.context == null || config.context.hasEmptyPath() ) {
if (config.context == null || config.context.isEmpty()) { if (config.context == null || config.context.isEmpty()) {
configs.add(config); configs.add(config);
return; }
else {
configs.add(new LexerATNConfig(config, config.state, PredictionContext.EMPTY));
} }
configs.add(new LexerATNConfig(config, config.state, PredictionContext.EMPTY)); return true;
} }
if ( config.context!=null && !config.context.isEmpty() ) { if ( config.context!=null && !config.context.isEmpty() ) {
@ -609,11 +595,13 @@ public class LexerATNSimulator extends ATNSimulator {
RuleTransition rt = (RuleTransition)invokingState.transition(0); RuleTransition rt = (RuleTransition)invokingState.transition(0);
ATNState retState = rt.followState; ATNState retState = rt.followState;
LexerATNConfig c = new LexerATNConfig(retState, config.alt, newContext); LexerATNConfig c = new LexerATNConfig(retState, config.alt, newContext);
closure(c, configs); if (closure(c, configs)) {
return true;
}
} }
} }
} }
return; return false;
} }
// optimization // optimization
@ -622,19 +610,17 @@ public class LexerATNSimulator extends ATNSimulator {
} }
ATNState p = config.state; ATNState p = config.state;
boolean nonGreedy = (p instanceof DecisionState && ((DecisionState)p).nonGreedy && !(p instanceof PlusLoopbackState))
|| (p instanceof PlusBlockStartState && ((PlusBlockStartState)p).loopBackState.nonGreedy);
for (int i=0; i<p.getNumberOfTransitions(); i++) { for (int i=0; i<p.getNumberOfTransitions(); i++) {
Transition t = p.transition(i); Transition t = p.transition(i);
LexerATNConfig c = getEpsilonTarget(config, t, configs); LexerATNConfig c = getEpsilonTarget(config, t, configs);
if ( c!=null ) { if ( c!=null ) {
if (nonGreedy) { if (closure(c, configs)) {
c = c.enterNonGreedyBlock(); return true;
} }
closure(c, configs);
} }
} }
return false;
} }
// side-effect: can alter configs.hasSemanticContext // side-effect: can alter configs.hasSemanticContext
@ -644,46 +630,6 @@ public class LexerATNSimulator extends ATNSimulator {
@NotNull ATNConfigSet configs) @NotNull ATNConfigSet configs)
{ {
ATNState p = config.state; ATNState p = config.state;
switch (p.getStateType()) {
case ATNState.PLUS_LOOP_BACK:
if (((PlusLoopbackState)p).nonGreedy) {
config = config.exitNonGreedyBlock();
}
break;
case ATNState.STAR_LOOP_BACK:
if (((StarLoopbackState)p).getLoopEntryState().nonGreedy) {
config = config.exitNonGreedyBlock();
}
break;
case ATNState.LOOP_END:
ATNState loopBackState = ((LoopEndState)p).loopBackState;
if (loopBackState instanceof PlusLoopbackState) {
if (((PlusLoopbackState)loopBackState).nonGreedy) {
config = config.exitNonGreedyBlock();
}
}
else {
StarLoopbackState starLoopbackState = (StarLoopbackState)loopBackState;
if (starLoopbackState.getLoopEntryState().nonGreedy) {
config = config.exitNonGreedyBlock();
}
}
break;
case ATNState.BLOCK_END:
if (p.isNonGreedyExitState()) {
config = config.exitNonGreedyBlock();
}
break;
default:
break;
}
LexerATNConfig c = null; LexerATNConfig c = null;
switch (t.getSerializationType()) { switch (t.getSerializationType()) {
case Transition.RULE: case Transition.RULE:

View File

@ -0,0 +1,57 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime.atn;
/**
 * An {@link ATNConfigSet} whose {@code configLookup} is replaced by a
 * {@link LexerConfigHashSet}, so that membership in the lookup table is
 * decided by {@link ATNConfig#hashCode()} and {@link ATNConfig#equals}
 * instead of whatever {@code ConfigHashSet} uses by default.
 *
 * NOTE(review): the name suggests this backs the lexer's ordered-alternative
 * handling; the ordering behavior itself is not visible in this class.
 *
 * @author Sam Harwell
 */
public class OrderedATNConfigSet extends ATNConfigSet {

    /**
     * Creates the set and installs a {@link LexerConfigHashSet} as the
     * lookup table used by the base class.
     */
    public OrderedATNConfigSet() {
        super();
        configLookup = new LexerConfigHashSet();
    }

    /**
     * Lookup table that defers both hashing and equality to the
     * {@link ATNConfig} instances themselves.
     */
    protected static class LexerConfigHashSet extends ConfigHashSet {

        /**
         * @param config the configuration to hash
         * @return {@code config.hashCode()}
         */
        @Override
        public int hashCode(final ATNConfig config) {
            return config.hashCode();
        }

        /**
         * @param left  the configuration whose {@code equals} is invoked
         * @param right the configuration it is compared against
         * @return the result of {@code left.equals(right)}
         */
        @Override
        public boolean equals(final ATNConfig left, final ATNConfig right) {
            return left.equals(right);
        }
    }
}

View File

@ -1,30 +1,31 @@
/* /*
[The "BSD license"] * [The "BSD license"]
Copyright (c) 2011 Terence Parr * Copyright (c) 2012 Terence Parr
All rights reserved. * Copyright (c) 2012 Sam Harwell
* All rights reserved.
Redistribution and use in source and binary forms, with or without *
modification, are permitted provided that the following conditions * Redistribution and use in source and binary forms, with or without
are met: * modification, are permitted provided that the following conditions
* are met:
1. Redistributions of source code must retain the above copyright *
notice, this list of conditions and the following disclaimer. * 1. Redistributions of source code must retain the above copyright
2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer.
notice, this list of conditions and the following disclaimer in the * 2. Redistributions in binary form must reproduce the above copyright
documentation and/or other materials provided with the distribution. * notice, this list of conditions and the following disclaimer in the
3. The name of the author may not be used to endorse or promote products * documentation and/or other materials provided with the distribution.
derived from this software without specific prior written permission. * 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR *
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
package org.antlr.v4.automata; package org.antlr.v4.automata;
@ -226,7 +227,6 @@ public class ATNSerializer {
data.add(ndecisions); data.add(ndecisions);
for (DecisionState decStartState : atn.decisionToState) { for (DecisionState decStartState : atn.decisionToState) {
data.add(decStartState.stateNumber); data.add(decStartState.stateNumber);
data.add(decStartState.nonGreedy ? 1 : 0);
} }
return data; return data;
} }
@ -300,7 +300,6 @@ public class ATNSerializer {
int ndecisions = ATNSimulator.toInt(data[p++]); int ndecisions = ATNSimulator.toInt(data[p++]);
for (int i=1; i<=ndecisions; i++) { for (int i=1; i<=ndecisions; i++) {
int s = ATNSimulator.toInt(data[p++]); int s = ATNSimulator.toInt(data[p++]);
boolean nonGreedy = ATNSimulator.toInt(data[p++]) != 0;
buf.append(i-1).append(":").append(s).append("\n"); buf.append(i-1).append(":").append(s).append("\n");
} }
return buf.toString(); return buf.toString();

View File

@ -1,30 +1,31 @@
/* /*
[The "BSD license"] * [The "BSD license"]
Copyright (c) 2011 Terence Parr * Copyright (c) 2012 Terence Parr
All rights reserved. * Copyright (c) 2012 Sam Harwell
* All rights reserved.
Redistribution and use in source and binary forms, with or without *
modification, are permitted provided that the following conditions * Redistribution and use in source and binary forms, with or without
are met: * modification, are permitted provided that the following conditions
* are met:
1. Redistributions of source code must retain the above copyright *
notice, this list of conditions and the following disclaimer. * 1. Redistributions of source code must retain the above copyright
2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer.
notice, this list of conditions and the following disclaimer in the * 2. Redistributions in binary form must reproduce the above copyright
documentation and/or other materials provided with the distribution. * notice, this list of conditions and the following disclaimer in the
3. The name of the author may not be used to endorse or promote products * documentation and/or other materials provided with the distribution.
derived from this software without specific prior written permission. * 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR *
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
package org.antlr.v4.automata; package org.antlr.v4.automata;
@ -425,9 +426,7 @@ public class ParserATNFactory implements ATNFactory {
BlockStartState blkStart = (BlockStartState)blk.left; BlockStartState blkStart = (BlockStartState)blk.left;
BlockAST blkAST = (BlockAST)optAST.getChild(0); BlockAST blkAST = (BlockAST)optAST.getChild(0);
blkStart.nonGreedy = !isGreedy(blkAST); if (isGreedy(blkAST)) {
if (!blkStart.nonGreedy) {
epsilon(blkStart, blk.right); epsilon(blkStart, blk.right);
} else { } else {
Transition existing = blkStart.removeTransition(0); Transition existing = blkStart.removeTransition(0);
@ -464,8 +463,7 @@ public class ParserATNFactory implements ATNFactory {
epsilon(blkEnd, loop); // blk can see loop back epsilon(blkEnd, loop); // blk can see loop back
BlockAST blkAST = (BlockAST)plusAST.getChild(0); BlockAST blkAST = (BlockAST)plusAST.getChild(0);
loop.nonGreedy = !isGreedy(blkAST); if ( isGreedy(blkAST) ) {
if ( !loop.nonGreedy ) {
epsilon(loop, blkStart); // loop back to start epsilon(loop, blkStart); // loop back to start
epsilon(loop, end); // or exit epsilon(loop, end); // or exit
} }
@ -504,8 +502,7 @@ public class ParserATNFactory implements ATNFactory {
end.loopBackState = loop; end.loopBackState = loop;
BlockAST blkAST = (BlockAST)starAST.getChild(0); BlockAST blkAST = (BlockAST)starAST.getChild(0);
entry.nonGreedy = !isGreedy(blkAST); if ( isGreedy(blkAST) ) {
if ( !entry.nonGreedy ) {
epsilon(entry, blkStart); // loop enter edge (alt 1) epsilon(entry, blkStart); // loop enter edge (alt 1)
epsilon(entry, end); // bypass loop edge (alt 2) epsilon(entry, end); // bypass loop edge (alt 2)
} }

View File

@ -1,8 +1,37 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.test; package org.antlr.v4.test;
import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.misc.Utils; import org.antlr.v4.runtime.misc.Utils;
@ -39,10 +68,12 @@ public class TestATNLexerInterpreter extends BaseTest {
LexerGrammar lg = new LexerGrammar( LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+ "lexer grammar L;\n"+
"A : 'xy'\n" + "A : 'xy'\n" +
" | 'xyz'\n" + // make sure nongreedy mech cut off doesn't kill this alt " | 'xyz'\n" + // this alt shouldn't be reachable since the alts are ordered
" ;\n" +
"Z : 'z'\n" +
" ;\n"); " ;\n");
checkLexerMatches(lg, "xy", "A, EOF"); checkLexerMatches(lg, "xy", "A, EOF");
checkLexerMatches(lg, "xyz", "A, EOF"); checkLexerMatches(lg, "xyz", "A, Z, EOF");
} }
@Test public void testShortLongRule2() throws Exception { @Test public void testShortLongRule2() throws Exception {
@ -60,6 +91,8 @@ public class TestATNLexerInterpreter extends BaseTest {
"lexer grammar L;\n"+ "lexer grammar L;\n"+
"A : 'xy' .\n" + // should pursue '.' since xyz hits stop first, before 2nd alt "A : 'xy' .\n" + // should pursue '.' since xyz hits stop first, before 2nd alt
" | 'xy'\n" + " | 'xy'\n" +
" ;\n" +
"Z : 'z'\n" +
" ;\n"); " ;\n");
checkLexerMatches(lg, "xy", "A, EOF"); checkLexerMatches(lg, "xy", "A, EOF");
checkLexerMatches(lg, "xyz", "A, EOF"); checkLexerMatches(lg, "xyz", "A, EOF");
@ -69,22 +102,12 @@ public class TestATNLexerInterpreter extends BaseTest {
LexerGrammar lg = new LexerGrammar( LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+ "lexer grammar L;\n"+
"A : 'xy'\n" + "A : 'xy'\n" +
" | 'xy' .\n" + // should pursue '.' since A is greedy " | 'xy' .\n" + // should not pursue '.' since alts are ordered
" ;\n" +
"Z : 'z'\n" +
" ;\n"); " ;\n");
checkLexerMatches(lg, "xy", "A, EOF"); checkLexerMatches(lg, "xy", "A, EOF");
RecognitionException e = checkLexerMatches(lg, "xyz", "A, EOF"); checkLexerMatches(lg, "xyz", "A, Z, EOF");
assertNull(e);
}
@Test public void testWildcardQuirk() throws Exception {
LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+
"A : 'xy'\n" +
" | 'xy' . 'z'\n" + // will pursue '.' since A is greedy
" ;\n");
// checkLexerMatches(lg, "xy", "A, EOF");
RecognitionException e = checkLexerMatches(lg, "xyqz", "A, EOF");
assertNull(e);
} }
@Test public void testWildcardNonQuirkWhenSplitBetweenTwoRules() throws Exception { @Test public void testWildcardNonQuirkWhenSplitBetweenTwoRules() throws Exception {
@ -93,7 +116,7 @@ public class TestATNLexerInterpreter extends BaseTest {
"A : 'xy' ;\n" + "A : 'xy' ;\n" +
"B : 'xy' . 'z' ;\n"); "B : 'xy' . 'z' ;\n");
checkLexerMatches(lg, "xy", "A, EOF"); checkLexerMatches(lg, "xy", "A, EOF");
checkLexerMatches(lg, "xyz", "B, EOF"); checkLexerMatches(lg, "xyqz", "B, EOF");
} }
@Test public void testLexerLoops() throws Exception { @Test public void testLexerLoops() throws Exception {
@ -151,22 +174,15 @@ public class TestATNLexerInterpreter extends BaseTest {
@Test public void testRecursiveLexerRuleRefWithWildcard() throws Exception { @Test public void testRecursiveLexerRuleRefWithWildcard() throws Exception {
LexerGrammar lg = new LexerGrammar( LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+ "lexer grammar L;\n"+
"CMT : '/*' (CMT | .)+ '*/' ;\n" + "CMT : '/*' (CMT | .)* '*/' ;\n" +
"WS : (' '|'\n')+ ;"); "WS : (' '|'\n')+ ;");
String expecting = "CMT, WS, CMT, WS, CMT, WS, EOF";
// stuff on end of comment matches another rule String expecting = "CMT, WS, CMT, WS, EOF";
checkLexerMatches(lg, checkLexerMatches(lg,
"/* ick */\n" + "/* ick */\n" +
"/* /* */\n" + "/* /* */\n" +
"/* /*nested*/ */\n", "/* /*nested*/ */\n",
expecting); expecting);
// stuff on end of comment doesn't match another rule
expecting = "CMT, WS, CMT, WS, CMT, WS, EOF";
checkLexerMatches(lg,
"/* ick */x\n" +
"/* /* */x\n" +
"/* /*nested*/ */x\n",
expecting);
} }
@Test public void testLexerWildcardNonGreedyLoopByDefault() throws Exception { @Test public void testLexerWildcardNonGreedyLoopByDefault() throws Exception {
@ -265,20 +281,14 @@ public class TestATNLexerInterpreter extends BaseTest {
checkLexerMatches(lg, "a", expecting); checkLexerMatches(lg, "a", expecting);
} }
protected RecognitionException checkLexerMatches(LexerGrammar lg, String inputString, String expecting) { protected void checkLexerMatches(LexerGrammar lg, String inputString, String expecting) {
ATN atn = createATN(lg, true); ATN atn = createATN(lg, true);
CharStream input = new ANTLRInputStream(inputString); CharStream input = new ANTLRInputStream(inputString);
ATNState startState = atn.modeNameToStartState.get("DEFAULT_MODE"); ATNState startState = atn.modeNameToStartState.get("DEFAULT_MODE");
DOTGenerator dot = new DOTGenerator(lg); DOTGenerator dot = new DOTGenerator(lg);
System.out.println(dot.getDOT(startState, true)); System.out.println(dot.getDOT(startState, true));
List<String> tokenTypes = null; List<String> tokenTypes = getTokenTypes(lg, atn, input, false);
RecognitionException retException = null;
try {
tokenTypes = getTokenTypes(lg, atn, input, false);
}
catch (RecognitionException lre) { retException = lre; }
if ( retException!=null ) return retException;
String result = Utils.join(tokenTypes.iterator(), ", "); String result = Utils.join(tokenTypes.iterator(), ", ");
System.out.println(tokenTypes); System.out.println(tokenTypes);
@ -288,7 +298,6 @@ public class TestATNLexerInterpreter extends BaseTest {
input.seek(0); input.seek(0);
List<String> tokenTypes2 = getTokenTypes(lg, atn, input, true); List<String> tokenTypes2 = getTokenTypes(lg, atn, input, true);
assertEquals("interp vs adaptive types differ", tokenTypes, tokenTypes2); assertEquals("interp vs adaptive types differ", tokenTypes, tokenTypes2);
return null;
} }
} }

View File

@ -1,3 +1,33 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.test; package org.antlr.v4.test;
import org.junit.Test; import org.junit.Test;
@ -29,6 +59,20 @@ public class TestLexerExec extends BaseTest {
assertEquals(expecting, found); assertEquals(expecting, found);
} }
/**
 * Lexes {@code "hi""mom"} with a STRING rule of the form
 * {@code '"' ('""' | .)* '"'} and expects the loop to stop at the first
 * closing quote: the result is two STRING tokens ({@code "hi"} at 0:3 and
 * {@code "mom"} at 4:8) followed by EOF, with {@code stderrDuringParse}
 * left null.
 */
@Test
public void testImplicitNonGreedyTermination() throws Exception {
    String lexerGrammar =
        "lexer grammar L;\n"
        + "STRING : '\"' ('\"\"' | .)* '\"';";
    String input = "\"hi\"\"mom\"";
    String expectedTokens =
        "[@0,0:3='\"hi\"',<1>,1:0]\n" +
        "[@1,4:8='\"mom\"',<1>,1:4]\n" +
        "[@2,9:8='<EOF>',<-1>,1:9]\n";

    String actualTokens = execLexer("L.g4", lexerGrammar, "L", input);

    assertEquals(expectedTokens, actualTokens);
    // No lexer diagnostics are expected for this input.
    assertNull(stderrDuringParse);
}
@Test @Test
public void testImplicitGreedyOptional() throws Exception { public void testImplicitGreedyOptional() throws Exception {
String grammar = String grammar =
@ -168,11 +212,9 @@ public class TestLexerExec extends BaseTest {
String expecting = String expecting =
"[@0,0:8='/* ick */',<1>,1:0]\n" + "[@0,0:8='/* ick */',<1>,1:0]\n" +
"[@1,9:9='\\n',<2>,1:9]\n" + "[@1,9:9='\\n',<2>,1:9]\n" +
"[@2,10:17='/* /* */',<1>,2:0]\n" + "[@2,10:34='/* /* */\\n/* /*nested*/ */',<1>,2:0]\n" +
"[@3,18:18='\\n',<2>,2:8]\n" + "[@3,35:35='\\n',<2>,3:16]\n" +
"[@4,19:31='/* /*nested*/',<1>,3:0]\n" + "[@4,36:35='<EOF>',<-1>,4:17]\n";
"[@5,32:32=' ',<2>,3:13]\n" +
"[@6,36:35='<EOF>',<-1>,4:0]\n";
// stuff on end of comment matches another rule // stuff on end of comment matches another rule
String found = execLexer("L.g4", grammar, "L", String found = execLexer("L.g4", grammar, "L",
@ -180,19 +222,14 @@ public class TestLexerExec extends BaseTest {
"/* /* */\n" + "/* /* */\n" +
"/* /*nested*/ */\n"); "/* /*nested*/ */\n");
assertEquals(expecting, found); assertEquals(expecting, found);
assertEquals( assertNull(stderrDuringParse);
"line 3:14 token recognition error at: '*'\n" +
"line 3:15 token recognition error at: '/\n'\n", stderrDuringParse);
// stuff on end of comment doesn't match another rule // stuff on end of comment doesn't match another rule
expecting = expecting =
"[@0,0:8='/* ick */',<1>,1:0]\n" + "[@0,0:8='/* ick */',<1>,1:0]\n" +
"[@1,10:10='\\n',<2>,1:10]\n" + "[@1,10:10='\\n',<2>,1:10]\n" +
"[@2,11:18='/* /* */',<1>,2:0]\n" + "[@2,11:36='/* /* */x\\n/* /*nested*/ */',<1>,2:0]\n" +
"[@3,20:20='\\n',<2>,2:9]\n" + "[@3,38:38='\\n',<2>,3:17]\n" +
"[@4,21:33='/* /*nested*/',<1>,3:0]\n" + "[@4,39:38='<EOF>',<-1>,4:18]\n";
"[@5,34:34=' ',<2>,3:13]\n" +
"[@6,38:38='\\n',<2>,3:17]\n" +
"[@7,39:38='<EOF>',<-1>,4:18]\n";
found = execLexer("L.g4", grammar, "L", found = execLexer("L.g4", grammar, "L",
"/* ick */x\n" + "/* ick */x\n" +
"/* /* */x\n" + "/* /* */x\n" +
@ -200,9 +237,7 @@ public class TestLexerExec extends BaseTest {
assertEquals(expecting, found); assertEquals(expecting, found);
assertEquals( assertEquals(
"line 1:9 token recognition error at: 'x'\n" + "line 1:9 token recognition error at: 'x'\n" +
"line 2:8 token recognition error at: 'x'\n" + "line 3:16 token recognition error at: 'x'\n", stderrDuringParse);
"line 3:14 token recognition error at: '*'\n" +
"line 3:15 token recognition error at: '/x'\n", stderrDuringParse);
} }
@Test public void testRecursiveLexerRuleRefWithWildcardPlus() throws Exception { @Test public void testRecursiveLexerRuleRefWithWildcardPlus() throws Exception {
@ -215,11 +250,9 @@ public class TestLexerExec extends BaseTest {
String expecting = String expecting =
"[@0,0:8='/* ick */',<1>,1:0]\n" + "[@0,0:8='/* ick */',<1>,1:0]\n" +
"[@1,9:9='\\n',<2>,1:9]\n" + "[@1,9:9='\\n',<2>,1:9]\n" +
"[@2,10:17='/* /* */',<1>,2:0]\n" + "[@2,10:34='/* /* */\\n/* /*nested*/ */',<1>,2:0]\n" +
"[@3,18:18='\\n',<2>,2:8]\n" + "[@3,35:35='\\n',<2>,3:16]\n" +
"[@4,19:31='/* /*nested*/',<1>,3:0]\n" + "[@4,36:35='<EOF>',<-1>,4:17]\n";
"[@5,32:32=' ',<2>,3:13]\n" +
"[@6,36:35='<EOF>',<-1>,4:0]\n";
// stuff on end of comment matches another rule // stuff on end of comment matches another rule
String found = execLexer("L.g4", grammar, "L", String found = execLexer("L.g4", grammar, "L",
@ -227,19 +260,14 @@ public class TestLexerExec extends BaseTest {
"/* /* */\n" + "/* /* */\n" +
"/* /*nested*/ */\n"); "/* /*nested*/ */\n");
assertEquals(expecting, found); assertEquals(expecting, found);
assertEquals( assertNull(stderrDuringParse);
"line 3:14 token recognition error at: '*'\n" +
"line 3:15 token recognition error at: '/\n'\n", stderrDuringParse);
// stuff on end of comment doesn't match another rule // stuff on end of comment doesn't match another rule
expecting = expecting =
"[@0,0:8='/* ick */',<1>,1:0]\n" + "[@0,0:8='/* ick */',<1>,1:0]\n" +
"[@1,10:10='\\n',<2>,1:10]\n" + "[@1,10:10='\\n',<2>,1:10]\n" +
"[@2,11:18='/* /* */',<1>,2:0]\n" + "[@2,11:36='/* /* */x\\n/* /*nested*/ */',<1>,2:0]\n" +
"[@3,20:20='\\n',<2>,2:9]\n" + "[@3,38:38='\\n',<2>,3:17]\n" +
"[@4,21:33='/* /*nested*/',<1>,3:0]\n" + "[@4,39:38='<EOF>',<-1>,4:18]\n";
"[@5,34:34=' ',<2>,3:13]\n" +
"[@6,38:38='\\n',<2>,3:17]\n" +
"[@7,39:38='<EOF>',<-1>,4:18]\n";
found = execLexer("L.g4", grammar, "L", found = execLexer("L.g4", grammar, "L",
"/* ick */x\n" + "/* ick */x\n" +
"/* /* */x\n" + "/* /* */x\n" +
@ -247,9 +275,7 @@ public class TestLexerExec extends BaseTest {
assertEquals(expecting, found); assertEquals(expecting, found);
assertEquals( assertEquals(
"line 1:9 token recognition error at: 'x'\n" + "line 1:9 token recognition error at: 'x'\n" +
"line 2:8 token recognition error at: 'x'\n" + "line 3:16 token recognition error at: 'x'\n", stderrDuringParse);
"line 3:14 token recognition error at: '*'\n" +
"line 3:15 token recognition error at: '/x'\n", stderrDuringParse);
} }
@Test public void testActionExecutedInDFA() throws Exception { @Test public void testActionExecutedInDFA() throws Exception {