From 12b2c349469b8cc07e4776529fb41a13050d86ec Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Sun, 21 Oct 2012 18:50:24 -0500 Subject: [PATCH] Lexer uses strictly-ordered alternatives within a rule. Simplifies code, increases performance when non-terminal (lexer rules) depth is limited, and actually fixes non-greedy behavior --- .../org/antlr/v4/runtime/atn/ATNConfig.java | 69 +++---- .../antlr/v4/runtime/atn/ATNConfigSet.java | 77 +++---- .../antlr/v4/runtime/atn/ATNSimulator.java | 55 +++-- .../antlr/v4/runtime/atn/BlockEndState.java | 5 - .../antlr/v4/runtime/atn/DecisionState.java | 1 - .../antlr/v4/runtime/atn/LexerATNConfig.java | 67 +++--- .../v4/runtime/atn/LexerATNSimulator.java | 190 +++++++----------- .../v4/runtime/atn/OrderedATNConfigSet.java | 57 ++++++ .../org/antlr/v4/automata/ATNSerializer.java | 55 +++-- .../antlr/v4/automata/ParserATNFactory.java | 63 +++--- .../v4/test/TestATNLexerInterpreter.java | 83 ++++---- .../test/org/antlr/v4/test/TestLexerExec.java | 94 +++++---- 12 files changed, 399 insertions(+), 417 deletions(-) create mode 100644 runtime/Java/src/org/antlr/v4/runtime/atn/OrderedATNConfigSet.java diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java index 449eacc4c..0e5b3ddfa 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java @@ -1,30 +1,31 @@ /* - [The "BSD license"] - Copyright (c) 2011 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.v4.runtime.atn; @@ -71,22 +72,6 @@ public class ATNConfig { @NotNull public final SemanticContext semanticContext; - public boolean isGreedy() { - return true; - } - - /** Lexer non-greedy implementations need to track information per - * ATNConfig. When the lexer reaches an accept state for a lexer - * rule, it needs to wipe out any configurations associated with - * that rule that are part of a non-greedy subrule. To do that it - * has to make sure that it tracks when a configuration was derived - * from an element within a non-greedy subrule. We use depth for - * that. We're greedy when the depth is 0. - */ - public int getNonGreedyDepth() { - return 0; - } - public ATNConfig(ATNConfig old) { // dup this.state = old.state; this.alt = old.alt; diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java index c6673cf86..c0839fe6d 100755 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java @@ -1,37 +1,37 @@ /* - [The "BSD license"] - Copyright (c) 2011 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.v4.runtime.atn; import org.antlr.v4.runtime.misc.Array2DHashSet; import org.antlr.v4.runtime.misc.DoubleKeyMap; -import org.antlr.v4.runtime.misc.NotNull; import java.util.ArrayList; import java.util.BitSet; @@ -39,7 +39,6 @@ import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.List; -import java.util.Map; import java.util.Set; /** Specialized OrderedHashSet that can track info about the set. @@ -248,7 +247,6 @@ public class ATNConfigSet implements Set { int hashCode = 7; hashCode = 31 * hashCode + o.state.stateNumber; hashCode = 31 * hashCode + o.alt; - hashCode = 31 * hashCode + o.getNonGreedyDepth(); hashCode = 31 * hashCode + o.semanticContext.hashCode(); return hashCode; } @@ -260,7 +258,6 @@ public class ATNConfigSet implements Set { if ( hashCode(a) != hashCode(b) ) return false; return a.state.stateNumber==b.state.stateNumber && a.alt==b.alt - && a.getNonGreedyDepth() == b.getNonGreedyDepth() && b.semanticContext.equals(b.semanticContext); } } @@ -439,26 +436,6 @@ public class ATNConfigSet implements Set { return configs.iterator(); } - public void removeNonGreedyConfigsInAlts(@NotNull BitSet alts) { - if ( readonly ) throw new IllegalStateException("This set is readonly"); - - if (this.configLookup != null) { - for (Iterator it = this.configLookup.iterator(); it.hasNext(); ) { - ATNConfig entry = it.next(); - if (!entry.isGreedy() && alts.get(entry.alt)) { - it.remove(); - } - } - } - - for (Iterator it = this.configs.iterator(); it.hasNext(); ) { - ATNConfig value = it.next(); - if (!value.isGreedy() && alts.get(value.alt)) { - it.remove(); - } - } - } - @Override public void clear() { if ( readonly ) throw new IllegalStateException("This set is readonly"); diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java index 0ad61f917..b82274249 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java @@ -1,30 +1,31 @@ /* - [The "BSD license"] - Copyright (c) 2011 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.v4.runtime.atn; @@ -276,11 +277,9 @@ public abstract class ATNSimulator { int ndecisions = toInt(data[p++]); for (int i=1; i<=ndecisions; i++) { int s = toInt(data[p++]); - int nonGreedy = toInt(data[p++]); DecisionState decState = (DecisionState)atn.states.get(s); atn.decisionToState.add(decState); decState.decision = i-1; - decState.nonGreedy = nonGreedy != 0; } verifyATN(atn); diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java index 0b423e14c..f82ed4dbf 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java @@ -32,9 +32,4 @@ package org.antlr.v4.runtime.atn; /** Terminal node of a simple (a|b|c) block */ public class BlockEndState extends ATNState { public BlockStartState startState; - - @Override - public boolean isNonGreedyExitState() { - return startState != null && startState.nonGreedy; - } } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/DecisionState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/DecisionState.java index 38eb13173..161c978a1 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/DecisionState.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/DecisionState.java @@ -31,5 +31,4 @@ package org.antlr.v4.runtime.atn; public class DecisionState extends ATNState { public int decision = -1; - public boolean nonGreedy; } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java index d3f857d85..ca25d07d4 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java @@ -1,3 +1,33 @@ +/* + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + package org.antlr.v4.runtime.atn; import org.antlr.v4.runtime.misc.NotNull; @@ -7,14 +37,11 @@ public class LexerATNConfig extends ATNConfig { /** Capture lexer action we traverse */ public int lexerActionIndex = -1; - private final int nonGreedyDepth; - public LexerATNConfig(@NotNull ATNState state, int alt, @Nullable PredictionContext context) { super(state, alt, context, SemanticContext.NONE); - this.nonGreedyDepth = 0; } public LexerATNConfig(@NotNull ATNState state, @@ -24,20 +51,17 @@ public class LexerATNConfig extends ATNConfig { { super(state, alt, context, SemanticContext.NONE); this.lexerActionIndex = actionIndex; - this.nonGreedyDepth = 0; } public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state) { super(c, state, c.context, c.semanticContext); this.lexerActionIndex = c.lexerActionIndex; - this.nonGreedyDepth = c.nonGreedyDepth; } public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state, @NotNull SemanticContext semanticContext) { super(c, state, c.context, semanticContext); this.lexerActionIndex = c.lexerActionIndex; - this.nonGreedyDepth = c.nonGreedyDepth; } public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state, @@ -45,42 +69,11 @@ public class LexerATNConfig extends ATNConfig { { super(c, state, c.context, c.semanticContext); this.lexerActionIndex = actionIndex; - this.nonGreedyDepth = c.nonGreedyDepth; } public LexerATNConfig(@NotNull LexerATNConfig c, @NotNull ATNState state, @Nullable PredictionContext context) { super(c, state, context, c.semanticContext); this.lexerActionIndex = c.lexerActionIndex; - this.nonGreedyDepth = c.nonGreedyDepth; } - - private LexerATNConfig(@NotNull LexerATNConfig c, int nonGreedyDepth) { - super(c, c.state, c.context, c.semanticContext); - this.lexerActionIndex = c.lexerActionIndex; - this.nonGreedyDepth = nonGreedyDepth; - } - - @Override - public boolean isGreedy() { - return nonGreedyDepth == 0; - } - - @Override - public int getNonGreedyDepth() { - return nonGreedyDepth; - } - - public LexerATNConfig enterNonGreedyBlock() { - return new LexerATNConfig(this, nonGreedyDepth + 1); - } - - public LexerATNConfig exitNonGreedyBlock() { - if (isGreedy()) { - return this; - } - - return new LexerATNConfig(this, nonGreedyDepth - 1); - } - } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java index 0c659afac..218afd536 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java @@ -1,30 +1,31 @@ /* - [The "BSD license"] - Copyright (c) 2011 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.v4.runtime.atn; @@ -42,7 +43,6 @@ import org.antlr.v4.runtime.misc.Nullable; import java.io.IOException; import java.io.OutputStream; -import java.util.BitSet; /** "dup" of ParserInterpreter */ public class LexerATNSimulator extends ATNSimulator { @@ -315,7 +315,7 @@ public class LexerATNSimulator extends ATNSimulator { } if (target == null) { - reach = new ATNConfigSet(); + reach = new OrderedATNConfigSet(); // if we don't find an existing DFA state // Fill reach starting from closure, following t transitions @@ -387,7 +387,14 @@ public class LexerATNSimulator extends ATNSimulator { * we can reach upon input t. Parameter reach is a return parameter. */ protected void getReachableConfigSet(ATNConfigSet closure, ATNConfigSet reach, int t) { + // this is used to skip processing for configs which have a lower priority + // than a config that already reached an accept state for the same rule + int skipAlt = ATN.INVALID_ALT_NUMBER; for (ATNConfig c : closure) { + if (c.alt == skipAlt) { + continue; + } + if ( debug ) { System.out.format("testing %s at %s\n", getTokenName(t), c.toString(recog, true)); } @@ -397,7 +404,12 @@ public class LexerATNSimulator extends ATNSimulator { Transition trans = c.state.transition(ti); ATNState target = getReachableTarget(trans, t); if ( target!=null ) { - closure(new LexerATNConfig((LexerATNConfig)c, target), reach); + if (closure(new LexerATNConfig((LexerATNConfig)c, target), reach)) { + // any remaining configs for this alt have a lower priority than + // the one that just reached an accept state. + skipAlt = c.alt; + break; + } } } } @@ -409,51 +421,12 @@ public class LexerATNSimulator extends ATNSimulator { reach, prevAccept.config, prevAccept.index); } - /* Non-greedy handling works by removing all non-greedy configurations - * from reach when an accept state is reached for the same token. For - * example, consider the following two tokens: - * - * BLOCK : '{' .* '}'; - * OPTIONAL_BLOCK : '{' .* '}' '?'; - * - * With the following input: - * - * {stuff}? - * - * After matching '}', an accept state at the end of BLOCK is reached, - * so any configurations inside the non-greedy .* loop in BLOCK will be - * removed from reach. The configuration(s) inside the non-greedy .* - * loop in OPTIONAL_BLOCK are unaffected by this because no - * configuration is in an accept state for OPTIONAL_BLOCK at this input - * symbol. - */ - BitSet altsAtAcceptState = new BitSet(); - BitSet nonGreedyAlts = new BitSet(); LexerATNConfig acceptConfig = null; for (ATNConfig config : reach) { if (config.state instanceof RuleStopState) { - altsAtAcceptState.set(config.alt); - - if ( debug ) { - System.out.format("processAcceptConfigs: hit accept config %s index %d\n", - config, input.index()); - } - - if (acceptConfig == null) { - acceptConfig = (LexerATNConfig)config; - } + acceptConfig = (LexerATNConfig)config; + break; } - - if ( !config.isGreedy() ) { - assert !(config.state instanceof RuleStopState); - nonGreedyAlts.set(config.alt); - } - } - - nonGreedyAlts.and(altsAtAcceptState); - // this is now "alts with at least one non-greedy config and one accept config" - if (!nonGreedyAlts.isEmpty()) { - reach.removeNonGreedyConfigsInAlts(nonGreedyAlts); } // mark the new preferred accept state @@ -463,6 +436,7 @@ public class LexerATNSimulator extends ATNSimulator { System.out.println("processAcceptConfigs: found longer token"); } } + // condition > not >= will favor prev accept at same index. // This way, "int" is keyword not ID if listed first. traceAcceptState(acceptConfig.alt); @@ -558,7 +532,7 @@ public class LexerATNSimulator extends ATNSimulator { @NotNull ATNState p) { PredictionContext initialContext = PredictionContext.EMPTY; - ATNConfigSet configs = new ATNConfigSet(); + ATNConfigSet configs = new OrderedATNConfigSet(); for (int i=0; i tokenTypes = null; - RecognitionException retException = null; - try { - tokenTypes = getTokenTypes(lg, atn, input, false); - } - catch (RecognitionException lre) { retException = lre; } - if ( retException!=null ) return retException; + List tokenTypes = getTokenTypes(lg, atn, input, false); String result = Utils.join(tokenTypes.iterator(), ", "); System.out.println(tokenTypes); @@ -288,7 +298,6 @@ public class TestATNLexerInterpreter extends BaseTest { input.seek(0); List tokenTypes2 = getTokenTypes(lg, atn, input, true); assertEquals("interp vs adaptive types differ", tokenTypes, tokenTypes2); - return null; } } diff --git a/tool/test/org/antlr/v4/test/TestLexerExec.java b/tool/test/org/antlr/v4/test/TestLexerExec.java index 0d1a05b15..a66b45354 100644 --- a/tool/test/org/antlr/v4/test/TestLexerExec.java +++ b/tool/test/org/antlr/v4/test/TestLexerExec.java @@ -1,3 +1,33 @@ +/* + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + package org.antlr.v4.test; import org.junit.Test; @@ -29,6 +59,20 @@ public class TestLexerExec extends BaseTest { assertEquals(expecting, found); } + @Test + public void testImplicitNonGreedyTermination() throws Exception { + String grammar = + "lexer grammar L;\n" + + "STRING : '\"' ('\"\"' | .)* '\"';"; + + String found = execLexer("L.g4", grammar, "L", "\"hi\"\"mom\""); + assertEquals( + "[@0,0:3='\"hi\"',<1>,1:0]\n" + + "[@1,4:8='\"mom\"',<1>,1:4]\n" + + "[@2,9:8='',<-1>,1:9]\n", found); + assertNull(stderrDuringParse); + } + @Test public void testImplicitGreedyOptional() throws Exception { String grammar = @@ -168,11 +212,9 @@ public class TestLexerExec extends BaseTest { String expecting = "[@0,0:8='/* ick */',<1>,1:0]\n" + "[@1,9:9='\\n',<2>,1:9]\n" + - "[@2,10:17='/* /* */',<1>,2:0]\n" + - "[@3,18:18='\\n',<2>,2:8]\n" + - "[@4,19:31='/* /*nested*/',<1>,3:0]\n" + - "[@5,32:32=' ',<2>,3:13]\n" + - "[@6,36:35='',<-1>,4:0]\n"; + "[@2,10:34='/* /* */\\n/* /*nested*/ */',<1>,2:0]\n" + + "[@3,35:35='\\n',<2>,3:16]\n" + + "[@4,36:35='',<-1>,4:17]\n"; // stuff on end of comment matches another rule String found = execLexer("L.g4", grammar, "L", @@ -180,19 +222,14 @@ public class TestLexerExec extends BaseTest { "/* /* */\n" + "/* /*nested*/ */\n"); assertEquals(expecting, found); - assertEquals( - "line 3:14 token recognition error at: '*'\n" + - "line 3:15 token recognition error at: '/\n'\n", stderrDuringParse); + assertNull(stderrDuringParse); // stuff on end of comment doesn't match another rule expecting = "[@0,0:8='/* ick */',<1>,1:0]\n" + "[@1,10:10='\\n',<2>,1:10]\n" + - "[@2,11:18='/* /* */',<1>,2:0]\n" + - "[@3,20:20='\\n',<2>,2:9]\n" + - "[@4,21:33='/* /*nested*/',<1>,3:0]\n" + - "[@5,34:34=' ',<2>,3:13]\n" + - "[@6,38:38='\\n',<2>,3:17]\n" + - "[@7,39:38='',<-1>,4:18]\n"; + "[@2,11:36='/* /* */x\\n/* /*nested*/ */',<1>,2:0]\n" + + "[@3,38:38='\\n',<2>,3:17]\n" + + "[@4,39:38='',<-1>,4:18]\n"; found = execLexer("L.g4", grammar, "L", "/* ick */x\n" + "/* /* */x\n" + @@ -200,9 +237,7 @@ public class TestLexerExec extends BaseTest { assertEquals(expecting, found); assertEquals( "line 1:9 token recognition error at: 'x'\n" + - "line 2:8 token recognition error at: 'x'\n" + - "line 3:14 token recognition error at: '*'\n" + - "line 3:15 token recognition error at: '/x'\n", stderrDuringParse); + "line 3:16 token recognition error at: 'x'\n", stderrDuringParse); } @Test public void testRecursiveLexerRuleRefWithWildcardPlus() throws Exception { @@ -215,11 +250,9 @@ public class TestLexerExec extends BaseTest { String expecting = "[@0,0:8='/* ick */',<1>,1:0]\n" + "[@1,9:9='\\n',<2>,1:9]\n" + - "[@2,10:17='/* /* */',<1>,2:0]\n" + - "[@3,18:18='\\n',<2>,2:8]\n" + - "[@4,19:31='/* /*nested*/',<1>,3:0]\n" + - "[@5,32:32=' ',<2>,3:13]\n" + - "[@6,36:35='',<-1>,4:0]\n"; + "[@2,10:34='/* /* */\\n/* /*nested*/ */',<1>,2:0]\n" + + "[@3,35:35='\\n',<2>,3:16]\n" + + "[@4,36:35='',<-1>,4:17]\n"; // stuff on end of comment matches another rule String found = execLexer("L.g4", grammar, "L", @@ -227,19 +260,14 @@ public class TestLexerExec extends BaseTest { "/* /* */\n" + "/* /*nested*/ */\n"); assertEquals(expecting, found); - assertEquals( - "line 3:14 token recognition error at: '*'\n" + - "line 3:15 token recognition error at: '/\n'\n", stderrDuringParse); + assertNull(stderrDuringParse); // stuff on end of comment doesn't match another rule expecting = "[@0,0:8='/* ick */',<1>,1:0]\n" + "[@1,10:10='\\n',<2>,1:10]\n" + - "[@2,11:18='/* /* */',<1>,2:0]\n" + - "[@3,20:20='\\n',<2>,2:9]\n" + - "[@4,21:33='/* /*nested*/',<1>,3:0]\n" + - "[@5,34:34=' ',<2>,3:13]\n" + - "[@6,38:38='\\n',<2>,3:17]\n" + - "[@7,39:38='',<-1>,4:18]\n"; + "[@2,11:36='/* /* */x\\n/* /*nested*/ */',<1>,2:0]\n" + + "[@3,38:38='\\n',<2>,3:17]\n" + + "[@4,39:38='',<-1>,4:18]\n"; found = execLexer("L.g4", grammar, "L", "/* ick */x\n" + "/* /* */x\n" + @@ -247,9 +275,7 @@ public class TestLexerExec extends BaseTest { assertEquals(expecting, found); assertEquals( "line 1:9 token recognition error at: 'x'\n" + - "line 2:8 token recognition error at: 'x'\n" + - "line 3:14 token recognition error at: '*'\n" + - "line 3:15 token recognition error at: '/x'\n", stderrDuringParse); + "line 3:16 token recognition error at: 'x'\n", stderrDuringParse); } @Test public void testActionExecutedInDFA() throws Exception {