remove incidentTransition from ATNState, add computation for next tokens within rule, add nextTokenWithinRule to ATNState, add EPSILON as -2 token type

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9459]
This commit is contained in:
parrt 2011-11-25 16:09:00 -08:00
parent 6898dc6f5e
commit d2b24da47f
11 changed files with 226 additions and 99 deletions

View File

@ -0,0 +1,57 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime;
/** Error strategy that bails out of the parser at the first syntax error
 *  instead of attempting any recovery.
 */
public class BailErrorStrategy<Symbol> extends DefaultErrorStrategy<Symbol> {
    /** Instead of recovering from exception {@code e}, rethrow it wrapped
     *  in a generic RuntimeException so that it is not caught by the
     *  rule function catches. Exception {@code e} is the "cause" of the
     *  RuntimeException.
     */
    @Override
    public void recover(BaseRecognizer recognizer, RecognitionException e) {
        RuntimeException bail = new RuntimeException(e);
        throw bail;
    }

    /** Refuse to recover inline: a successful inline recovery would mean
     *  no exception is thrown, so an InputMismatchException for the
     *  recognizer is created and thrown wrapped in a RuntimeException.
     */
    @Override
    public Symbol recoverInline(BaseRecognizer recognizer)
        throws RecognitionException
    {
        InputMismatchException mismatch = new InputMismatchException(recognizer);
        throw new RuntimeException(mismatch);
    }

    /** Deliberately a no-op so that no recovery from problems in
     *  subrules is attempted.
     */
    @Override
    public void sync(BaseRecognizer recognizer) {
        // intentionally empty
    }
}

View File

@ -28,8 +28,7 @@
*/
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.atn.ATNConfig;
import org.antlr.v4.runtime.atn.ParserATNSimulator;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.misc.OrderedHashSet;
@ -267,9 +266,35 @@ public abstract class BaseRecognizer<Symbol> extends Recognizer<Symbol, ParserAT
return false;
}
public IntervalSet getExpectedTokens() {
return getInterpreter().atn.nextTokens(_ctx);
}
/** Compute the set of token types that may appear next, given the current
 *  ATN state and the chain of rule invocations recorded in {@code _ctx}.
 *
 *  The lookahead within the current rule is computed first. If it does not
 *  contain {@link Token#EPSILON} that set is returned as is. Otherwise the
 *  end of the rule is reachable, so the invoking contexts are walked outward,
 *  adding what can follow each rule invocation, for as long as the follow
 *  set itself still contains EPSILON. If EPSILON is still present once the
 *  walk stops, {@link Token#EOF} is added to the result.
 *
 *  EPSILON is only a marker and is never part of the returned set.
 */
public IntervalSet getExpectedTokens() {
    ATN atn = getInterpreter().atn;
    RuleContext ctx = _ctx;
    ATNState s = atn.states.get(ctx.s);
    IntervalSet following = atn.nextTokens(s);
    if ( !following.contains(Token.EPSILON) ) return following;

    IntervalSet expected = new IntervalSet();
    expected.addAll(following);
    // Strip the marker right away: the loop below may not execute at all
    // (e.g. no invoking state), and EPSILON must not leak to the caller.
    expected.remove(Token.EPSILON);
    while ( ctx!=null && ctx.invokingState>=0 && following.contains(Token.EPSILON) ) {
        ATNState invokingState = atn.states.get(ctx.invokingState);
        RuleTransition rt = (RuleTransition)invokingState.transition(0);
        // what can follow the rule reference that invoked this context
        following = atn.nextTokens(rt.followState);
        expected.addAll(following);
        expected.remove(Token.EPSILON);
        ctx = ctx.parent;
    }
    // Still able to fall off the end of the last rule examined: expect EOF.
    if ( following.contains(Token.EPSILON) ) {
        expected.add(Token.EOF);
    }
    return expected;
}
/** Return the result of {@code atn.nextTokens(ATNState)} for the ATN state
 *  recorded in the current context ({@code _ctx.s}); invoking contexts are
 *  not consulted here.
 */
public IntervalSet getExpectedTokensWithinCurrentRule() {
    final ATN network = getInterpreter().atn;
    final ATNState current = network.states.get(_ctx.s);
    return network.nextTokens(current);
}
/** Return List<String> of the rule names in your parser instance
* leading up to a call to the current rule. You could override if

View File

@ -30,7 +30,8 @@
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.misc.*;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.tree.AST;
/** This is the default error handling mechanism for ANTLR parsers
@ -148,17 +149,18 @@ public class DefaultErrorStrategy<Symbol> implements ANTLRErrorStrategy<Symbol>
ATNState s = recognizer.getInterpreter().atn.states.get(recognizer._ctx.s);
// System.err.println("sync @ "+s.stateNumber+"="+s.getClass().getSimpleName());
// If already recovering, don't try to sync
if ( errorRecoveryMode ) return;
if ( errorRecoveryMode ) return;
// TODO: CACHE THESE RESULTS!!
IntervalSet expecting = getExpectedTokens(recognizer);
// System.err.println("sync expecting: "+expecting);
SymbolStream<Symbol> tokens = recognizer.getInputStream();
int la = tokens.LA(1);
// try cheaper subset first; might get lucky. seems to shave a wee bit off
if ( recognizer.getATN().nextTokens(s).contains(la) || la==Token.EOF ) return;
IntervalSet expecting = recognizer.getExpectedTokens();
// TODO: subclass this class for treeparsers
SymbolStream<Symbol> tokens = recognizer.getInputStream();
int la = tokens.LA(1);
// Return but don't end recovery. only do that upon valid token match
if ( la==Token.EOF || expecting.contains(la) ) return;
if ( expecting.contains(la) ) return;
if ( s instanceof PlusBlockStartState ||
s instanceof StarLoopEntryState ||
@ -528,10 +530,11 @@ public class DefaultErrorStrategy<Symbol> implements ANTLRErrorStrategy<Symbol>
// compute what follows who invoked us
ATNState invokingState = atn.states.get(ctx.invokingState);
RuleTransition rt = (RuleTransition)invokingState.transition(0);
IntervalSet follow = atn.nextTokens(rt.followState, null);
IntervalSet follow = atn.nextTokens(rt.followState);
recoverSet.addAll(follow);
ctx = ctx.parent;
}
recoverSet.remove(Token.EPSILON);
// System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames()));
return recoverSet;
}

View File

@ -38,6 +38,11 @@ public interface Token {
public static final Token INVALID_TOKEN = new CommonToken(INVALID_TYPE);
public static final int MIN_TOKEN_TYPE = 1;
/** During lookahead operations, this "token" signifies we hit rule end ATN state
* and did not follow it despite needing to.
*/
public static final int EPSILON = -2;
/** imaginary tree navigation type; traverse "get child" link */
public static final int DOWN = 1;

View File

@ -29,11 +29,14 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import java.util.*;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/** */
public class ATN {
@ -78,9 +81,9 @@ public class ATN {
public ATN() { }
/** Compute the set of valid tokens reachable from the current
* position in the parse. ctx must not be null.
* position in the parse.
*/
public IntervalSet nextTokens(RuleContext ctx) {
public IntervalSet nextTokens(@NotNull RuleContext ctx) {
ATNState s = states.get(ctx.s);
if ( s == null ) return null;
return nextTokens(s, ctx);
@ -97,6 +100,16 @@ public class ATN {
return next;
}
/** Compute the set of valid tokens that can occur starting in {@code s}
 *  while staying in the same rule. EPSILON is in the set if the end of the
 *  rule can be reached. The result is stored on the state the first time it
 *  is computed, marked read-only, and returned directly on later calls.
 */
public IntervalSet nextTokens(ATNState s) {
    IntervalSet cached = s.nextTokenWithinRule;
    if ( cached == null ) {
        cached = nextTokens(s, null);
        s.nextTokenWithinRule = cached;
        cached.setReadonly(true);
    }
    return cached;
}
public void addState(@NotNull ATNState state) {
state.atn = this;
states.add(state);

View File

@ -29,6 +29,8 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.IntervalSet;
import java.util.*;
public class ATNState {
@ -73,41 +75,35 @@ public class ATNState {
public static final Map<Class<? extends ATNState>, Integer> serializationTypes =
Collections.unmodifiableMap(new HashMap<Class<? extends ATNState>, Integer>() {{
put(ATNState.class, BASIC);
put(RuleStartState.class, RULE_START);
put(BlockStartState.class, BLOCK_START);
put(PlusBlockStartState.class, PLUS_BLOCK_START);
put(StarBlockStartState.class, STAR_BLOCK_START);
put(TokensStartState.class, TOKEN_START);
put(RuleStopState.class, RULE_STOP);
put(BlockEndState.class, BLOCK_END);
put(PlusLoopbackState.class, PLUS_LOOP_BACK);
put(StarLoopbackState.class, STAR_LOOP_BACK);
put(StarLoopEntryState.class, STAR_LOOP_ENTRY);
}});
put(ATNState.class, BASIC);
put(RuleStartState.class, RULE_START);
put(BlockStartState.class, BLOCK_START);
put(PlusBlockStartState.class, PLUS_BLOCK_START);
put(StarBlockStartState.class, STAR_BLOCK_START);
put(TokensStartState.class, TOKEN_START);
put(RuleStopState.class, RULE_STOP);
put(BlockEndState.class, BLOCK_END);
put(PlusLoopbackState.class, PLUS_LOOP_BACK);
put(StarLoopbackState.class, STAR_LOOP_BACK);
put(StarLoopEntryState.class, STAR_LOOP_ENTRY);
}});
public static final int INVALID_STATE_NUMBER = -1;
/** Which ATN are we in? */
public ATN atn = null;
public int stateNumber = INVALID_STATE_NUMBER;
public int ruleIndex; // at runtime, we don't have Rule objects
public int epsilonOnlyTransitions = -1;
/** Which ATN are we in? */
public ATN atn = null;
//public Transition transition;
/** Track the transitions emanating from this ATN state. */
protected final List<Transition> transitions =
new ArrayList<Transition>(INITIAL_NUM_TRANSITIONS);
/** For o-A->o type ATN tranitions, record the label that leads to this
* state. Useful for creating rich error messages when we find
* insufficiently (with preds) covered states.
*/
public Transition incidentTransition;
public IntervalSet nextTokenWithinRule;
@Override
public int hashCode() { return stateNumber; }

View File

@ -29,13 +29,14 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet;
import java.util.*;
import java.util.HashSet;
import java.util.Set;
public class LL1Analyzer {
/** Used during LOOK to detect computation cycles. E.g., ()* causes
@ -64,52 +65,68 @@ public class LL1Analyzer {
return look;
}
@NotNull
public IntervalSet LOOK(@NotNull ATNState s, @Nullable RuleContext ctx) {
IntervalSet r = new IntervalSet();
_LOOK(s, ctx, r, new HashSet<ATNConfig>());
return r;
}
/** Compute the lookahead set from state {@code s}, consulting {@code ctx}
 *  when the end of a rule is reached. If ctx is EMPTY, FOLLOW is not chased.
 *  If ctx is null, EPSILON is in the set if the end of the rule can be reached.
 */
@NotNull
public IntervalSet LOOK(@NotNull ATNState s, @Nullable RuleContext ctx) {
    Set<ATNConfig> visited = new HashSet<ATNConfig>();
    IntervalSet result = new IntervalSet();
    _LOOK(s, ctx, result, visited);
    return result;
}
protected void _LOOK(@NotNull ATNState s, @Nullable RuleContext ctx, @NotNull IntervalSet look,
@NotNull Set<ATNConfig> lookBusy) {
/** Compute the set of tokens that can come next. If the context is EMPTY,
* then we don't go anywhere when we hit the end of the rule. We have
* the correct set. If the context is null, that means that we did not want
* any tokens following this rule--just the tokens that could be found within this
* rule. Add EPSILON to the set indicating we reached the end of the rule without having
* to match a token.
*/
protected void _LOOK(@NotNull ATNState s, @Nullable RuleContext ctx, @NotNull IntervalSet look,
@NotNull Set<ATNConfig> lookBusy) {
// System.out.println("_LOOK("+s.stateNumber+", ctx="+ctx);
ATNConfig c = new ATNConfig(s, 0, ctx);
if ( lookBusy.contains(c) ) return;
lookBusy.add(c);
ATNConfig c = new ATNConfig(s, 0, ctx);
if ( lookBusy.contains(c) ) return;
lookBusy.add(c);
if ( s instanceof RuleStopState && ctx != null && ctx.invokingState!=-1 ) {
ATNState invokingState = atn.states.get(ctx.invokingState);
RuleTransition rt = (RuleTransition)invokingState.transition(0);
ATNState retState = rt.followState;
if ( s instanceof RuleStopState ) {
if ( ctx==null ) {
look.add(Token.EPSILON);
return;
}
if ( ctx.invokingState!=-1 ) {
ATNState invokingState = atn.states.get(ctx.invokingState);
RuleTransition rt = (RuleTransition)invokingState.transition(0);
ATNState retState = rt.followState;
// System.out.println("popping back to "+retState);
_LOOK(retState, ctx.parent, look, lookBusy);
return;
}
_LOOK(retState, ctx.parent, look, lookBusy);
return;
}
}
int n = s.getNumberOfTransitions();
for (int i=0; i<n; i++) {
Transition t = s.transition(i);
if ( t.getClass() == RuleTransition.class ) {
RuleContext newContext =
new RuleContext(ctx, s.stateNumber, t.target.stateNumber);
_LOOK(t.target, newContext, look, lookBusy);
}
else if ( t.isEpsilon() ) {
_LOOK(t.target, ctx, look, lookBusy);
}
else if ( t.getClass() == WildcardTransition.class ) {
look.addAll( IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, atn.maxTokenType) );
}
else {
int n = s.getNumberOfTransitions();
for (int i=0; i<n; i++) {
Transition t = s.transition(i);
if ( t.getClass() == RuleTransition.class ) {
RuleContext newContext =
new RuleContext(ctx, s.stateNumber, t.target.stateNumber);
_LOOK(t.target, newContext, look, lookBusy);
}
else if ( t.isEpsilon() ) {
_LOOK(t.target, ctx, look, lookBusy);
}
else if ( t.getClass() == WildcardTransition.class ) {
look.addAll( IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, atn.maxTokenType) );
}
else {
// System.out.println("adding "+ t);
IntervalSet set = t.label();
if (t instanceof NotSetTransition) {
set = set.complement(IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, atn.maxTokenType));
}
look.addAll(set);
}
}
}
IntervalSet set = t.label();
if (t instanceof NotSetTransition) {
set = set.complement(IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, atn.maxTokenType));
}
look.addAll(set);
}
}
}
}

View File

@ -481,7 +481,8 @@ public class IntervalSet implements IntSet {
int a = I.a;
int b = I.b;
if ( a==b ) {
if ( a==-1 ) buf.append("<EOF>");
if ( a==Token.EOF ) buf.append("<EOF>");
else if ( a==Token.EPSILON ) buf.append("<EPSILON>");
else buf.append(tokenNames[a]);
}
else {

View File

@ -1,6 +1,8 @@
import org.antlr.runtime.debug.BlankDebugEventListener;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.ANTLRFileStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.ParserATNSimulator;
import java.io.File;
@ -100,12 +102,14 @@ class TestJava {
// Create a parser that reads from the scanner
if ( parser==null ) {
parser = new JavaParser(tokens);
// parser.setErrorHandler(new BailErrorStrategy<Token>());
// parser.getInterpreter().setContextSensitive(true);
}
parser.setTokenStream(tokens);
// start parsing at the compilationUnit rule
parser.compilationUnit();
//System.err.println("finished "+f);
// System.out.println("cache size = "+DefaultErrorStrategy.cache.size());
}
}
catch (Exception e) {

View File

@ -132,7 +132,6 @@ public class LexerATNFactory extends ParserATNFactory {
else {
left.addTransition(new SetTransition(right, set));
}
right.incidentTransition = left.transition(0);
associatedAST.atnState = left;
return new Handle(left, right);
}

View File

@ -30,18 +30,28 @@
package org.antlr.v4.automata;
import org.antlr.runtime.*;
import org.antlr.runtime.tree.*;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.parse.*;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.ATNBuilder;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.misc.*;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.semantics.UseDefAnalyzer;
import org.antlr.v4.tool.*;
import org.antlr.v4.tool.ErrorManager;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule;
import org.antlr.v4.tool.ast.*;
import java.lang.reflect.Constructor;
import java.util.*;
import java.util.Collection;
import java.util.List;
/** ATN construction routines triggered by ATNBuilder.g.
*
@ -109,7 +119,6 @@ public class ParserATNFactory implements ATNFactory {
ATNState right = newState(node);
int ttype = g.getTokenType(node.getText());
left.addTransition(new AtomTransition(right, ttype));
right.incidentTransition = left.transition(0);
node.atnState = left;
return new Handle(left, right);
}
@ -133,7 +142,6 @@ public class ParserATNFactory implements ATNFactory {
else {
left.addTransition(new SetTransition(right, set));
}
right.incidentTransition = left.transition(0);
associatedAST.atnState = left;
return new Handle(left, right);
}
@ -428,7 +436,6 @@ public class ParserATNFactory implements ATNFactory {
ATNState left = newState(node);
ATNState right = newState(node);
left.addTransition(new WildcardTransition(right));
right.incidentTransition = left.transition(0);
node.atnState = left;
return new Handle(left, right);
}