added start rule; fixed error recovery set stuff

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6887]
This commit is contained in:
parrt 2010-05-23 16:41:44 -08:00
parent 865437647d
commit 8ee1042f03
21 changed files with 153 additions and 125 deletions

View File

@ -40,6 +40,8 @@ import java.util.*;
* backtracking.
*/
public abstract class BaseRecognizer {
public static final int EOF=-1;
public static final int MEMO_RULE_FAILED = -2;
public static final int MEMO_RULE_UNKNOWN = -1;
@ -142,7 +144,7 @@ public abstract class BaseRecognizer {
}
// compute what can follow this grammar element reference
if ( follow.member(Token.EOR_TOKEN_TYPE) ) {
LABitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW();
LABitSet viableTokensFollowingThisRule = computeNextViableTokenSet();
follow = follow.or(viableTokensFollowingThisRule);
if ( state.ctx.sp>=0 ) { // remove EOR if we're not the start symbol
follow.remove(Token.EOR_TOKEN_TYPE);
@ -369,7 +371,8 @@ public abstract class BaseRecognizer {
* rule invocation, the parser pushes the set of tokens that can
* follow that rule reference on the stack; this amounts to
* computing FIRST of what follows the rule reference in the
* enclosing rule. This local follow set only includes tokens
* enclosing rule. See LinearApproximator.FIRST().
* This local follow set only includes tokens
* from within the rule; i.e., the FIRST computation done by
* ANTLR stops at the end of a rule.
*
@ -394,8 +397,8 @@ public abstract class BaseRecognizer {
* ;
*
* At each rule invocation, the set of tokens that could follow
* that rule is pushed on a stack. Here are the various "local"
* follow sets:
* that rule is pushed on a stack. Here are the various
* context-sensitive follow sets:
*
* FOLLOW(b1_in_a) = FIRST(']') = ']'
* FOLLOW(b2_in_a) = FIRST(')') = ')'
@ -407,10 +410,10 @@ public abstract class BaseRecognizer {
*
* and, hence, the follow context stack is:
*
* depth local follow set after call to rule
* depth follow set start of rule execution
* 0 <EOF> a (from main())
* 1 ']' b
* 3 '^' c
* 2 '^' c
*
* Notice that ')' is not included, because b would have to have
* been called from a different context in rule a for ')' to be
@ -423,9 +426,9 @@ public abstract class BaseRecognizer {
* resync to one of those tokens. Note that FOLLOW(c)='^' and if
* we resync'd to that token, we'd consume until EOF. We need to
* sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
* In this case, for input "[]", LA(1) is in this set so we would
* not consume anything and after printing an error rule c would
* return normally. It would not find the required '^' though.
* In this case, for input "[]", LA(1) is ']' and in the set, so we would
* not consume anything. After printing an error, rule c would
* return normally. Rule b would not find the required '^' though.
* At this point, it gets a mismatched token error and throws an
* exception (since LA(1) is not in the viable following token
* set). The rule exception handler tries to recover, but finds
@ -433,7 +436,7 @@ public abstract class BaseRecognizer {
* exits normally returning to rule a. Now it finds the ']' (and
* with the successful match exits errorRecovery mode).
*
* So, you cna see that the parser walks up call chain looking
* So, you can see that the parser walks up the call chain looking
* for the token that was a member of the recovery set.
*
* Errors are not generated in errorRecovery mode.
@ -453,11 +456,17 @@ public abstract class BaseRecognizer {
* Parsers":
* ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
*
* Like Grosch I implemented local FOLLOW sets that are combined
* Like Grosch I implement context-sensitive FOLLOW sets that are combined
* at run-time upon error to avoid overhead during parsing.
*/
protected LABitSet computeErrorRecoverySet() {
return combineFollows(false);
int top = state.ctx.sp;
LABitSet followSet = new LABitSet();
for (int i=top; i>=0; i--) { // i==0 is EOF context for start rule invocation
LABitSet f = (LABitSet)state.ctx.get(i).follow;
followSet.orInPlace(f);
}
return followSet;
}
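To make that walk concrete, here is a minimal sketch (not part of the commit) of how the recovery set is assembled for the grammar in the comment above while rule c sits on top of the a -> b -> c context stack for input "[]". The token type values are assumptions for illustration only:

    int RBRACK = 4, CARET = 5;                  // hypothetical types for ']' and '^'
    LABitSet recovery = new LABitSet();
    recovery.orInPlace(LABitSet.of(CARET));     // depth 2: FOLLOW(c_in_b) = {'^'}
    recovery.orInPlace(LABitSet.of(RBRACK));    // depth 1: FOLLOW(b_in_a) = {']'}
    recovery.orInPlace(LABitSet.EOF_SET);       // depth 0: start-rule context = {EOF}
    // recovery is now {']','^',EOF}; LA(1)==']' is a member, so rule c consumes
    // nothing and returns after reporting the error, as the comment describes.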
/** Compute the context-sensitive FOLLOW set for current rule.
@ -512,37 +521,17 @@ public abstract class BaseRecognizer {
* a missing token in the input stream. "Insert" one by just not
* throwing an exception.
*/
protected LABitSet computeContextSensitiveRuleFOLLOW() {
return combineFollows(true);
}
// TODO: what is exact? it seems to only add sets from above on stack
// if EOR is in set i. When it sees a set w/o EOR, it stops adding.
// Why would we ever want them all? Maybe no viable alt instead of
// mismatched token?
protected LABitSet combineFollows(boolean exact) {
public LABitSet computeNextViableTokenSet() {
int top = state.ctx.sp;
LABitSet followSet = new LABitSet();
for (int i=top; i>=0; i--) {
LABitSet localFollowSet = (LABitSet)state.ctx.get(i).follow;
/*
System.out.println("local follow depth "+i+"="+
localFollowSet.toString(getTokenNames())+")");
*/
followSet.orInPlace(localFollowSet);
if ( exact ) {
// can we see end of rule?
if ( localFollowSet.member(Token.EOR_TOKEN_TYPE) ) {
// Only leave EOR in set if at top (start rule); this lets
// us know if have to include follow(start rule); i.e., EOF
if ( i>0 ) {
followSet.remove(Token.EOR_TOKEN_TYPE);
}
}
else { // can't see end of rule, quit
break;
}
}
for (int i=top; i>=0; i--) { // i==0 is EOF context for start rule invocation
LABitSet f = (LABitSet)state.ctx.get(i).follow;
followSet.orInPlace(f);
// can we see end of rule? if not, don't include follow of this rule
if ( !f.member(Token.EOR_TOKEN_TYPE) ) break;
// else combine with tokens that can follow this rule (rm EOR also)
// EOR indicates we have to include follow(start rule); i.e., EOF
followSet.remove(Token.EOR_TOKEN_TYPE);
}
return followSet;
}
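For contrast with computeErrorRecoverySet(), a rough worked example using the same grammar and hypothetical stack as the sketch above: the top-of-stack follow for c_in_b is {'^'}, which does not contain EOR, so this loop breaks after the first frame.

    // computeNextViableTokenSet():  {'^'}            -- exactly what may appear next
    // computeErrorRecoverySet():    {'^', ']', EOF}  -- everything we could resync to

Only when a frame's follow contains EOR (i.e., the static lookahead ran off the end of that rule) does this method pull in the caller's follow, stripping the EOR marker as it goes.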

View File

@ -34,7 +34,7 @@ public class EarlyExitException extends RecognitionException {
/** Used for remote debugger deserialization */
public EarlyExitException() {;}
public EarlyExitException(BaseRecognizer recognizer, LABitSet expecting) {
super(recognizer, expecting);
public EarlyExitException(BaseRecognizer recognizer, LABitSet firstSet) {
super(recognizer, firstSet);
}
}

View File

@ -33,8 +33,8 @@ public class MismatchedSetException extends RecognitionException {
/** Used for remote debugger deserialization */
public MismatchedSetException() {;}
public MismatchedSetException(BaseRecognizer recognizer, LABitSet expecting) {
super(recognizer, expecting);
public MismatchedSetException(BaseRecognizer recognizer, LABitSet firstSet) {
super(recognizer, firstSet);
}
public String toString() {

View File

@ -34,8 +34,8 @@ public class MismatchedTokenException extends RecognitionException {
/** Used for remote debugger deserialization */
public MismatchedTokenException() {;}
public MismatchedTokenException(BaseRecognizer recognizer, int expecting) {
super(recognizer, LABitSet.of(expecting));
public MismatchedTokenException(BaseRecognizer recognizer, int firstSet) {
super(recognizer, LABitSet.of(firstSet));
}
public String toString() {

View File

@ -36,9 +36,9 @@ public class MismatchedTreeNodeException extends RecognitionException {
}
public MismatchedTreeNodeException(BaseRecognizer recognizer,
int expecting)
int firstSet)
{
super(recognizer, LABitSet.of(expecting));
super(recognizer, LABitSet.of(firstSet));
}
public String toString() {

View File

@ -35,9 +35,9 @@ public class NoViableAltException extends RecognitionException {
public NoViableAltException() {;}
public NoViableAltException(BaseRecognizer recognizer,
LABitSet expecting)
LABitSet firstSet)
{
super(recognizer, expecting);
super(recognizer, firstSet);
}
public String toString() {

View File

@ -30,8 +30,8 @@ package org.antlr.v4.runtime;
import org.antlr.runtime.Token;
import org.antlr.v4.runtime.misc.LABitSet;
/** Rules that return more than a single value must return an object
* containing all the values. Besides the properties defined in
/** Rules return values in an object containing all the values.
* Besides the properties defined in
* RuleLabelScope.predefinedRulePropertiesScope there may be user-defined
* return values. This class simply defines the minimum properties that
* are always defined and methods to access the others that might be
@ -52,6 +52,6 @@ public class ParserRuleContext extends RuleContext {
public Token start, stop;
public Object getStart() { return start; }
public Object getStop() { return stop; }
public ParserRuleContext() {;}
public ParserRuleContext() { super(); }
public ParserRuleContext(LABitSet follow) { super(follow); }
}

View File

@ -113,9 +113,12 @@ public class RecognitionException extends RuntimeException {
this(recognizer, null);
}
public RecognitionException(BaseRecognizer recognizer, LABitSet expecting) {
public RecognitionException(BaseRecognizer recognizer, LABitSet firstSet) {
this.recognizer = recognizer;
this.expecting = expecting;
// firstSet is what we're expecting within the rule that calls this ctor.
// must combine with context-sensitive FOLLOW of that rule.
LABitSet viableTokensFollowingThisRule = recognizer.computeNextViableTokenSet();
this.expecting = viableTokensFollowingThisRule.or(firstSet);
IntStream input = recognizer.state.input;
this.index = input.index();
if ( input instanceof TokenStream ) {
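The practical effect of this change (a sketch, not code from the commit): if rule b in the running grammar example throws a mismatched-token error while looking for '^', firstSet is {'^'} and the constructor now ORs in the dynamic follow, so exception.expecting reports every token legal at that point rather than only the local FIRST set.

    // hedged sketch of what a generated match() effectively triggers on error;
    // CARET is a hypothetical token type constant
    throw new MismatchedTokenException(this, CARET);
    // inside the ctors: expecting = computeNextViableTokenSet().or(LABitSet.of(CARET))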

View File

@ -40,6 +40,9 @@ import java.util.Map;
public class RecognizerSharedState {
public IntStream input;
/** First on stack is a fake call to the start rule from S' : S EOF ;
* The generated start rule does this.
*/
public QStack<RuleContext> ctx;
/** This is true when we see an error and before having successfully
@ -81,19 +84,19 @@ public class RecognizerSharedState {
List<ANTLRParserListener> listeners;
public RecognizerSharedState() {
this.ctx = new QStack<RuleContext>();
ctx = new QStack<RuleContext>();
}
public RecognizerSharedState(RecognizerSharedState state) {
this.ctx = state.ctx;
this.errorRecovery = state.errorRecovery;
this.lastErrorIndex = state.lastErrorIndex;
this.failed = state.failed;
this.syntaxErrors = state.syntaxErrors;
this.backtracking = state.backtracking;
if ( state.ruleMemo!=null ) {
this.ruleMemo = new Map[state.ruleMemo.length];
System.arraycopy(state.ruleMemo, 0, this.ruleMemo, 0, state.ruleMemo.length);
}
}
// public RecognizerSharedState(RecognizerSharedState state) {
// this.ctx = state.ctx;
// this.errorRecovery = state.errorRecovery;
// this.lastErrorIndex = state.lastErrorIndex;
// this.failed = state.failed;
// this.syntaxErrors = state.syntaxErrors;
// this.backtracking = state.backtracking;
// if ( state.ruleMemo!=null ) {
// this.ruleMemo = new Map[state.ruleMemo.length];
// System.arraycopy(state.ruleMemo, 0, this.ruleMemo, 0, state.ruleMemo.length);
// }
// }
}

View File

@ -29,7 +29,9 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.LABitSet;
/** Rules can return start/stop info as well as possible trees and templates */
/** Rules can return start/stop info as well as possible trees and templates.
* Each context must have a FOLLOW set; it's EOF if none is specified.
*/
public class RuleContext {
/** Track the set of token types that can follow any rule invocation. */
public LABitSet follow;
@ -48,6 +50,6 @@ public class RuleContext {
*/
public Object getTemplate() { return null; }
public RuleContext() {;}
public RuleContext() { this(LABitSet.EOF_SET); }
public RuleContext(LABitSet follow) { this.follow = follow; }
}
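A minimal usage sketch for these constructors (FOLLOW_b_in_a is a hypothetical generated bit set, not something defined in this commit): the start-rule wrapper relies on the default EOF follow, while nested rule invocations pass the statically computed follow bits.

    RuleContext startCtx = new RuleContext();                // follow defaults to LABitSet.EOF_SET
    RuleContext nestedCtx = new RuleContext(FOLLOW_b_in_a);  // follow for b when invoked from a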

View File

@ -14,6 +14,8 @@ public class LABitSet {
*/
public final static int MOD_MASK = BITS - 1;
public static final LABitSet EOF_SET = LABitSet.of(Token.EOF);
/** The actual data bits */
public long bits[];

View File

@ -37,7 +37,6 @@ public class <parser.name> extends Parser {
TokenType(int type) { this.type = type; }
}
!>
public static final int EOF=-1;
<parser.tokens.keys:{k | public static final int <k>=<parser.tokens.(k)>;}; separator="\n">
<scopes>
<namedActions.members>
@ -68,10 +67,8 @@ RuleFunction(f,code,decls,context,scope,namedActions,finallyAction) ::= <<
<context>
<scope>
<if(f.modifiers)><f.modifiers:{f | <f> }><else>public final <endif><if(f.ctxType)><f.ctxType><else>void<endif> <f.name>(<f.ctxType> _ctx) throws RecognitionException {
<if(f.ctxType)>
<if(f.modifiers)><f.modifiers:{f | <f> }><else>public final <endif><f.ctxType> <f.name>(<f.ctxType> _ctx) throws RecognitionException {
state.ctx.push(_ctx);
<endif>
<if(f.scope)>
<f.scope.name>_stack.push(new <f.scope.name>());
<endif>
@ -95,6 +92,13 @@ RuleFunction(f,code,decls,context,scope,namedActions,finallyAction) ::= <<
}
>>
/** Convenience method to call from outside */
StartRuleFunction(f) ::= <<
<if(f.modifiers)><f.modifiers:{f | <f> }><else>public final <endif><f.ctxType> <f.name>(<f.args; separator=", ">) throws RecognitionException {
return <f.name>(new <f.ctxType>(<f.args:{a | <a.name>, }>LABitSet.EOF_SET));
}
>>
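For orientation, here is roughly what the StartRuleFunction template above might expand to for an argument-less rule a whose generated context type is a_ctx (both names are assumptions, not actual output from this commit):

    /** Convenience method to call from outside */
    public final a_ctx a() throws RecognitionException {
        return a(new a_ctx(LABitSet.EOF_SET));
    }

The wrapper just seeds the first context frame with EOF as its follow set, which is the fake S' : S EOF ; call mentioned in RecognizerSharedState.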
CodeBlock(c, ops) ::= <<
<ops; separator="\n">
>>
@ -258,7 +262,8 @@ StructDecl(s,attrs) ::= <<
public static class <s.name> extends ParserRuleContext {
<attrs:{a | <a>;}; separator="\n">
<if(s.ctorAttrs)>
public <s.name>(<s.ctorAttrs; separator=", ">) {
public <s.name>(<s.ctorAttrs:{a | <a>,}> LABitSet follow) {
super(follow);
<s.ctorAttrs:{a | this.<a.name> = <a.name>;}; separator="\n">
}
<endif>

View File

@ -26,6 +26,25 @@ public class LinearApproximator {
int max_k = MAX_LINEAR_APPROXIMATE_DEPTH;
/** Used during LOOK to detect computation cycles. E.g., ()* causes
* infinite loop without it. If we get to same state with same k
* and same context, must be infinite loop. Analogous to
* closureBusy in NFA to DFA conversion.
*/
Set<LookaheadNFAConfig> lookBusy = new HashSet<LookaheadNFAConfig>();
/** The lookahead associated with an alternative, 1..k. A WORK ARRAY. */
IntervalSet[] look;
/** Our goal is to produce a DFA that looks like we created the
* usual way through subset construction. To look the same, we
* have to store a set of NFA configurations within each DFA state.
*
* A WORK ARRAY. Stores the NFA configurations for each lookahead
* depth, 1..k.
*/
OrderedHashSet<NFAConfig>[] configs;
/** Records state of a LOOK operation; used just for lookahead busy checks */
static class LookaheadNFAConfig {
public NFAState s;
@ -47,25 +66,6 @@ public class LinearApproximator {
}
}
/** Used during LOOK to detect computation cycles. E.g., ()* causes
* infinite loop without it. If we get to same state with same k
* and same context, must be infinite loop. Analogous to
* closureBusy in NFA to DFA conversion.
*/
Set<LookaheadNFAConfig> lookBusy = new HashSet<LookaheadNFAConfig>();
/** The lookahead associated with an alternative, 1..k. A WORK ARRAY. */
IntervalSet[] look;
/** Our goal is to produce a DFA that looks like we created the
* usual way through subset construction. To look the same, we
* have to store a set of NFA configurations within each DFA state.
*
* A WORK ARRAY. Stores the NFA configurations for each lookahead
* depth, 1..k.
*/
OrderedHashSet<NFAConfig>[] configs;
public LinearApproximator(Grammar g, int decision) {
this.g = g;
this.decision = decision;
@ -213,26 +213,43 @@ public class LinearApproximator {
}
}
/** Compute FOLLOW of element but don't leave rule to compute global
* context-free FOLLOW. Used for rule invocation, match token, and
* error sync'ing.
/* A bit set used for prediction contains all possible tokens
that can predict a particular alternative or set of alternatives.
Bit sets used for error recovery and expecting, however, are incomplete.
They only contain tokens extracted from the current rule. They don't include
any tokens from rules that invoke it (when the lookahead computation
reaches the end of the rule). Instead, the dynamic follow is used
because it contains the exact set of tokens that can follow an
invocation instead of all possible. It's the true expected set
of tokens at runtime. To indicate that a bit set is incomplete,
we include the EOR (end of rule) token type. If we reach the end of
a start rule, we include EOF.
See BaseRecognizer.computeErrorRecoverySet() and friends for more
information on combining run-time bit sets.
*/
public IntervalSet LOOK(NFAState s) {
System.out.println("LOOK("+s.stateNumber+")");
/** Compute set of tokens that we can reach from s, but don't leave rule
* to compute global, context-free FOLLOW. Used for error handling
* after rule invocation and match tokens. Also used in exceptions
* to show what we were expecting.
*/
public IntervalSet FIRST(NFAState s) {
//System.out.println("FIRST("+s.stateNumber+")");
lookBusy.clear();
IntervalSet fset = new IntervalSet();
_LOOK(s, NFAContext.EMPTY(), fset);
_FIRST(s, NFAContext.EMPTY(), fset);
return fset;
}
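A quick usage sketch (Grammar g and NFAState s are assumed to be in scope; this is not code from the commit):

    LinearApproximator approx = new LinearApproximator(g, NFA.INVALID_DECISION_NUMBER);
    IntervalSet first = approx.FIRST(s);
    // if lookahead can reach the end of the rule, first contains Token.EOR_TOKEN_TYPE;
    // the runtime later swaps that marker for the dynamic follow via
    // BaseRecognizer.computeNextViableTokenSet(), per the comment above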
void _LOOK(NFAState s, NFAContext context, IntervalSet fset) {
//System.out.println("_LOOK("+s.stateNumber+", "+k+", ctx="+context);
void _FIRST(NFAState s, NFAContext context, IntervalSet fset) {
//System.out.println("_FIRST("+s.stateNumber+", "+k+", ctx="+context);
LookaheadNFAConfig ac = new LookaheadNFAConfig(s,1,context);
if ( lookBusy.contains(ac) ) return;
lookBusy.add(ac);
if ( s instanceof RuleStopState ) {
if ( !context.isEmpty() ) _LOOK(context.returnState, context.parent, fset);
if ( !context.isEmpty() ) _FIRST(context.returnState, context.parent, fset);
else fset.add(Token.EOR_TOKEN_TYPE); // hit end of rule
return;
}
@ -243,10 +260,10 @@ public class LinearApproximator {
if ( t instanceof RuleTransition ) {
NFAContext newContext =
new NFAContext(context, ((RuleTransition)t).followState);
_LOOK(t.target, newContext, fset);
_FIRST(t.target, newContext, fset);
}
else if ( t.isEpsilon() ) {
_LOOK(t.target, context, fset);
_FIRST(t.target, context, fset);
}
else {
fset.addAll( t.label() );

View File

@ -39,7 +39,6 @@ public class NFA {
public int defineDecisionState(DecisionState s) {
decisionToNFAState.add(s);
s.decision = decisionToNFAState.size()-1;
System.out.println("dec state "+s.stateNumber+" gets dec # "+s.decision);
return s.decision;
}
}

View File

@ -99,17 +99,18 @@ public abstract class OutputModelFactory {
return b;
}
public BitSetDecl createTestBitSet(GrammarAST ast, IntervalSet set) {
String inRuleName = ast.nfaState.rule.name;
String name = "LOOK_in_"+inRuleName+"_"+ast.token.getTokenIndex();
BitSetDecl b = new BitSetDecl(this, name, set);
return b;
}
public BitSetDecl createExpectingBitSet(GrammarAST ast, int decision, IntervalSet set) {
String inRuleName = ast.nfaState.rule.name;
String name = "EXPECTING_in_"+inRuleName+"_"+decision;
BitSetDecl b = new BitSetDecl(this, name, set);
return b;
}
public BitSetDecl createTestBitSet(GrammarAST ast, IntervalSet set) {
String inRuleName = ast.nfaState.rule.name;
String name = "LOOK_in_"+inRuleName+"_"+ast.token.getTokenIndex();
BitSetDecl b = new BitSetDecl(this, name, set);
return b;
}
}

View File

@ -23,7 +23,7 @@ public abstract class Choice extends SrcOp {
// TODO: use existing lookahead! don't compute
LinearApproximator approx = new LinearApproximator(factory.g, decision);
NFAState decisionState = ast.nfaState;
expecting = approx.LOOK(decisionState);
expecting = approx.FIRST(decisionState);
System.out.println(blkOrEbnfRootAST.toStringTree()+" choice expecting="+expecting);
}

View File

@ -51,7 +51,7 @@ public class InvokeRule extends SrcOp implements LabeledOp {
LinearApproximator approx = new LinearApproximator(factory.g, NFA.INVALID_DECISION_NUMBER);
RuleTransition call = (RuleTransition)ast.nfaState.transition(0);
IntervalSet fset = approx.LOOK(call.followState);
IntervalSet fset = approx.FIRST(call.followState);
System.out.println("follow rule ref "+name+"="+fset);
follow = factory.createFollowBitSet(ast, fset);
factory.defineBitSet(follow);

View File

@ -41,7 +41,7 @@ public class MatchToken extends SrcOp implements LabeledOp {
}
LinearApproximator approx = new LinearApproximator(factory.g, NFA.INVALID_DECISION_NUMBER);
IntervalSet fset = approx.LOOK(ast.nfaState.transition(0).target);
IntervalSet fset = approx.FIRST(ast.nfaState.transition(0).target);
System.out.println("follow match "+name+"="+fset);
follow = factory.createFollowBitSet(ast, fset);
factory.defineBitSet(follow);

View File

@ -29,7 +29,10 @@ public class Parser extends OutputModelObject {
for (AttributeDict d : factory.g.scopes.values()) {
scopes.add( new DynamicScopeStruct(factory, d.name, d.attributes.values()) );
}
for (Rule r : factory.g.rules.values()) funcs.add( new RuleFunction(factory, r) );
for (Rule r : factory.g.rules.values()) {
if ( r.isStartRule ) funcs.add( new StartRuleFunction(factory, r) );
funcs.add( new RuleFunction(factory, r) );
}
}
// @Override

View File

@ -7,7 +7,6 @@ import org.antlr.v4.misc.OrderedHashSet;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.parse.ScopeParser;
import org.antlr.v4.tool.Attribute;
import org.antlr.v4.tool.GrammarAST;
import org.antlr.v4.tool.Rule;
@ -25,7 +24,6 @@ public class RuleFunction extends OutputModelObject {
public List<String> elementsReferencedInRewrite;
public List<String> exceptions;
public Action finallyAction;
public boolean isStartRule;
public Map<String, Action> namedActions;
public StructDecl context;
@ -38,7 +36,6 @@ public class RuleFunction extends OutputModelObject {
public RuleFunction(OutputModelFactory factory, Rule r) {
super(factory);
this.name = r.name;
this.isStartRule = r.isStartRule;
if ( r.modifiers!=null && r.modifiers.size()>0 ) {
this.modifiers = new ArrayList<String>();
for (GrammarAST t : r.modifiers) modifiers.add(t.getText());
@ -63,18 +60,14 @@ public class RuleFunction extends OutputModelObject {
r.scope.attributes.values());
}
//globalScopesUsed = new ArrayList<String>();
//for (Token t : r.useScopes) globalScopesUsed.add(t.getText());
globalScopesUsed = Utils.apply(r.useScopes, "getText");
if ( argsAndReturnValues.size()>0 ) {
context = new StructDecl(factory, factory.gen.target.getRuleFunctionContextStructName(r),
argsAndReturnValues);
ctorAttrs.add(ScopeParser.parseAttributeDef("LABitSet follow"));
context.ctorAttrs = ctorAttrs;
}
ruleLabels = r.getLabelNames();
tokenLabels = r.getTokenRefs();
exceptions = Utils.nodesToStrings(r.exceptionActions);

View File

@ -0,0 +1,11 @@
package org.antlr.v4.codegen.src;
import org.antlr.v4.codegen.OutputModelFactory;
import org.antlr.v4.tool.Rule;
/** */
public class StartRuleFunction extends RuleFunction {
public StartRuleFunction(OutputModelFactory factory, Rule r) {
super(factory, r);
}
}