reorg, de-emphasis label fields

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6725]
This commit is contained in:
parrt 2010-02-24 11:51:32 -08:00
parent 80ea7cf0db
commit cbb8a6917a
13 changed files with 169 additions and 30 deletions

View File

@ -3,7 +3,7 @@ package org.antlr.v4.automata;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.GrammarAST;
public class ActionTransition extends Transition {
public class ActionTransition extends NFATransition {
public GrammarAST actionAST;
public ActionTransition(GrammarAST actionAST) {

View File

@ -3,7 +3,7 @@ package org.antlr.v4.automata;
import org.antlr.v4.misc.IntervalSet;
/** */
public class AtomTransition extends Transition {
public class AtomTransition extends NFATransition {
/** The token type or character value; or, signifies special label. */
protected int label;

View File

@ -0,0 +1,45 @@
package org.antlr.v4.automata;
/** A DFA state represents a set of possible NFA configurations.
* As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
* to keep track of all possible states the NFA can be in after
* reading each input symbol. That is to say, after reading
* input a1a2..an, the DFA is in a state that represents the
* subset T of the states of the NFA that are reachable from the
* NFA's start state along some path labeled a1a2..an."
* In conventional NFA->DFA conversion, therefore, the subset T
* would be a bitset representing the set of states the
* NFA could be in. We need to track the alt predicted by each
* state as well, however. More importantly, we need to maintain
* a stack of states, tracking the closure operations as they
* jump from rule to rule, emulating rule invocations (method calls).
* Recall that NFAs do not normally have a stack like a pushdown-machine
* so I have to add one to simulate the proper lookahead sequences for
* the underlying LL grammar from which the NFA was derived.
*
* I use a list of NFAConfiguration objects. An NFAConfiguration
* is both a state (ala normal conversion) and an NFAContext describing
* the chain of rules (if any) followed to arrive at that state. There
* is also the semantic context, which is the "set" of predicates found
* on the path to this configuration.
*
* A DFA state may have multiple references to a particular NFA state,
* but with different NFAContexts (with same or different alts),
* meaning that NFA state was reached via a different set of rule invocations.
*/
public class DFAState extends State {
    // All three overrides below are placeholders: nothing is stored yet.

    /** Placeholder: takes the transition but does not record it (no-op). */
    @Override
    public void addTransition(NFATransition e) {
    }

    /** Placeholder: always reports zero transitions. */
    @Override
    public int getNumberOfTransitions() {
        return 0;
    }

    /** Placeholder: always yields {@code null}, whatever index is passed. */
    @Override
    public NFATransition transition(int i) {
        return null;
    }
}

View File

@ -0,0 +1,7 @@
package org.antlr.v4.automata;
/**
 * A DFA edge. The word "edge" is reserved for DFAs; the NFA counterpart
 * is called a transition.
 */
public class Edge {
    /** Target DFA state of this edge. */
    public DFAState target;
}

View File

@ -0,0 +1,9 @@
package org.antlr.v4.automata;
/** An {@link NFATransition} whose only own state is the target it hands to its superclass. */
public class EpsilonTransition extends NFATransition {
    /**
     * Creates the transition.
     *
     * @param target state passed straight through to the superclass constructor
     */
    public EpsilonTransition(NFAState target) {
        super(target);
    }

    /**
     * Placeholder ordering: the argument is ignored and 0 is returned for
     * every comparison.
     */
    public int compareTo(Object o) {
        return 0;
    }
}

View File

@ -11,17 +11,17 @@ import org.antlr.runtime.Token;
public abstract class Label implements /*Comparable, */ Cloneable {
public static final int INVALID = -7;
public static final int ACTION = -6;
// public static final int ACTION = -6;
public static final int EPSILON = -5;
//public static final int EPSILON = -5;
public static final String EPSILON_STR = "<EPSILON>";
//public static final String EPSILON_STR = "<EPSILON>";
/** label is a semantic predicate; implies label is epsilon also */
public static final int SEMPRED = -4;
// public static final int SEMPRED = -4;
/** label is a set of tokens or char */
public static final int SET = -3;
// public static final int SET = -3;
/** End of Token is like EOF for lexer rules. It implies that no more
* characters are available and that NFA conversion should terminate

View File

@ -0,0 +1,28 @@
package org.antlr.v4.automata;
/**
 * An NFA state.
 *
 * NFA decision states for (...)* and (...)+ need to be told apart
 * during NFA interpretation. The kinds noted so far are listed here as
 * integer constants; none of them is declared in this class:
 *
 *   LOOPBACK = 1
 *   BLOCK_START = 2
 *   OPTIONAL_BLOCK_START = 3
 *   BYPASS = 4
 *   RIGHT_EDGE_OF_BLOCK = 5
 *
 * TODO: make subclasses for all of these.
 */
public class NFAState extends State {
    // All three overrides below are placeholders: nothing is stored yet.

    /** Placeholder: takes the transition but does not record it (no-op). */
    @Override
    public void addTransition(NFATransition e) {
    }

    /** Placeholder: always reports zero transitions. */
    @Override
    public int getNumberOfTransitions() {
        return 0;
    }

    /** Placeholder: always yields {@code null}, whatever index is passed. */
    @Override
    public NFATransition transition(int i) {
        return null;
    }
}

View File

@ -0,0 +1,22 @@
package org.antlr.v4.automata;
/** An NFA transition between any two NFA states. Subclasses define
* atom, set, epsilon, action, predicate, rule transitions.
*
* This is a one way link. It emanates from a state (usually via a list of
* transitions) and has a target state.
*
* Since we never have to change the NFA transitions once we construct it,
* we can fix these transitions as specific classes. The DFA transitions
* on the other hand need to update the labels as it adds transitions to
* the states. We'll use the term Edge for the DFA to distinguish them from
* NFA transitions.
*/
public abstract class NFATransition implements Comparable {
    /** The target of this transition */
    public NFAState target;

    /**
     * Creates a transition whose target is set immediately.
     *
     * @param target value stored in {@link #target}
     */
    public NFATransition(NFAState target) {
        this.target = target;
    }

    /** Creates a transition without assigning {@link #target}; it stays {@code null}. */
    public NFATransition() {
    }
}

View File

@ -0,0 +1,25 @@
package org.antlr.v4.automata;
import org.antlr.v4.tool.Rule;
/**
 * An {@link NFATransition} that carries the referenced {@link Rule} together
 * with the state at which computation resumes after the rule reference.
 */
public class RuleTransition extends NFATransition {
    /** Ptr to the rule definition object for this rule ref */
    public Rule rule;

    /** What node to begin computations following ref to rule */
    public NFAState followState;

    /**
     * Creates the transition.
     *
     * @param rule        stored in {@link #rule}
     * @param ruleStart   handed to the superclass constructor, so it becomes
     *                    this transition's target
     * @param followState stored in {@link #followState}
     */
    public RuleTransition(Rule rule, NFAState ruleStart, NFAState followState) {
        super(ruleStart);
        this.followState = followState;
        this.rule = rule;
    }

    /**
     * Placeholder ordering: the argument is ignored and 0 is returned for
     * every comparison.
     */
    public int compareTo(Object o) {
        return 0;
    }
}

View File

@ -6,7 +6,6 @@ import org.antlr.v4.misc.IntervalSet;
/** A label containing a set of values */
public class SetTransition extends Label {
/** A set of token types or character codes if label==SET */
// TODO: try IntervalSet for everything
protected IntSet label;
public SetTransition(IntSet label) {

View File

@ -0,0 +1,19 @@
package org.antlr.v4.automata;
/**
 * A generic state machine state. Subclasses supply the transition storage
 * through the three abstract methods declared at the bottom.
 */
public abstract class State {
    /** Value of {@link #stateNumber} until a number is assigned. */
    public static final int INVALID_STATE_NUMBER = -1;

    /** This state's number; starts out as {@link #INVALID_STATE_NUMBER}. */
    public int stateNumber = INVALID_STATE_NUMBER;

    /**
     * An accept state is an end of rule state for lexers and
     * parser grammar rules. Defaults to {@code false}.
     */
    public boolean acceptState;

    /** Returns how many transitions this state has. */
    public abstract int getNumberOfTransitions();

    /** Adds the transition {@code e} to this state. */
    public abstract void addTransition(NFATransition e);

    /** Returns the transition for index {@code i}. */
    public abstract NFATransition transition(int i);
}

View File

@ -1,15 +0,0 @@
package org.antlr.v4.automata;
import org.antlr.analysis.State;
/** An NFA transition between any two NFA states. Subclasses define
* atom, set, epsilon, action, predicate, rule transitions.
*
* This is a one way link. It emanates from a state (usually via a list of
* transitions) and has a target state.
*/
public abstract class Transition implements Comparable {
    /**
     * The target of this transition, typed as the imported
     * {@code org.antlr.analysis.State}.
     */
    public State target;
}

View File

@ -117,18 +117,18 @@ public class Grammar implements AttributeResolver {
typeToTokenList.setSize(Label.NUM_FAUX_LABELS+Token.MIN_TOKEN_TYPE-1);
typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.INVALID, "<INVALID>");
typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOT, "<EOT>");
typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SEMPRED, "<SEMPRED>");
typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SET, "<SET>");
typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EPSILON, Label.EPSILON_STR);
// typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SEMPRED, "<SEMPRED>");
// typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SET, "<SET>");
// typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EPSILON, Label.EPSILON_STR);
typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOF, "EOF");
typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOR_TOKEN_TYPE-1, "<EOR>");
typeToTokenList.set(Label.NUM_FAUX_LABELS+Token.DOWN-1, "DOWN");
typeToTokenList.set(Label.NUM_FAUX_LABELS+Token.UP-1, "UP");
tokenNameToTypeMap.put("<INVALID>", Label.INVALID);
tokenNameToTypeMap.put("<ACTION>", Label.ACTION);
tokenNameToTypeMap.put("<EPSILON>", Label.EPSILON);
tokenNameToTypeMap.put("<SEMPRED>", Label.SEMPRED);
tokenNameToTypeMap.put("<SET>", Label.SET);
// tokenNameToTypeMap.put("<ACTION>", Label.ACTION);
// tokenNameToTypeMap.put("<EPSILON>", Label.EPSILON);
// tokenNameToTypeMap.put("<SEMPRED>", Label.SEMPRED);
// tokenNameToTypeMap.put("<SET>", Label.SET);
tokenNameToTypeMap.put("<EOT>", Label.EOT);
tokenNameToTypeMap.put("EOF", Label.EOF);
tokenNameToTypeMap.put("<EOR>", Label.EOR_TOKEN_TYPE);