401 lines
12 KiB
Java
401 lines
12 KiB
Java
package org.antlr.v4.automata;
|
|
|
|
|
|
import org.antlr.runtime.RecognitionException;
|
|
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
|
import org.antlr.v4.codegen.Target;
|
|
import org.antlr.v4.misc.IntervalSet;
|
|
import org.antlr.v4.parse.ANTLRParser;
|
|
import org.antlr.v4.parse.GrammarASTAdaptor;
|
|
import org.antlr.v4.parse.NFABuilder;
|
|
import org.antlr.v4.tool.*;
|
|
|
|
import java.lang.reflect.Constructor;
|
|
import java.util.List;
|
|
|
|
/** NFA construction routines triggered by NFABuilder.g.
 *
 *  No side effects: it builds an NFA object and returns it.
 */
|
|
public class ParserNFAFactory implements NFAFactory {
|
|
/** The grammar whose rules this factory turns into an NFA. */
public Grammar g;

/** Rule selected by setCurrentRuleName(); the newState methods copy it
 *  into the {@code rule} field of each state they create.
 */
public Rule currentRule;

/** The NFA being populated; allocated once by the constructor. */
NFA nfa;

/** Remember grammar {@code g} and allocate the NFA object (constructed
 *  from {@code g}) that the build methods of this factory fill in.
 */
public ParserNFAFactory(Grammar g) {
    this.g = g;
    this.nfa = new NFA(g);
}
|
|
|
|
public NFA createNFA() {
|
|
_createNFA();
|
|
addEOFTransitionToStartRules();
|
|
return nfa;
|
|
}
|
|
|
|
public void _createNFA() {
|
|
createRuleStartAndStopNFAStates();
|
|
|
|
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
|
for (Rule r : g.rules.values()) {
|
|
// find rule's block
|
|
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
|
|
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
|
|
NFABuilder b = new NFABuilder(nodes,this);
|
|
try {
|
|
setCurrentRuleName(r.name);
|
|
Handle h = b.block();
|
|
rule(r.ast, r.name, h);
|
|
}
|
|
catch (RecognitionException re) {
|
|
ErrorManager.fatalInternalError("bad grammar AST structure", re);
|
|
}
|
|
}
|
|
}
|
|
|
|
public void setCurrentRuleName(String name) {
|
|
this.currentRule = g.getRule(name);
|
|
}
|
|
|
|
/* start->ruleblock->end */
|
|
public Handle rule(GrammarAST ruleAST, String name, Handle blk) {
|
|
Rule r = g.getRule(name);
|
|
RuleStartState start = nfa.ruleToStartState.get(r);
|
|
epsilon(start, blk.left);
|
|
RuleStopState stop = nfa.ruleToStopState.get(r);
|
|
epsilon(blk.right, stop);
|
|
Handle h = new Handle(start, stop);
|
|
// FASerializer ser = new FASerializer(g, h.left);
|
|
// System.out.println(ruleAST.toStringTree()+":\n"+ser);
|
|
return h;
|
|
}
|
|
|
|
public NFAState newState(Class nodeType, GrammarAST node) {
|
|
try {
|
|
Constructor ctor = nodeType.getConstructor(NFA.class);
|
|
NFAState s = (NFAState)ctor.newInstance(nfa);
|
|
s.ast = node;
|
|
s.rule = currentRule;
|
|
nfa.addState(s);
|
|
return s;
|
|
}
|
|
catch (Exception e) {
|
|
ErrorManager.internalError("can't create NFA node: "+nodeType.getName(), e);
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public BasicState newState(GrammarAST node) {
|
|
BasicState n = new BasicState(nfa);
|
|
n.rule = currentRule;
|
|
n.ast = node;
|
|
nfa.addState(n);
|
|
return n;
|
|
}
|
|
|
|
/** Create a BasicState that has no associated AST node. */
public BasicState newState() {
    return newState((GrammarAST)null);
}
|
|
|
|
/** From label A build Graph o-A->o */
|
|
public Handle tokenRef(TerminalAST node) {
|
|
BasicState left = newState(node);
|
|
BasicState right = newState(node);
|
|
int ttype = g.getTokenType(node.getText());
|
|
left.transition = new AtomTransition(ttype, right);
|
|
right.incidentTransition = left.transition;
|
|
|
|
return new Handle(left, right);
|
|
}
|
|
|
|
/** From set build single edge graph o->o-set->o. To conform to
|
|
* what an alt block looks like, must have extra state on left.
|
|
*/
|
|
public Handle set(IntervalSet set, GrammarAST associatedAST) {
|
|
BasicState left = newState(associatedAST);
|
|
BasicState right = newState(associatedAST);
|
|
left.transition = new SetTransition(set, right);
|
|
right.incidentTransition = left.transition;
|
|
|
|
return new Handle(left, right);
|
|
}
|
|
|
|
public Handle tree(List<Handle> els) {
|
|
return null;
|
|
}
|
|
|
|
/** Not valid for non-lexers */
|
|
public Handle range(GrammarAST a, GrammarAST b) { throw new UnsupportedOperationException(); }
|
|
|
|
public Handle not(GrammarAST n, Handle A) {
|
|
GrammarAST ast = A.left.ast;
|
|
int ttype = 0;
|
|
if ( g.getType()==ANTLRParser.LEXER ) {
|
|
ttype = Target.getCharValueFromGrammarCharLiteral(ast.getText());
|
|
}
|
|
else {
|
|
ttype = g.getTokenType(ast.getText());
|
|
}
|
|
IntervalSet notAtom =
|
|
(IntervalSet)IntervalSet.of(ttype).complement(g.getTokenTypes());
|
|
if ( notAtom.isNil() ) {
|
|
ErrorManager.grammarError(ErrorType.EMPTY_COMPLEMENT,
|
|
g.fileName,
|
|
ast.token,
|
|
ast.getText());
|
|
}
|
|
return set(notAtom, n);
|
|
}
|
|
|
|
/** For a non-lexer, just build a simple token reference atom. */
|
|
public Handle stringLiteral(TerminalAST stringLiteralAST) {
|
|
return tokenRef(stringLiteralAST);
|
|
}
|
|
|
|
/** For reference to rule r, build
|
|
*
|
|
* o->(r) o
|
|
*
|
|
* where (r) is the start of rule r and the trailing o is not linked
|
|
* to from rule ref state directly (uses followState).
|
|
*/
|
|
public Handle ruleRef(GrammarAST node) {
|
|
Rule r = g.getRule(node.getText());
|
|
RuleStartState start = nfa.ruleToStartState.get(r);
|
|
BasicState left = newState(node);
|
|
BasicState right = newState(node);
|
|
RuleTransition call = new RuleTransition(r, start, right);
|
|
call.followState = right;
|
|
left.addTransition(call);
|
|
|
|
// add follow edge from end of invoked rule
|
|
RuleStopState stop = nfa.ruleToStopState.get(r);
|
|
epsilon(stop, right);
|
|
|
|
return new Handle(left, right);
|
|
}
|
|
|
|
/** From an empty alternative build o-e->o */
|
|
public Handle epsilon(GrammarAST node) {
|
|
BasicState left = newState(node);
|
|
BasicState right = newState(node);
|
|
epsilon(left, right);
|
|
return new Handle(left, right);
|
|
}
|
|
|
|
/** Build what amounts to an epsilon transition with a semantic
|
|
* predicate action. The pred is a pointer into the AST of
|
|
* the SEMPRED token.
|
|
*/
|
|
public Handle sempred(GrammarAST pred) {
|
|
//System.out.println("sempred: "+ pred);
|
|
BasicState left = newState(pred);
|
|
NFAState right = newState(pred);
|
|
left.transition = new PredicateTransition(pred, right);
|
|
return new Handle(left, right);
|
|
}
|
|
|
|
public Handle gated_sempred(GrammarAST pred) {
|
|
return null;
|
|
}
|
|
|
|
/** Build what amounts to an epsilon transition with an action.
|
|
* The action goes into NFA though it is ignored during analysis.
|
|
* It slows things down a bit, but I must ignore predicates after
|
|
* having seen an action (5-5-2008).
|
|
*/
|
|
public Handle action(GrammarAST action) {
|
|
//System.out.println("action: "+action);
|
|
BasicState left = newState(action);
|
|
NFAState right = newState(action);
|
|
left.transition = new ActionTransition(action, right);
|
|
return new Handle(left, right);
|
|
}
|
|
|
|
/** From a set ('a'|'b') build
|
|
*
|
|
* o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts)
|
|
*/
|
|
public Handle blockFromSet(Handle set) { return null; }
|
|
|
|
/** From A|B|..|Z alternative block build
|
|
*
|
|
* o->o-A->o->o (last NFAState is BlockEndState pointed to by all alts)
|
|
* | ^
|
|
* |->o-B->o--|
|
|
* | |
|
|
* ... |
|
|
* | |
|
|
* |->o-Z->o--|
|
|
*
|
|
* So start node points at every alternative with epsilon transition
|
|
* and every alt right side points at a block end NFAState.
|
|
*
|
|
* Special case: only one alternative: don't make a block with alt
|
|
* begin/end.
|
|
*
|
|
* Special case: if just a list of tokens/chars/sets, then collapse
|
|
* to a single edge'd o-set->o graph.
|
|
*
|
|
* TODO: Set alt number (1..n) in the states?
|
|
*/
|
|
public Handle block(GrammarAST blkAST, List<Handle> alts) {
|
|
if ( alts.size()==1 ) return alts.get(0);
|
|
|
|
BlockStartState start = (BlockStartState)newState(BlockStartState.class, blkAST);
|
|
BlockEndState end = (BlockEndState)newState(BlockEndState.class, blkAST);
|
|
for (Handle alt : alts) {
|
|
epsilon(start, alt.left);
|
|
epsilon(alt.right, end);
|
|
}
|
|
nfa.defineDecisionState(start);
|
|
Handle h = new Handle(start, end);
|
|
// FASerializer ser = new FASerializer(g, h.left);
|
|
// System.out.println(blkAST.toStringTree()+":\n"+ser);
|
|
return h;
|
|
}
|
|
|
|
public Handle alt(List<Handle> els) {
|
|
Handle prev = null;
|
|
for (Handle el : els) { // hook up elements
|
|
if ( prev!=null ) epsilon(prev.right, el.left);
|
|
prev = el;
|
|
}
|
|
Handle first = els.get(0);
|
|
Handle last = els.get(els.size()-1);
|
|
return new Handle(first.left, last.right);
|
|
}
|
|
|
|
/** From (A)? build either:
|
|
*
|
|
* o--A->o
|
|
* | ^
|
|
* o---->|
|
|
*
|
|
* or, if A is a block, just add an empty alt to the end of the block
|
|
*/
|
|
public Handle optional(GrammarAST optAST, Handle blk) {
|
|
if ( blk.left instanceof BlockStartState ) {
|
|
epsilon(blk.left, blk.right);
|
|
// FASerializer ser = new FASerializer(g, blk.left);
|
|
// System.out.println(optAST.toStringTree()+":\n"+ser);
|
|
return blk;
|
|
}
|
|
|
|
// construct block
|
|
BlockStartState start = (BlockStartState)newState(BlockStartState.class, optAST);
|
|
BlockEndState end = (BlockEndState)newState(BlockEndState.class, optAST);
|
|
epsilon(start, blk.left);
|
|
epsilon(blk.right, end);
|
|
epsilon(start, end);
|
|
|
|
nfa.defineDecisionState(start);
|
|
|
|
Handle h = new Handle(start, end);
|
|
// FASerializer ser = new FASerializer(g, h.left);
|
|
// System.out.println(optAST.toStringTree()+":\n"+ser);
|
|
return h;
|
|
}
|
|
|
|
/** From (A)+ build
|
|
*
|
|
* |------|
|
|
* v |
|
|
* o->o-A-o->o->o
|
|
*
|
|
* Meaning that the last NFAState in A blk points to loop back node,
|
|
* which points back to block start. We add start/end nodes to
|
|
* outside.
|
|
*/
|
|
public Handle plus(GrammarAST plusAST, Handle blk) {
|
|
PlusBlockStartState start = (PlusBlockStartState)newState(PlusBlockStartState.class, plusAST);
|
|
LoopbackState loop = (LoopbackState)newState(LoopbackState.class, plusAST);
|
|
BlockEndState end = (BlockEndState)newState(BlockEndState.class, plusAST);
|
|
epsilon(start, blk.left);
|
|
epsilon(loop, blk.left);
|
|
epsilon(blk.right, loop);
|
|
epsilon(loop, end);
|
|
nfa.defineDecisionState(loop);
|
|
return new Handle(start, end);
|
|
}
|
|
|
|
/** From (A)* build
|
|
*
|
|
* |------|
|
|
* v |
|
|
* o->o-A-o->o->o
|
|
* | ^
|
|
* o------------| (optional branch is 2nd alt of StarBlockStartState)
|
|
*
|
|
* There are 2 or 3 decision points in a A*. If A is not a block (i.e.,
|
|
* it only has one alt), then there are two decisions: the optional bypass
|
|
* and then loopback. If A is a block of alts, then there are three
|
|
* decisions: bypass, loopback, and A's decision point.
|
|
*
|
|
* Note that the optional bypass must be outside the loop as (A|B)* is
|
|
* not the same thing as (A|B|)+.
|
|
*
|
|
* This is an accurate NFA representation of the meaning of (A)*, but
|
|
* for generating code, I don't need a DFA for the optional branch by
|
|
* virtue of how I generate code. The exit-loopback-branch decision
|
|
* is sufficient to let me make an appropriate enter, exit, loop
|
|
* determination.
|
|
*/
|
|
public Handle star(GrammarAST starAST, Handle blk) {
|
|
StarBlockStartState start = (StarBlockStartState)newState(StarBlockStartState.class, starAST);
|
|
LoopbackState loop = (LoopbackState)newState(LoopbackState.class, starAST);
|
|
BlockEndState end = (BlockEndState)newState(BlockEndState.class, starAST);
|
|
epsilon(start, blk.left);
|
|
epsilon(start, end); // bypass edge
|
|
epsilon(loop, blk.left);
|
|
epsilon(blk.right, loop);
|
|
epsilon(loop, end);
|
|
nfa.defineDecisionState(start);
|
|
nfa.defineDecisionState(loop);
|
|
return new Handle(start, end);
|
|
}
|
|
|
|
/** Build an atom with all possible values in its label */
|
|
public Handle wildcard(GrammarAST associatedAST) { return null; }
|
|
|
|
/** Build a subrule matching ^(. .*) (any tree or node). Let's use
|
|
* (^(. .+) | .) to be safe.
|
|
*/
|
|
public Handle wildcardTree(GrammarAST associatedAST) { return null; }
|
|
|
|
void epsilon(NFAState a, NFAState b) {
|
|
if ( a!=null ) a.addTransition(new EpsilonTransition(b));
|
|
}
|
|
|
|
/** Define all the rule begin/end NFAStates to solve forward reference
|
|
* issues.
|
|
*/
|
|
void createRuleStartAndStopNFAStates() {
|
|
for (Rule r : g.rules.values()) {
|
|
RuleStartState start = (RuleStartState)newState(RuleStartState.class, r.ast);
|
|
RuleStopState stop = (RuleStopState)newState(RuleStopState.class, r.ast);
|
|
start.stopState = stop;
|
|
start.rule = r;
|
|
stop.rule = r;
|
|
nfa.ruleToStartState.put(r, start);
|
|
nfa.ruleToStopState.put(r, stop);
|
|
}
|
|
}
|
|
|
|
/** add an EOF transition to any rule end NFAState that points to nothing
|
|
* (i.e., for all those rules not invoked by another rule). These
|
|
* are start symbols then.
|
|
*
|
|
* Return the number of grammar entry points; i.e., how many rules are
|
|
* not invoked by another rule (they can only be invoked from outside).
|
|
* These are the start rules.
|
|
*/
|
|
public int addEOFTransitionToStartRules() {
|
|
int n = 0;
|
|
for (Rule r : g.rules.values()) {
|
|
NFAState stop = nfa.ruleToStopState.get(r);
|
|
if ( stop.getNumberOfTransitions()>0 ) continue;
|
|
n++;
|
|
BasicState eofTarget = newState(r.ast);
|
|
Transition t = new AtomTransition(Label.EOF, eofTarget);
|
|
stop.addTransition(t);
|
|
}
|
|
return n;
|
|
}
|
|
}
|