forked from jasder/antlr
added +, rule ref, etc...
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6736]
This commit is contained in:
parent
d7bda19fd1
commit
6832a3c89c
|
@ -1,15 +1,14 @@
|
|||
package org.antlr.v4;
|
||||
|
||||
import org.antlr.runtime.*;
|
||||
import org.antlr.runtime.tree.BufferedTreeNodeStream;
|
||||
import org.antlr.runtime.tree.TreeWizard;
|
||||
import org.antlr.v4.automata.LexerNFAFactory;
|
||||
import org.antlr.v4.automata.NFA;
|
||||
import org.antlr.v4.automata.NFAFactory;
|
||||
import org.antlr.v4.automata.ParserNFAFactory;
|
||||
import org.antlr.v4.parse.ANTLRLexer;
|
||||
import org.antlr.v4.parse.ANTLRParser;
|
||||
import org.antlr.v4.parse.GrammarASTAdaptor;
|
||||
import org.antlr.v4.parse.NFABuilder;
|
||||
import org.antlr.v4.semantics.SemanticPipeline;
|
||||
import org.antlr.v4.tool.*;
|
||||
|
||||
|
@ -371,22 +370,8 @@ public class Tool {
|
|||
// BUILD NFA FROM AST
|
||||
NFAFactory factory = new ParserNFAFactory(g);
|
||||
if ( g.getType()==ANTLRParser.LEXER ) factory = new LexerNFAFactory(g);
|
||||
GrammarAST rules = (GrammarAST)g.ast.getFirstChildWithType(ANTLRParser.RULES);
|
||||
List<GrammarAST> kids = rules.getChildren();
|
||||
for (GrammarAST n : kids) {
|
||||
if ( n.getType()!=ANTLRParser.RULE ) continue;
|
||||
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
||||
BufferedTreeNodeStream nodes =
|
||||
new BufferedTreeNodeStream(adaptor,n);
|
||||
NFABuilder b = new NFABuilder(nodes,factory);
|
||||
try {
|
||||
b.rule();
|
||||
}
|
||||
catch (RecognitionException re) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
NFA nfa = factory.createNFA();
|
||||
|
||||
// PERFORM GRAMMAR ANALYSIS ON NFA: BUILD DECISION DFAs
|
||||
|
||||
// GENERATE CODE
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
/** */
|
||||
public class BlockEndState extends NFAState {
|
||||
public class BlockEndState extends BasicState {
|
||||
public BlockEndState(NFA nfa) { super(nfa); }
|
||||
}
|
||||
|
|
|
@ -31,12 +31,14 @@ public class FASerializer {
|
|||
State s = null;
|
||||
while ( work.size()>0 ) {
|
||||
s = work.remove(0);
|
||||
if ( marked.contains(s) ) continue;
|
||||
int n = s.getNumberOfTransitions();
|
||||
//System.out.println("visit "+getStateString(s)+"; edges="+n);
|
||||
marked.add(s);
|
||||
for (int i=0; i<n; i++) {
|
||||
Transition t = s.transition(i);
|
||||
work.add( t.target );
|
||||
if ( t instanceof RuleTransition ) work.add(((RuleTransition)t).followState);
|
||||
else work.add( t.target );
|
||||
buf.append(getStateString(s));
|
||||
if ( t instanceof EpsilonTransition ) {
|
||||
buf.append("->"+getStateString(t.target)+'\n');
|
||||
|
@ -61,13 +63,13 @@ public class FASerializer {
|
|||
// }
|
||||
// else
|
||||
if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
|
||||
if ( s instanceof PlusBlockStartState ) stateStr = "PlusBlockStart_"+n;
|
||||
if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
|
||||
if ( s instanceof BlockStartState ) stateStr = "BlockStart_"+n;
|
||||
if ( s instanceof BlockEndState ) stateStr = "BlockEnd_"+n;
|
||||
if ( s instanceof RuleStartState ) stateStr = "RuleStart_"+n;
|
||||
if ( s instanceof RuleStopState ) stateStr = "RuleStop"+n;
|
||||
if ( s instanceof LoopbackState ) stateStr = "LoopBack_"+n;
|
||||
else if ( s instanceof PlusBlockStartState ) stateStr = "PlusBlockStart_"+n;
|
||||
else if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
|
||||
else if ( s instanceof BlockStartState ) stateStr = "BlockStart_"+n;
|
||||
else if ( s instanceof BlockEndState ) stateStr = "BlockEnd_"+n;
|
||||
else if ( s instanceof RuleStartState ) stateStr = "RuleStart_"+((RuleStartState)s).rule.name+"_"+n;
|
||||
else if ( s instanceof RuleStopState ) stateStr = "RuleStop_"+((RuleStopState)s).rule.name+"_"+n;
|
||||
else if ( s instanceof LoopbackState ) stateStr = "LoopBack_"+n;
|
||||
return stateStr;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,33 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
/** */
|
||||
public class LoopbackState extends NFAState {
|
||||
BlockStartState loopStartState;
|
||||
public class LoopbackState extends BasicState {
|
||||
EpsilonTransition loopBack; // edge 2 (transition is edge 1)
|
||||
|
||||
/** What's its decision number from 1..n? */
|
||||
protected int decisionNumber = 0;
|
||||
|
||||
public LoopbackState(NFA nfa) { super(nfa); }
|
||||
public LoopbackState(NFA nfa) { super(nfa); }
|
||||
|
||||
@Override
|
||||
public int getNumberOfTransitions() {
|
||||
int n = 0;
|
||||
if ( transition!=null ) n++;
|
||||
if ( loopBack!=null ) n++;
|
||||
return n;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addTransition(Transition e) {
|
||||
if ( getNumberOfTransitions()>=2 ) throw new IllegalArgumentException("only two transitions");
|
||||
if ( transition==null ) transition = e;
|
||||
else loopBack = (EpsilonTransition)e;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Transition transition(int i) {
|
||||
if ( i>=2 ) throw new IllegalArgumentException("only two transitions");
|
||||
if ( i==1 ) return transition;
|
||||
return loopBack;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,12 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
import org.antlr.v4.tool.Rule;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/** */
|
||||
public class NFA {
|
||||
|
@ -14,7 +17,10 @@ public class NFA {
|
|||
* can go back later and build DFA predictors for them. This includes
|
||||
* all the rules, subrules, optional blocks, ()+, ()* etc...
|
||||
*/
|
||||
protected List<NFAState> decisionToNFAState = new ArrayList<NFAState>();
|
||||
public List<NFAState> decisionToNFAState = new ArrayList<NFAState>();
|
||||
|
||||
Map<Rule, RuleStartState> ruleToStartState = new HashMap<Rule, RuleStartState>();
|
||||
Map<Rule, RuleStopState> ruleToStopState = new HashMap<Rule, RuleStopState>();
|
||||
|
||||
int stateNumber = 0;
|
||||
|
||||
|
|
|
@ -1,16 +1,20 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
|
||||
import org.antlr.runtime.RecognitionException;
|
||||
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
||||
import org.antlr.v4.misc.IntSet;
|
||||
import org.antlr.v4.parse.GrammarASTAdaptor;
|
||||
import org.antlr.v4.parse.NFABuilder;
|
||||
import org.antlr.v4.tool.*;
|
||||
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
// TODO: investigate o-X->o for basic states with typename for transition
|
||||
|
||||
/** Superclass of NFABuilder.g that provides actual NFA construction routines. */
|
||||
/** NFA construction routines triggered by NFABuilder.g.
|
||||
*
|
||||
* No side-effects. It builds an NFA object and returns it.
|
||||
*/
|
||||
public class ParserNFAFactory implements NFAFactory {
|
||||
public Grammar g;
|
||||
public Rule currentRule;
|
||||
|
@ -18,29 +22,40 @@ public class ParserNFAFactory implements NFAFactory {
|
|||
|
||||
public ParserNFAFactory(Grammar g) { this.g = g; nfa = new NFA(g); }
|
||||
|
||||
public NFA getNFA() {
|
||||
addEOFStates(g.rules.values());
|
||||
return null;
|
||||
public NFA createNFA() {
|
||||
createRuleStartAndStopNFAStates();
|
||||
|
||||
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
||||
for (Rule r : g.rules.values()) {
|
||||
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,r.ast);
|
||||
NFABuilder b = new NFABuilder(nodes,this);
|
||||
try { b.rule(); }
|
||||
catch (RecognitionException re) {
|
||||
ErrorManager.fatalInternalError("bad grammar AST structure", re);
|
||||
}
|
||||
}
|
||||
|
||||
addEOFTransitionToStartRules();
|
||||
return nfa;
|
||||
}
|
||||
|
||||
/** add an EOF transition to any rule end NFAState that points to nothing
|
||||
* (i.e., for all those rules not invoked by another rule). These
|
||||
* are start symbols then.
|
||||
*
|
||||
* Return the number of grammar entry points; i.e., how many rules are
|
||||
* not invoked by another rule (they can only be invoked from outside).
|
||||
* These are the start rules.
|
||||
*/
|
||||
public int addEOFStates(Collection<Rule> rules) { return 0; }
|
||||
|
||||
|
||||
|
||||
public Rule getCurrentRule() { return currentRule; }
|
||||
|
||||
public void setCurrentRuleName(String name) {
|
||||
this.currentRule = g.getRule(name);
|
||||
}
|
||||
|
||||
/* start->ruleblock->end */
|
||||
public Handle rule(GrammarAST ruleAST, String name, Handle blk) {
|
||||
Rule r = g.getRule(name);
|
||||
RuleStartState start = nfa.ruleToStartState.get(r);
|
||||
epsilon(start, blk.left);
|
||||
RuleStopState stop = nfa.ruleToStopState.get(r);
|
||||
epsilon(blk.right, stop);
|
||||
Handle h = new Handle(start, stop);
|
||||
FASerializer ser = new FASerializer(g, h.left);
|
||||
System.out.println(ruleAST.toStringTree()+":\n"+ser);
|
||||
return h;
|
||||
}
|
||||
|
||||
public NFAState newState(Class nodeType, GrammarAST node) {
|
||||
try {
|
||||
Constructor ctor = nodeType.getConstructor(NFA.class);
|
||||
|
@ -66,7 +81,6 @@ public class ParserNFAFactory implements NFAFactory {
|
|||
|
||||
/** From label A build Graph o-A->o */
|
||||
public Handle tokenRef(TerminalAST node) {
|
||||
System.out.println("tokenRef: "+node);
|
||||
BasicState left = newState(node);
|
||||
BasicState right = newState(node);
|
||||
int ttype = g.getTokenType(node.getText());
|
||||
|
@ -99,31 +113,36 @@ public class ParserNFAFactory implements NFAFactory {
|
|||
* the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
|
||||
* for n characters.
|
||||
*/
|
||||
public Handle stringLiteral(GrammarAST stringLiteralAST) {
|
||||
System.out.println("stringLiteral: "+stringLiteralAST);
|
||||
return null;
|
||||
public Handle stringLiteral(TerminalAST stringLiteralAST) {
|
||||
return tokenRef(stringLiteralAST);
|
||||
}
|
||||
|
||||
/** For reference to rule r, build
|
||||
*
|
||||
* o-e->(r) o
|
||||
* o->(r) o
|
||||
*
|
||||
* where (r) is the start of rule r and the trailing o is not linked
|
||||
* to from rule ref state directly (it's done thru the transition(0)
|
||||
* RuleClosureTransition.
|
||||
*
|
||||
* If the rule r is just a list of tokens, it's block will be just
|
||||
* a set on an edge o->o->o-set->o->o->o, could inline it rather than doing
|
||||
* the rule reference, but i'm not doing this yet as I'm not sure
|
||||
* it would help much in the NFA->DFA construction.
|
||||
*
|
||||
* TODO add to codegen: collapse alt blks that are sets into single matchSet
|
||||
* @param node
|
||||
* to from rule ref state directly (uses followState).
|
||||
*/
|
||||
public Handle ruleRef(GrammarAST node) { return null; }
|
||||
public Handle ruleRef(GrammarAST node) {
|
||||
Rule r = g.getRule(node.getText());
|
||||
RuleStartState start = nfa.ruleToStartState.get(r);
|
||||
RuleStartState stop = nfa.ruleToStartState.get(r);
|
||||
BasicState left = newState(node);
|
||||
BasicState right = newState(node);
|
||||
RuleTransition call = new RuleTransition(r, start, stop);
|
||||
call.followState = right;
|
||||
left.transition = call;
|
||||
return new Handle(left, right);
|
||||
}
|
||||
|
||||
/** From an empty alternative build o-e->o */
|
||||
public Handle epsilon() { return null; }
|
||||
public Handle epsilon(GrammarAST node) {
|
||||
BasicState left = newState(node);
|
||||
BasicState right = newState(node);
|
||||
epsilon(left, right);
|
||||
return new Handle(left, right);
|
||||
}
|
||||
|
||||
/** Build what amounts to an epsilon transition with a semantic
|
||||
* predicate action. The pred is a pointer into the AST of
|
||||
|
@ -154,13 +173,6 @@ public class ParserNFAFactory implements NFAFactory {
|
|||
return new Handle(left, right);
|
||||
}
|
||||
|
||||
/** From A B build A-e->B (that is, build an epsilon arc from right
|
||||
* of A to left of B).
|
||||
*
|
||||
* As a convenience, return B if A is null or return A if B is null.
|
||||
*/
|
||||
public Handle sequence(Handle A, Handle B) { return null; }
|
||||
|
||||
/** From a set ('a'|'b') build
|
||||
*
|
||||
* o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts)
|
||||
|
@ -189,7 +201,6 @@ public class ParserNFAFactory implements NFAFactory {
|
|||
* TODO: Set alt number (1..n) in the states?
|
||||
*/
|
||||
public Handle block(GrammarAST blkAST, List<Handle> alts) {
|
||||
System.out.println("block: "+alts);
|
||||
if ( alts.size()==1 ) return alts.get(0);
|
||||
|
||||
BlockStartState start = (BlockStartState)newState(BlockStartState.class, blkAST);
|
||||
|
@ -206,6 +217,11 @@ public class ParserNFAFactory implements NFAFactory {
|
|||
}
|
||||
|
||||
public Handle alt(List<Handle> els) {
|
||||
Handle prev = null;
|
||||
for (Handle el : els) { // hook up elements
|
||||
if ( prev!=null ) epsilon(prev.right, el.left);
|
||||
prev = el;
|
||||
}
|
||||
Handle first = els.get(0);
|
||||
Handle last = els.get(els.size()-1);
|
||||
return new Handle(first.left, last.right);
|
||||
|
@ -244,18 +260,24 @@ public class ParserNFAFactory implements NFAFactory {
|
|||
|
||||
/** From (A)+ build
|
||||
*
|
||||
* |---| (Transition 2 from A.right points at alt 1)
|
||||
* v | (follow of loop is Transition 1)
|
||||
* o->o-A-o->o
|
||||
* |------|
|
||||
* v |
|
||||
* o->o-A-o->o->o
|
||||
*
|
||||
* Meaning that the last NFAState in A points back to A's left Transition NFAState
|
||||
* and we add a new begin/end NFAState. A can be single alternative or
|
||||
* multiple.
|
||||
*
|
||||
* During analysis we'll call the follow link (transition 1) alt n+1 for
|
||||
* an n-alt A block.
|
||||
* Meaning that the last NFAState in A blk points to loop back node,
|
||||
* which points back to block start. We add start/end nodes to
|
||||
* outside.
|
||||
*/
|
||||
public Handle plus(GrammarAST plusAST, Handle blk) { return null; }
|
||||
public Handle plus(GrammarAST plusAST, Handle blk) {
|
||||
PlusBlockStartState start = (PlusBlockStartState)newState(PlusBlockStartState.class, plusAST);
|
||||
LoopbackState loop = (LoopbackState)newState(LoopbackState.class, plusAST);
|
||||
BlockEndState end = (BlockEndState)newState(BlockEndState.class, plusAST);
|
||||
epsilon(start, blk.left);
|
||||
epsilon(loop, blk.left);
|
||||
epsilon(blk.right, loop);
|
||||
epsilon(loop, end);
|
||||
return new Handle(start, end);
|
||||
}
|
||||
|
||||
/** From (A)* build
|
||||
*
|
||||
|
@ -300,4 +322,42 @@ public class ParserNFAFactory implements NFAFactory {
|
|||
void epsilon(NFAState a, NFAState b) {
|
||||
a.addTransition(new EpsilonTransition(b));
|
||||
}
|
||||
|
||||
/** Define all the rule begin/end NFAStates to solve forward reference
|
||||
* issues.
|
||||
*/
|
||||
void createRuleStartAndStopNFAStates() {
|
||||
for (Rule r : g.rules.values()) {
|
||||
RuleStartState start = (RuleStartState)newState(RuleStartState.class, r.ast);
|
||||
RuleStopState stop = (RuleStopState)newState(RuleStopState.class, r.ast);
|
||||
start.stopState = stop;
|
||||
start.rule = r;
|
||||
stop.rule = r;
|
||||
nfa.ruleToStartState.put(r, start);
|
||||
nfa.ruleToStopState.put(r, stop);
|
||||
}
|
||||
}
|
||||
|
||||
/** add an EOF transition to any rule end NFAState that points to nothing
|
||||
* (i.e., for all those rules not invoked by another rule). These
|
||||
* are start symbols then.
|
||||
*
|
||||
* Return the number of grammar entry points; i.e., how many rules are
|
||||
* not invoked by another rule (they can only be invoked from outside).
|
||||
* These are the start rules.
|
||||
*/
|
||||
public int addEOFTransitionToStartRules() {
|
||||
int n = 0;
|
||||
for (Rule r : g.rules.values()) {
|
||||
NFAState stop = nfa.ruleToStopState.get(r);
|
||||
if ( stop.getNumberOfTransitions()==0 ) {
|
||||
n++;
|
||||
continue;
|
||||
}
|
||||
BasicState eofTarget = newState(r.ast);
|
||||
Transition t = new AtomTransition(Label.EOF, eofTarget);
|
||||
stop.addTransition(t);
|
||||
}
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
/** */
|
||||
public class RuleStartState extends NFAState {
|
||||
RuleStopState stopState;
|
||||
public RuleStartState(NFA nfa) { super(nfa); }
|
||||
import org.antlr.v4.tool.Rule;
|
||||
|
||||
public class RuleStartState extends BasicState {
|
||||
public RuleStopState stopState;
|
||||
public Rule rule;
|
||||
|
||||
public RuleStartState(NFA nfa) { super(nfa); }
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
/** */
|
||||
public class RuleStopState extends NFAState {
|
||||
import org.antlr.v4.tool.Rule;
|
||||
|
||||
public class RuleStopState extends BasicState {
|
||||
public Rule rule;
|
||||
public RuleStopState(NFA nfa) { super(nfa); }
|
||||
}
|
||||
|
|
|
@ -73,18 +73,9 @@ import org.antlr.v4.runtime.tree.CommonTree; // use updated v4 one not v3
|
|||
}
|
||||
}
|
||||
|
||||
// IGNORE EVERYTHING UNTIL WE SEE A RULE OR BLOCK SUBTREE
|
||||
|
||||
topdown
|
||||
: rule
|
||||
;
|
||||
|
||||
bottomup
|
||||
: block // match block innermost to outermost all the way out to rule block
|
||||
;
|
||||
|
||||
rule returns [NFAFactory.Handle p]
|
||||
: ^(RULE name=ID ~BLOCK* block) {factory.setCurrentRuleName($name.text);}
|
||||
: ^(RULE name=ID ~BLOCK* {factory.setCurrentRuleName($name.text);} block)
|
||||
{$p = factory.rule($RULE, $name.text, $block.p);}
|
||||
;
|
||||
|
||||
block returns [NFAFactory.Handle p]
|
||||
|
@ -96,7 +87,7 @@ block returns [NFAFactory.Handle p]
|
|||
alternative returns [NFAFactory.Handle p]
|
||||
@init {List<NFAFactory.Handle> els = new ArrayList<NFAFactory.Handle>();}
|
||||
: ^(ALT_REWRITE a=alternative .) {$p = $a.p;}
|
||||
| ^(ALT EPSILON) {$p = factory.epsilon();}
|
||||
| ^(ALT EPSILON) {$p = factory.epsilon($EPSILON);}
|
||||
| ^(ALT (e=element {els.add($e.p);})+)
|
||||
{$p = factory.alt(els);}
|
||||
;
|
||||
|
@ -156,7 +147,7 @@ notSet returns [NFAFactory.Handle p]
|
|||
|
||||
notTerminal returns [NFAFactory.Handle p]
|
||||
: TOKEN_REF {$p = factory.tokenRef((TerminalAST)$TOKEN_REF);}
|
||||
| STRING_LITERAL {$p = factory.stringLiteral($start);}
|
||||
| STRING_LITERAL {$p = factory.stringLiteral((TerminalAST)$start);}
|
||||
;
|
||||
|
||||
ruleref returns [NFAFactory.Handle p]
|
||||
|
@ -170,8 +161,8 @@ range returns [NFAFactory.Handle p]
|
|||
;
|
||||
|
||||
terminal returns [NFAFactory.Handle p]
|
||||
: ^(STRING_LITERAL .) {$p = factory.stringLiteral($start);}
|
||||
| STRING_LITERAL {$p = factory.stringLiteral($start);}
|
||||
: ^(STRING_LITERAL .) {$p = factory.stringLiteral((TerminalAST)$start);}
|
||||
| STRING_LITERAL {$p = factory.stringLiteral((TerminalAST)$start);}
|
||||
| ^(TOKEN_REF ARG_ACTION .) {$p = factory.tokenRef((TerminalAST)$start);}
|
||||
| ^(TOKEN_REF .) {$p = factory.tokenRef((TerminalAST)$start);}
|
||||
| TOKEN_REF {$p = factory.tokenRef((TerminalAST)$start);}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue