got left-recursion core in

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6738]
This commit is contained in:
parrt 2010-03-06 17:50:19 -08:00
parent 572aeb0071
commit 4125ed8fb4
18 changed files with 899 additions and 829 deletions

View File

@ -2,8 +2,8 @@ package org.antlr.v4;
import org.antlr.runtime.*;
import org.antlr.runtime.tree.TreeWizard;
import org.antlr.v4.analysis.AnalysisPipeline;
import org.antlr.v4.automata.LexerNFAFactory;
import org.antlr.v4.automata.NFA;
import org.antlr.v4.automata.NFAFactory;
import org.antlr.v4.automata.ParserNFAFactory;
import org.antlr.v4.parse.ANTLRLexer;
@ -370,10 +370,12 @@ public class Tool {
// BUILD NFA FROM AST
NFAFactory factory = new ParserNFAFactory(g);
if ( g.getType()==ANTLRParser.LEXER ) factory = new LexerNFAFactory(g);
NFA nfa = factory.createNFA();
// PERFORM GRAMMAR ANALYSIS ON NFA: BUILD DECISION DFAs
g.nfa = factory.createNFA();
// PERFORM GRAMMAR ANALYSIS ON NFA: BUILD DECISION DFAs
AnalysisPipeline anal = new AnalysisPipeline();
anal.process(g);
// GENERATE CODE
}

View File

@ -0,0 +1,24 @@
package org.antlr.v4.analysis;
import org.antlr.v4.automata.DecisionState;
import org.antlr.v4.tool.Grammar;
/** Drives grammar analysis over a grammar's NFA: first a left-recursion
 *  check, then (if that found nothing) DFA construction for every
 *  decision state registered in the NFA.
 */
public class AnalysisPipeline {
    /** Analyze {@code g.nfa}. If the left-recursion detector records any
     *  cycle, analysis stops immediately and no DFA is created.
     *
     *  @param g grammar whose {@code nfa} field has already been built
     */
    public void process(Grammar g) {
        // LEFT-RECURSION CHECK
        LeftRecursionDetector detector = new LeftRecursionDetector(g.nfa);
        detector.check();
        boolean foundLeftRecursion = !detector.listOfRecursiveCycles.isEmpty();
        if ( foundLeftRecursion ) {
            return; // bail out
        }

        // BUILD DFA FOR EACH DECISION
        for (DecisionState decision : g.nfa.decisionToNFAState) {
            createDFA(decision);
        }
    }

    /** Build the lookahead DFA for a single decision. Currently a stub:
     *  the body only outlines the intended strategy.
     *
     *  @param s decision state to analyze
     */
    public void createDFA(DecisionState s) {
        // TRY APPROXIMATE LL(*) ANALYSIS

        // REAL LL(*) ANALYSIS IF THAT FAILS
    }
}

View File

@ -0,0 +1,109 @@
package org.antlr.v4.analysis;
import org.antlr.v4.automata.*;
import org.antlr.v4.tool.Rule;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/** Walks the epsilon/rule-invocation edges leaving each rule start state
 *  looking for a path that re-enters a rule that is still being traced,
 *  i.e. (possibly indirect) left recursion. Rules found to participate
 *  are grouped into sets in {@link #listOfRecursiveCycles}.
 */
public class LeftRecursionDetector {
    /** The NFA whose rules are checked. */
    public NFA nfa;

    /** Holds a list of cycles (sets of rules that left-recurse into each other). */
    public List<Set<Rule>> listOfRecursiveCycles = new ArrayList<Set<Rule>>();

    /** Start states of the rules on the invocation chain currently being
     *  traced. Reaching one of these again without consuming input is
     *  left recursion.
     */
    Set<RuleStartState> rulesVisitedPerRuleCheck = new HashSet<RuleStartState>();

    /** States already seen inside the rule invocation currently being
     *  traced; prevents epsilon-loop-induced infinite recursion. A fresh
     *  set is used for every rule invocation.
     */
    Set<NFAState> visitedStates = new HashSet<NFAState>();

    public LeftRecursionDetector(NFA nfa) { this.nfa = nfa; }

    /** Check every rule of the NFA, accumulating any cycles found in
     *  {@link #listOfRecursiveCycles}.
     */
    public void check() {
        for (RuleStartState start : nfa.ruleToStartState.values()) {
            //System.out.print("check "+start.rule.name);
            rulesVisitedPerRuleCheck.clear();
            rulesVisitedPerRuleCheck.add(start);
            // Each rule must be traced from scratch; states remembered from
            // an earlier rule's trace would cut this one short and hide
            // members of a cycle.
            visitedStates.clear();
            check(start.rule, start);
        }
        //System.out.println("cycles="+listOfRecursiveCycles);
    }

    /** From state s, look for any transition to a rule that is currently
     *  being traced. When tracing r, rulesVisitedPerRuleCheck has r
     *  initially. If you reach a rule stop state, return but notify the
     *  invoking rule that the called rule is nullable. This implies that
     *  invoking rule must look at follow transition for that invoking state.
     *
     *  The visitedStates tracks visited states within a single rule so
     *  we can avoid epsilon-loop-induced infinite recursion here. Keep
     *  filling the cycles in listOfRecursiveCycles.
     *
     *  @param enclosingRule rule whose submachine contains {@code s}
     *  @param s state to continue tracing from
     *  @return true if the enclosing rule's stop state is reachable from
     *          {@code s} without consuming input (a state that was
     *          already visited contributes false)
     */
    public boolean check(Rule enclosingRule, NFAState s) {
        if ( s instanceof RuleStopState ) return true;
        if ( !visitedStates.add(s) ) return false; // already traced this state

        boolean stateReachesStopState = false;
        int n = s.getNumberOfTransitions();
        for (int i=0; i<n; i++) {
            Transition t = s.transition(i);
            if ( t instanceof RuleTransition ) {
                RuleTransition rt = (RuleTransition) t;
                RuleStartState invokedStart = (RuleStartState)t.target;
                if ( rulesVisitedPerRuleCheck.contains(invokedStart) ) {
                    addRulesToCycle(enclosingRule, rt.rule);
                }
                else if ( checkInvokedRule(rt.rule, invokedStart) ) {
                    // invoked rule can match nothing, so whatever follows
                    // the invocation is still at the left edge
                    stateReachesStopState |= check(enclosingRule, rt.followState);
                }
            }
            else if ( t.isEpsilon() ) {
                stateReachesStopState |= check(enclosingRule, t.target);
            }
            // ignore non-epsilon transitions
        }
        return stateReachesStopState;
    }

    /** Trace an invoked rule with its own visited-state set and with the
     *  rule marked as "being traced" only for the duration of the trace.
     *  Restoring both afterwards keeps a later, unrelated reference to the
     *  same rule from being misreported as recursion and lets its
     *  nullability be recomputed correctly.
     *
     *  @return true if the invoked rule is nullable
     */
    private boolean checkInvokedRule(Rule r, RuleStartState start) {
        Set<NFAState> callerVisitedStates = visitedStates;
        rulesVisitedPerRuleCheck.add(start);
        visitedStates = new HashSet<NFAState>();
        try {
            return check(r, start);
        }
        finally {
            visitedStates = callerVisitedStates;
            rulesVisitedPerRuleCheck.remove(start);
        }
    }

    /** enclosingRule calls targetRule. Find the cycle containing
     *  the target and add the caller. Find the cycle containing the caller
     *  and add the target. If no cycles contain either, then create a new
     *  cycle.
     */
    protected void addRulesToCycle(Rule enclosingRule, Rule targetRule) {
        System.err.println("left-recursion to "+targetRule.name+" from "+enclosingRule.name);
        boolean foundCycle = false;
        for (Set<Rule> rulesInCycle : listOfRecursiveCycles) {
            // ensure both rules are in same cycle
            if ( rulesInCycle.contains(targetRule) ) {
                rulesInCycle.add(enclosingRule);
                foundCycle = true;
            }
            if ( rulesInCycle.contains(enclosingRule) ) {
                rulesInCycle.add(targetRule);
                foundCycle = true;
            }
        }
        if ( !foundCycle ) {
            Set<Rule> cycle = new HashSet<Rule>();
            cycle.add(targetRule);
            cycle.add(enclosingRule);
            listOfRecursiveCycles.add(cycle);
        }
    }
}

View File

@ -4,7 +4,7 @@ import java.util.ArrayList;
import java.util.List;
/** */
public class BlockStartState extends NFAState {
public class BlockStartState extends DecisionState {
public static final int INITIAL_NUM_TRANSITIONS = 4;
BlockEndState endState;

View File

@ -0,0 +1,5 @@
package org.antlr.v4.automata;
/** Common base type for NFA states that are registered as decisions
 *  (block starts and loopback states extend it). It adds no fields or
 *  behavior of its own beyond {@link BasicState}.
 */
public class DecisionState extends BasicState {
    /** @param nfa the NFA handed to the {@link BasicState} constructor */
    public DecisionState(NFA nfa) {
        super(nfa);
    }
}

View File

@ -3,6 +3,8 @@ package org.antlr.v4.automata;
public class EpsilonTransition extends Transition {
public EpsilonTransition(NFAState target) { super(target); }
public boolean isEpsilon() { return true; }
/** Placeholder ordering: always returns 0, so an epsilon transition
 *  compares as equal to whatever object it is given.
 *  NOTE(review): looks like a stub to satisfy Comparable — confirm
 *  before relying on it in sorted collections.
 */
public int compareTo(Object o) {
return 0;
}

View File

@ -1,11 +1,9 @@
package org.antlr.v4.automata;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.v4.codegen.Target;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.parse.NFABuilder;
import org.antlr.v4.tool.*;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.GrammarAST;
import org.antlr.v4.tool.TerminalAST;
import org.stringtemplate.v4.misc.Misc;
/** */
@ -13,18 +11,7 @@ public class LexerNFAFactory extends ParserNFAFactory {
public LexerNFAFactory(Grammar g) { super(g); }
public NFA createNFA() {
createRuleStartAndStopNFAStates();
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
for (Rule r : g.rules.values()) {
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,r.ast);
NFABuilder b = new NFABuilder(nodes,this);
try { b.rule(); }
catch (RecognitionException re) {
ErrorManager.fatalInternalError("bad grammar AST structure", re);
}
}
_createNFA();
return nfa;
}

View File

@ -1,7 +1,7 @@
package org.antlr.v4.automata;
/** */
public class LoopbackState extends BasicState {
public class LoopbackState extends DecisionState {
EpsilonTransition loopBack; // edge 2 (transition is edge 1)
/** What's its decision number from 1..n? */

View File

@ -4,7 +4,7 @@ import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@ -17,10 +17,10 @@ public class NFA {
* can go back later and build DFA predictors for them. This includes
* all the rules, subrules, optional blocks, ()+, ()* etc...
*/
public List<NFAState> decisionToNFAState = new ArrayList<NFAState>();
public List<DecisionState> decisionToNFAState = new ArrayList<DecisionState>();
public Map<Rule, RuleStartState> ruleToStartState = new HashMap<Rule, RuleStartState>();
public Map<Rule, RuleStopState> ruleToStopState = new HashMap<Rule, RuleStopState>();
public Map<Rule, RuleStartState> ruleToStartState = new LinkedHashMap<Rule, RuleStartState>();
public Map<Rule, RuleStopState> ruleToStopState = new LinkedHashMap<Rule, RuleStopState>();
int stateNumber = 0;
@ -31,7 +31,7 @@ public class NFA {
state.stateNumber = stateNumber++;
}
public int defineDecisionState(NFAState s) {
public int defineDecisionState(DecisionState s) {
decisionToNFAState.add(s);
return decisionToNFAState.size()-1;
}

View File

@ -4,6 +4,7 @@ package org.antlr.v4.automata;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.parse.NFABuilder;
import org.antlr.v4.tool.*;
@ -23,20 +24,29 @@ public class ParserNFAFactory implements NFAFactory {
public ParserNFAFactory(Grammar g) { this.g = g; nfa = new NFA(g); }
public NFA createNFA() {
_createNFA();
addEOFTransitionToStartRules();
return nfa;
}
public void _createNFA() {
createRuleStartAndStopNFAStates();
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
for (Rule r : g.rules.values()) {
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,r.ast);
// find rule's block
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
NFABuilder b = new NFABuilder(nodes,this);
try { b.rule(); }
try {
setCurrentRuleName(r.name);
Handle h = b.block();
rule(r.ast, r.name, h);
}
catch (RecognitionException re) {
ErrorManager.fatalInternalError("bad grammar AST structure", re);
}
}
addEOFTransitionToStartRules();
return nfa;
}
public void setCurrentRuleName(String name) {
@ -51,8 +61,8 @@ public class ParserNFAFactory implements NFAFactory {
RuleStopState stop = nfa.ruleToStopState.get(r);
epsilon(blk.right, stop);
Handle h = new Handle(start, stop);
FASerializer ser = new FASerializer(g, h.left);
System.out.println(ruleAST.toStringTree()+":\n"+ser);
// FASerializer ser = new FASerializer(g, h.left);
// System.out.println(ruleAST.toStringTree()+":\n"+ser);
return h;
}
@ -145,7 +155,7 @@ public class ParserNFAFactory implements NFAFactory {
* the SEMPRED token.
*/
public Handle sempred(GrammarAST pred) {
System.out.println("sempred: "+ pred);
//System.out.println("sempred: "+ pred);
BasicState left = newState(pred);
NFAState right = newState(pred);
left.transition = new PredicateTransition(pred, right);
@ -162,7 +172,7 @@ public class ParserNFAFactory implements NFAFactory {
* having seen an action (5-5-2008).
*/
public Handle action(GrammarAST action) {
System.out.println("action: "+action);
//System.out.println("action: "+action);
BasicState left = newState(action);
NFAState right = newState(action);
left.transition = new ActionTransition(action, right);
@ -205,10 +215,10 @@ public class ParserNFAFactory implements NFAFactory {
epsilon(start, alt.left);
epsilon(alt.right, end);
}
Handle h = new Handle(start, end);
FASerializer ser = new FASerializer(g, h.left);
nfa.defineDecisionState(start);
System.out.println(blkAST.toStringTree()+":\n"+ser);
Handle h = new Handle(start, end);
// FASerializer ser = new FASerializer(g, h.left);
// System.out.println(blkAST.toStringTree()+":\n"+ser);
return h;
}
@ -234,8 +244,8 @@ public class ParserNFAFactory implements NFAFactory {
public Handle optional(GrammarAST optAST, Handle blk) {
if ( blk.left instanceof BlockStartState ) {
epsilon(blk.left, blk.right);
FASerializer ser = new FASerializer(g, blk.left);
System.out.println(optAST.toStringTree()+":\n"+ser);
// FASerializer ser = new FASerializer(g, blk.left);
// System.out.println(optAST.toStringTree()+":\n"+ser);
return blk;
}
@ -249,8 +259,8 @@ public class ParserNFAFactory implements NFAFactory {
nfa.defineDecisionState(start);
Handle h = new Handle(start, end);
FASerializer ser = new FASerializer(g, h.left);
System.out.println(optAST.toStringTree()+":\n"+ser);
// FASerializer ser = new FASerializer(g, h.left);
// System.out.println(optAST.toStringTree()+":\n"+ser);
return h;
}
@ -272,6 +282,7 @@ public class ParserNFAFactory implements NFAFactory {
epsilon(loop, blk.left);
epsilon(blk.right, loop);
epsilon(loop, end);
nfa.defineDecisionState(loop);
return new Handle(start, end);
}
@ -306,6 +317,8 @@ public class ParserNFAFactory implements NFAFactory {
epsilon(loop, blk.left);
epsilon(blk.right, loop);
epsilon(loop, end);
nfa.defineDecisionState(start);
nfa.defineDecisionState(loop);
return new Handle(start, end);
}

View File

@ -17,6 +17,8 @@ public class PredicateTransition extends Transition {
this.semanticContext = new SemanticContext.Predicate(predicateASTNode);
}
public boolean isEpsilon() { return true; }
/** Placeholder ordering: always returns 0, so a predicate transition
 *  compares as equal to whatever object it is given.
 *  NOTE(review): looks like a stub to satisfy Comparable — confirm
 *  before relying on it in sorted collections.
 */
public int compareTo(Object o) {
return 0;
}

View File

@ -19,6 +19,8 @@ public class RuleTransition extends Transition {
this.followState = followState;
}
public boolean isEpsilon() { return true; }
/** Placeholder ordering: always returns 0, so a rule transition
 *  compares as equal to whatever object it is given.
 *  NOTE(review): looks like a stub to satisfy Comparable — confirm
 *  before relying on it in sorted collections.
 */
public int compareTo(Object o) {
return 0;
}

View File

@ -14,9 +14,12 @@ package org.antlr.v4.automata;
*/
public abstract class Transition implements Comparable {
/** The target of this transition */
public State target;
public NFAState target;
public Transition() { }
public Transition(NFAState target) { this.target = target; }
/** Are we epsilon, action, sempred? */
public boolean isEpsilon() { return false; }
}

View File

@ -73,11 +73,6 @@ import org.antlr.v4.runtime.tree.CommonTree; // use updated v4 one not v3
}
}
rule returns [NFAFactory.Handle p]
: ^(RULE name=ID ~BLOCK* {factory.setCurrentRuleName($name.text);} block)
{$p = factory.rule($RULE, $name.text, $block.p);}
;
block returns [NFAFactory.Handle p]
@init {List<NFAFactory.Handle> alts = new ArrayList<NFAFactory.Handle>();}
: ^(BLOCK (^(OPTIONS .+))? (a=alternative {alts.add($a.p);})+)

File diff suppressed because it is too large Load Diff

View File

@ -4,6 +4,7 @@ import org.antlr.runtime.*;
import org.antlr.runtime.tree.TreeWizard;
import org.antlr.v4.Tool;
import org.antlr.v4.automata.Label;
import org.antlr.v4.automata.NFA;
import org.antlr.v4.codegen.Target;
import org.antlr.v4.parse.ANTLRLexer;
import org.antlr.v4.parse.ANTLRParser;
@ -34,7 +35,6 @@ public class Grammar implements AttributeResolver {
put("combined:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
}};
public Tool tool;
public String name;
public GrammarRootAST ast;
public String text; // testing only
@ -49,6 +49,13 @@ public class Grammar implements AttributeResolver {
public List<Grammar> importedGrammars;
public Map<String, Rule> rules = new LinkedHashMap<String, Rule>();
/** The NFA that represents the grammar with edges labelled with tokens
* or epsilon. It is more suitable to analysis than an AST representation.
*/
public NFA nfa;
public Tool tool;
/** Token names and literal tokens like "void" are uniquely indexed,
* with -1 implying EOF. Characters are different; they go from
* -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you
@ -120,6 +127,8 @@ public class Grammar implements AttributeResolver {
}
initTokenSymbolTables();
if ( this.ast==null || this.ast.hasErrors ) return;
Tool antlr = new Tool();
SemanticPipeline sem = new SemanticPipeline();
sem.process(this);

View File

@ -203,11 +203,12 @@ public class Rule implements AttributeResolver {
@Override
public String toString() {
return "Rule{" +
"name='" + name + '\'' +
", args=" + args +
", retvals=" + retvals +
", scope=" + scope +
'}';
StringBuilder buf = new StringBuilder();
buf.append("Rule{name="+name);
if ( args!=null ) buf.append(", args=" + args);
if ( retvals!=null ) buf.append(", retvals=" + retvals);
if ( scope!=null ) buf.append(", scope=" + scope);
buf.append("}");
return buf.toString();
}
}

View File

@ -33,7 +33,6 @@ import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.Token;
import org.antlr.runtime.TokenSource;
import org.antlr.v4.Tool;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.ANTLRErrorListener;
import org.antlr.v4.tool.ErrorManager;
import org.antlr.v4.tool.Grammar;
@ -433,10 +432,6 @@ public abstract class BaseTest {
if ( g.ast!=null ) System.out.println(g.ast.toStringTree());
else System.out.println("null tree");
}
if ( g.ast!=null && !g.ast.hasErrors ) {
SemanticPipeline sem = new SemanticPipeline();
sem.process(g);
}
}
catch (RecognitionException re) {
re.printStackTrace(System.err);