From 3b52eb4e6ae4354f5035212dfff4ed1884b61d39 Mon Sep 17 00:00:00 2001 From: parrt Date: Fri, 22 Jul 2011 17:19:26 -0800 Subject: [PATCH] snapshot [git-p4: depot-paths = "//depot/code/antlr4/main/": change = 8899] --- .../src/org/antlr/v4/runtime/Recognizer.java | 4 +- .../v4/runtime/atn/LexerATNSimulator.java | 6 +- .../v4/runtime/atn/ParserATNSimulator.java | 50 ++- tool/playground/T.g | 38 +- .../v4/tool/templates/LeftRecursiveRules.stg | 79 ++++ .../v4/tool/templates/codegen/Java/Java.stg | 67 +++- tool/src/org/antlr/v4/Tool.java | 5 +- .../antlr/v4/codegen/ActionTranslator.java | 2 + .../org/antlr/v4/codegen/CodeGenerator.java | 24 +- .../v4/codegen/OutputModelController.java | 69 ++-- .../org/antlr/v4/codegen/model/Parser.java | 10 +- .../v4/codegen/model/RuleActionFunction.java | 51 +++ .../v4/codegen/model/RuleSempredFunction.java | 39 ++ tool/src/org/antlr/v4/parse/ANTLRParser.g | 1 + .../v4/parse/LeftRecursiveRuleAnalyzer.java | 364 ++++++++++++++++++ .../antlr/v4/parse/LeftRecursiveRuleWalker.g | 231 +++++++++++ tool/src/org/antlr/v4/tool/Alternative.java | 2 + tool/src/org/antlr/v4/tool/ErrorType.java | 5 +- tool/src/org/antlr/v4/tool/Grammar.java | 23 ++ tool/src/org/antlr/v4/tool/GrammarAST.java | 17 + .../src/org/antlr/v4/tool/GrammarRootAST.java | 4 +- .../v4/tool/GrammarTransformPipeline.java | 92 ++++- tool/src/org/antlr/v4/tool/Rule.java | 13 +- 23 files changed, 1085 insertions(+), 111 deletions(-) create mode 100644 tool/resources/org/antlr/v4/tool/templates/LeftRecursiveRules.stg create mode 100644 tool/src/org/antlr/v4/codegen/model/RuleActionFunction.java create mode 100644 tool/src/org/antlr/v4/codegen/model/RuleSempredFunction.java create mode 100644 tool/src/org/antlr/v4/parse/LeftRecursiveRuleAnalyzer.java create mode 100644 tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g diff --git a/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java b/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java index 42a3755bb..b710205da 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java @@ -218,11 +218,11 @@ public class Recognizer { // subclass needs to override these if there are sempreds or actions // that the ATN interp needs to execute - public boolean sempred(int ruleIndex, int actionIndex) { + public boolean _sempred(RuleContext _localctx, int ruleIndex, int actionIndex) { return true; } /** In lexer, both indexes are same; one action per rule. */ - public void action(int ruleIndex, int actionIndex) { + public void _action(RuleContext _localctx, int ruleIndex, int actionIndex) { } } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java index a21306f9d..830dad9fb 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java @@ -148,7 +148,7 @@ public class LexerATNSimulator extends ATNSimulator { recog.getRuleNames()[prevAcceptState.ruleIndex]+ ":"+ actionIndex); } - if ( actionIndex>=0 ) recog.action(prevAcceptState.ruleIndex, actionIndex); + if ( actionIndex>=0 ) recog._action(null, prevAcceptState.ruleIndex, actionIndex); } input.seek(prevAcceptMarker); return prevAcceptState.prediction; @@ -237,7 +237,7 @@ public class LexerATNSimulator extends ATNSimulator { else System.out.println("ACTION "+ruleIndex+":"+ruleIndex); } int actionIndex = atn.ruleToActionIndex[ruleIndex]; - if ( actionIndex>=0 ) recog.action(ruleIndex, actionIndex); + if ( actionIndex>=0 ) recog._action(null, ruleIndex, actionIndex); return ttype; } @@ -338,7 +338,7 @@ public class LexerATNSimulator extends ATNSimulator { } else if ( t.getClass() == PredicateTransition.class ) { PredicateTransition pt = (PredicateTransition)t; - if ( recog.sempred(pt.ruleIndex, pt.predIndex) ) { + if ( recog._sempred(null, pt.ruleIndex, pt.predIndex) ) { c = new ATNConfig(config, t.target); c.traversedPredicate = true; } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java index 761d8f1a2..618f41b6d 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java @@ -52,6 +52,14 @@ public class ParserATNSimulator extends ATNSimulator { protected Set closureBusy = new HashSet(); + /** Upon entry for prediction, we might need to execute actions and + * we need to know original context passed to us from parser or + * lexer. This is the stack and any args, locals, labels, etc... + * Meaningless across adaptivePredict() calls but easier as field + * than passing down many function calls. + protected RuleContext _ctx; + */ + public ParserATNSimulator(ATN atn) { super(atn); ctxToDFAs = new HashMap(); @@ -68,20 +76,21 @@ public class ParserATNSimulator extends ATNSimulator { // System.out.println(dot.getDOT(atn.rules.get(1), parser.getRuleNames())); } - public int adaptivePredict(TokenStream input, int decision, RuleContext ctx) { + public int adaptivePredict(TokenStream input, int decision, RuleContext originalContext) { predict_calls++; +// this._ctx = originalContext; DFA dfa = decisionToDFA[decision]; if ( dfa==null || dfa.s0==null ) { ATNState startState = atn.decisionToState.get(decision); decisionToDFA[decision] = dfa = new DFA(startState); dfa.decision = decision; - return predictATN(dfa, input, decision, ctx, false); + return predictATN(dfa, input, decision, originalContext, false); } else { //dump(dfa); // start with the DFA int m = input.mark(); - int alt = execDFA(input, dfa, dfa.s0, ctx); + int alt = execDFA(input, dfa, dfa.s0, originalContext); input.seek(m); return alt; } @@ -95,7 +104,7 @@ public class ParserATNSimulator extends ATNSimulator { if ( originalContext==null ) originalContext = RuleContext.EMPTY; RuleContext ctx = RuleContext.EMPTY; if ( useContext ) ctx = originalContext; - OrderedHashSet s0_closure = computeStartState(dfa.atnStartState, ctx); + OrderedHashSet s0_closure = computeStartState(dfa.atnStartState, ctx, originalContext); dfa.s0 = addDFAState(dfa, s0_closure); if ( prevAccept!=null ) { dfa.s0.isAcceptState = true; @@ -119,7 +128,7 @@ public class ParserATNSimulator extends ATNSimulator { public int matchATN(TokenStream input, ATNState startState) { DFA dfa = new DFA(startState); RuleContext ctx = new ParserRuleContext(); - OrderedHashSet s0_closure = computeStartState(startState, ctx); + OrderedHashSet s0_closure = computeStartState(startState, ctx, RuleContext.EMPTY); return execATN(input, dfa, input.index(), s0_closure, ctx, false); } @@ -243,7 +252,7 @@ public class ParserATNSimulator extends ATNSimulator { Transition trans = c.state.transition(ti); ATNState target = getReachableTarget(trans, t); if ( target!=null ) { - closure(new ATNConfig(c, target), reach); + closure(new ATNConfig(c, target), reach, originalContext); } } } @@ -367,9 +376,10 @@ public class ParserATNSimulator extends ATNSimulator { return predictedAlt; } - public OrderedHashSet computeStartState(ATNState p, RuleContext ctx) { - RuleContext initialContext = null; - initialContext = ctx; // always at least the implicit call to start rule + public OrderedHashSet computeStartState(ATNState p, RuleContext ctx, + RuleContext originalContext) + { + RuleContext initialContext = ctx; // always at least the implicit call to start rule OrderedHashSet configs = new OrderedHashSet(); prevAccept = null; // might reach end rule; track prevAcceptIndex = -1; @@ -377,7 +387,7 @@ public class ParserATNSimulator extends ATNSimulator { for (int i=0; i configs) { + protected void closure(ATNConfig config, OrderedHashSet configs, + RuleContext originalContext) { closureBusy.clear(); - closure(config, configs, closureBusy); + closure(config, configs, originalContext, closureBusy); } protected void closure(ATNConfig config, OrderedHashSet configs, + RuleContext originalContext, Set closureBusy) { if ( debug ) System.out.println("closure("+config+")"); @@ -427,7 +439,7 @@ public class ParserATNSimulator extends ATNSimulator { RuleTransition rt = (RuleTransition)invokingState.transition(0); ATNState retState = rt.followState; ATNConfig c = new ATNConfig(retState, config.alt, newContext); - closure(c, configs, closureBusy); + closure(c, configs, originalContext, closureBusy); return; } // else if we have no context info, just chase follow links @@ -440,12 +452,14 @@ public class ParserATNSimulator extends ATNSimulator { for (int i=0; i=0 ) { if ( debug ) System.out.println("DO ACTION "+at.ruleIndex+":"+at.actionIndex); - parser.action(at.ruleIndex, at.actionIndex); + parser._action(originalContext, at.ruleIndex, at.actionIndex); } else { // non-forced action traversed to get to t.target diff --git a/tool/playground/T.g b/tool/playground/T.g index 9bf5d38f8..801449473 100644 --- a/tool/playground/T.g +++ b/tool/playground/T.g @@ -1,32 +1,18 @@ grammar T; -options {output=AST;} -tokens {DUH;} -a : (A|B) ; -b : (A|B) -> A ; -/* -type returns [int i] : ID; -ID : 'a'..'z'+ ; -INT : '0'..'9'+; -PLUS : '+'; -WS : (' '|'\n') {$channel=HIDDEN;} ; -*/ +//options {output=AST;} + +a : a PLUS a + | INT + ; /* -c : A B C -> A ( D A B C*)* (B A*)? ; - -A : 'a'; -B : 'b'; -C : 'c'; -D : 'd'; -SEMI : ';'; -WS : ' '|'\t'|'\n' {skip();} ; +a : a_[0] ; +a_[int _p] : a_primary ( {$_p <= 2}? PLUS a{} )* + ; +a_primary : INT ; */ - /* -r[int a] returns [int b] -scope {int qq;} - : x=ID y=r[34] z+=b {$b = 99;} - ; - -b : r[34] {$r::qq = 3;} ; +a : a_[0] ; +a_[int _p] : a_primary ( {_p <= 2}? B )* ; +a_primary : A ; */ diff --git a/tool/resources/org/antlr/v4/tool/templates/LeftRecursiveRules.stg b/tool/resources/org/antlr/v4/tool/templates/LeftRecursiveRules.stg new file mode 100644 index 000000000..719214bb0 --- /dev/null +++ b/tool/resources/org/antlr/v4/tool/templates/LeftRecursiveRules.stg @@ -0,0 +1,79 @@ +/* + [The "BSD license"] + Copyright (c) 2010 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** How to generate rules derived from left-recursive rules. + * These rely on recRuleDefArg(), recRuleAltPredicate(), + * recRuleArg(), recRuleSetResultAction(), recRuleSetReturnAction() + * templates in main language.stg + */ +group LeftRecursiveRules; + +recRuleName(ruleName) ::= "_" +recPrimaryName(ruleName) ::= "_primary" + +recRuleStart(ruleName, minPrec, userRetvals, userRetvalAssignments) ::= << + returns [] + : [] + + { + + } + + ; +>> + +recRule(ruleName, precArgDef, argName, alts, setResultAction, buildAST, + userRetvals, userRetvalAssignments) ::= << +[] returns [] + : + + { + + } + + + { + + } + + ( + + )* + ; +>> + +recPrimaryRule(ruleName, alts, userRetvals) ::= << + returns [] + : + ; +>> + +recRuleAlt(alt, pred) ::= "{}? " + +recRuleRef(ruleName, arg) ::= "[]" + diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg index f76dea73c..0ee712a3b 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg @@ -35,7 +35,7 @@ import java.util.ArrayList; >> -Parser(parser, scopes, funcs, atn, actions, sempreds) ::= << +Parser(parser, scopes, funcs, atn, actionFuncs, sempredFuncs) ::= << public class extends Parser { public static final int =}; separator=", ", wrap, anchor>; @@ -44,7 +44,6 @@ public class extends Parser { "}; separator=", ", wrap, anchor> }; public static final String[] ruleNames = { - "\", "}; separator=", ", wrap, anchor> }; @@ -59,21 +58,32 @@ public class extends Parser { @Override public ATN getATN() { return _ATN; } - + + public void _action(RuleContext _localctx, int ruleIndex, int predIndex) { + switch ( predIndex ) { + : _action(()_localctx, predIndex);}; separator="\n"> + } + } + + + + public boolean _sempred(RuleContext _localctx, int ruleIndex, int predIndex) { + switch ( predIndex ) { + : return _sempred(()_localctx, predIndex);}; separator="\n"> + } + return true; + } + + + } >> -dumpActions(actions,sempreds) ::= << - - public boolean sempred(int ruleIndex, int predIndex) { - switch ( predIndex ) { - : return ;}; separator="\n"> - } - return true; - } - +/* +dumpActions(actions) ::= << public void action(int ruleIndex, int actionIndex) { switch ( actionIndex ) { @@ -83,6 +93,7 @@ case : break;}; separator="\n"> } >> +*/ ctor(p) ::= << public (TokenStream input) { @@ -91,6 +102,25 @@ public (TokenStream input) { } >> +RuleActionFunction(r, actions) ::= << +public void _action( _localctx, int actionIndex) { + switch ( actionIndex ) { + : break;}; separator="\n"> + } +} +>> + +RuleSempredFunction(r, actions) ::= << +public boolean _sempred( _localctx, int predIndex) { + switch ( predIndex ) { + : return ;}; separator="\n"> + } + return true; +} +>> + RuleFunction(currentRule,code,locals,ruleCtx,namedActions,finallyAction,postamble) ::= << public QStack\<\> _stk = new QStack\<\>(); @@ -395,6 +425,13 @@ AttributeDecl(d) ::= "" /** If we don't know location of label def x, use this template */ labelref(x) ::= "_localctx." +// used for left-recursive rules +recRuleDefArg() ::= "int _p" +recRuleArg() ::= "$_p" +recRuleAltPredicate(ruleName,opPrec) ::= " \<= " +recRuleSetResultAction() ::= "root_0=$_primary.tree;" +recRuleSetReturnAction(src,name) ::= "$=$.;" + // AST stuff (TODO: separate?) RootDecl(d) ::= "Object = _adaptor.nil();" @@ -548,7 +585,7 @@ public class extends Lexer { - + !> } >> @@ -565,6 +602,7 @@ static { } >> +/* actionMethod(name, ruleIndex, actions) ::= << public void _actions(int action) { System.out.println("exec action "+action); @@ -589,6 +627,7 @@ public boolean _sempreds(int pred) { } }<\n> >> +*/ /** Using a type to init value map, try to init a type; if not in table * must be an object, default value is "null". diff --git a/tool/src/org/antlr/v4/Tool.java b/tool/src/org/antlr/v4/Tool.java index da9acfc05..5556d25fd 100644 --- a/tool/src/org/antlr/v4/Tool.java +++ b/tool/src/org/antlr/v4/Tool.java @@ -227,7 +227,7 @@ public class Tool { integrateImportedGrammars(g); - GrammarTransformPipeline transform = new GrammarTransformPipeline(); + GrammarTransformPipeline transform = new GrammarTransformPipeline(this); transform.process(g.ast); LexerGrammar lexerg = null; @@ -341,7 +341,8 @@ public class Tool { ParserRuleReturnScope r = p.grammarSpec(); GrammarRootAST root = (GrammarRootAST)r.getTree(); if ( root instanceof GrammarRootAST ) { - ((GrammarRootAST)root).hasErrors = p.getNumberOfSyntaxErrors()>0; + root.hasErrors = p.getNumberOfSyntaxErrors()>0; + root.tokens = tokens; } return root; } diff --git a/tool/src/org/antlr/v4/codegen/ActionTranslator.java b/tool/src/org/antlr/v4/codegen/ActionTranslator.java index 97e4fb9d2..f803b98af 100644 --- a/tool/src/org/antlr/v4/codegen/ActionTranslator.java +++ b/tool/src/org/antlr/v4/codegen/ActionTranslator.java @@ -67,6 +67,7 @@ public class ActionTranslator implements ActionSplitterListener { put("int", TokenPropertyRef_int.class); }}; + CodeGenerator gen; ActionAST node; RuleFunction rf; List chunks = new ArrayList(); @@ -75,6 +76,7 @@ public class ActionTranslator implements ActionSplitterListener { public ActionTranslator(OutputModelFactory factory, ActionAST node) { this.factory = factory; this.node = node; + this.gen = factory.getGenerator(); } public static String toString(List chunks) { diff --git a/tool/src/org/antlr/v4/codegen/CodeGenerator.java b/tool/src/org/antlr/v4/codegen/CodeGenerator.java index 5efae6c6f..e2430dbd4 100644 --- a/tool/src/org/antlr/v4/codegen/CodeGenerator.java +++ b/tool/src/org/antlr/v4/codegen/CodeGenerator.java @@ -29,6 +29,7 @@ package org.antlr.v4.codegen; +import org.antlr.v4.Tool; import org.antlr.v4.codegen.model.OutputModelObject; import org.antlr.v4.runtime.Token; import org.antlr.v4.tool.*; @@ -48,14 +49,19 @@ public class CodeGenerator { "=\n}>"; public Grammar g; + public Tool tool; public Target target; public STGroup templates; public int lineWidth = 72; public CodeGenerator(Grammar g) { + this(g.tool, g, g.getOption("language", "Java")); + } + + public CodeGenerator(Tool tool, Grammar g, String language) { this.g = g; - String language = g.getOption("language", "Java"); + this.tool = tool; loadLanguageTarget(language); loadTemplates(language); } @@ -74,17 +80,17 @@ public class CodeGenerator { target = new Target(this); // use default } catch (InvocationTargetException ite) { - g.tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, + tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, ite, targetName); } catch (InstantiationException ie) { - g.tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, + tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, ie, targetName); } catch (IllegalAccessException cnfe) { - g.tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, + tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, cnfe, targetName); } @@ -96,7 +102,7 @@ public class CodeGenerator { templates.registerRenderer(Integer.class, new NumberRenderer()); } catch (IllegalArgumentException iae) { - g.tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, + tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, iae, language); } @@ -121,10 +127,10 @@ public class CodeGenerator { else outputModel = controller.buildParserOutputModel(); // CREATE TEMPLATES BY WALKING MODEL - OutputModelWalker walker = new OutputModelWalker(g.tool, templates); + OutputModelWalker walker = new OutputModelWalker(tool, templates); ST st = walker.walk(outputModel); - if ( g.tool.launch_ST_inspector ) { + if ( tool.launch_ST_inspector ) { st.inspect(); //if ( templates.isDefined("headerFile") ) headerFileST.inspect(); } @@ -185,7 +191,7 @@ public class CodeGenerator { } } catch (IOException ioe) { - g.tool.errMgr.toolError(ErrorType.CANNOT_WRITE_FILE, + tool.errMgr.toolError(ErrorType.CANNOT_WRITE_FILE, ioe, fileName); } @@ -193,7 +199,7 @@ public class CodeGenerator { public void write(ST code, String fileName) throws IOException { long start = System.currentTimeMillis(); - Writer w = g.tool.getOutputFileWriter(g, fileName); + Writer w = tool.getOutputFileWriter(g, fileName); STWriter wr = new AutoIndentWriter(w); wr.setLineWidth(lineWidth); code.write(wr); diff --git a/tool/src/org/antlr/v4/codegen/OutputModelController.java b/tool/src/org/antlr/v4/codegen/OutputModelController.java index ebaba6f76..4b134361f 100644 --- a/tool/src/org/antlr/v4/codegen/OutputModelController.java +++ b/tool/src/org/antlr/v4/codegen/OutputModelController.java @@ -82,29 +82,7 @@ public class OutputModelController { file.parser = parser; for (Rule r : g.rules.values()) { - RuleFunction function = rule(r); - parser.funcs.add(function); - - // TRIGGER factory functions for rule alts, elements - pushCurrentRule(function); - GrammarASTAdaptor adaptor = new GrammarASTAdaptor(r.ast.token.getInputStream()); - GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK); - CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk); - walker = new SourceGenTriggers(nodes, this); - try { - function.code = DefaultOutputModelFactory.list(walker.block(null, null, null)); // walk AST of rule alts/elements - } - catch (Exception e){ - e.printStackTrace(System.err); - } - - function.ctxType = gen.target.getRuleFunctionContextStructName(function); - function.ruleCtx.name = function.ctxType; - - function.postamble = rulePostamble(function, r); - - if ( function.ruleCtx.isEmpty() ) function.ruleCtx = null; - popCurrentRule(); + buildRuleFunction(parser, r); } return file; @@ -138,6 +116,51 @@ public class OutputModelController { return new Lexer(delegate, file); } + /** Create RuleFunction per rule and update sempreds,actions of parser + * output object with stuff found in r. + */ + public void buildRuleFunction(Parser parser, Rule r) { + CodeGenerator gen = delegate.getGenerator(); + RuleFunction function = rule(r); + parser.funcs.add(function); + + // TRIGGER factory functions for rule alts, elements + pushCurrentRule(function); + GrammarASTAdaptor adaptor = new GrammarASTAdaptor(r.ast.token.getInputStream()); + GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK); + CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk); + walker = new SourceGenTriggers(nodes, this); + try { + function.code = DefaultOutputModelFactory.list(walker.block(null, null, null)); // walk AST of rule alts/elements + } + catch (Exception e){ + e.printStackTrace(System.err); + } + + function.ctxType = gen.target.getRuleFunctionContextStructName(function); + function.ruleCtx.name = function.ctxType; + + function.postamble = rulePostamble(function, r); + + Grammar g = getGrammar(); + for (ActionAST a : r.actions) { + if ( a instanceof PredAST ) { + PredAST p = (PredAST)a; + RuleSempredFunction rsf = new RuleSempredFunction(delegate, r, function.ctxType); + parser.sempredFuncs.add(rsf); + rsf.actions.put(g.sempreds.get(p), new Action(delegate, p)); + } + else if ( a.getType()==ANTLRParser.FORCED_ACTION ) { + RuleActionFunction raf = new RuleActionFunction(delegate, r, function.ctxType); + parser.actionFuncs.add(raf); + raf.actions.put(g.actions.get(a), new ForcedAction(delegate, a)); + } + } + + if ( function.ruleCtx.isEmpty() ) function.ruleCtx = null; + popCurrentRule(); + } + public RuleFunction rule(Rule r) { RuleFunction rf = delegate.rule(r); for (CodeGeneratorExtension ext : extensions) rf = ext.rule(rf); diff --git a/tool/src/org/antlr/v4/codegen/model/Parser.java b/tool/src/org/antlr/v4/codegen/model/Parser.java index d49e443a8..bf9e556b6 100644 --- a/tool/src/org/antlr/v4/codegen/model/Parser.java +++ b/tool/src/org/antlr/v4/codegen/model/Parser.java @@ -30,7 +30,7 @@ package org.antlr.v4.codegen.model; import org.antlr.v4.codegen.OutputModelFactory; -import org.antlr.v4.tool.*; +import org.antlr.v4.tool.Grammar; import java.util.*; @@ -44,8 +44,10 @@ public class Parser extends OutputModelObject { @ModelElement public List funcs = new ArrayList(); @ModelElement public SerializedATN atn; - @ModelElement public LinkedHashMap actions; - @ModelElement public LinkedHashMap sempreds; + @ModelElement public List actionFuncs = + new ArrayList(); + @ModelElement public List sempredFuncs = + new ArrayList(); public Parser(OutputModelFactory factory, ParserFile file) { this.factory = factory; @@ -61,6 +63,7 @@ public class Parser extends OutputModelObject { ruleNames = g.rules.keySet(); atn = new SerializedATN(factory, g.atn); + /* sempreds = new LinkedHashMap(); for (PredAST p : g.sempreds.keySet()) { sempreds.put(g.sempreds.get(p), new Action(factory, p)); @@ -69,5 +72,6 @@ public class Parser extends OutputModelObject { for (ActionAST a : g.actions.keySet()) { actions.put(g.actions.get(a), new ForcedAction(factory, a)); } + */ } } diff --git a/tool/src/org/antlr/v4/codegen/model/RuleActionFunction.java b/tool/src/org/antlr/v4/codegen/model/RuleActionFunction.java new file mode 100644 index 000000000..a9d9b60d0 --- /dev/null +++ b/tool/src/org/antlr/v4/codegen/model/RuleActionFunction.java @@ -0,0 +1,51 @@ +/* + [The "BSD license"] + Copyright (c) 2011 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.antlr.v4.codegen.model; + +import org.antlr.v4.codegen.OutputModelFactory; +import org.antlr.v4.tool.Rule; + +import java.util.LinkedHashMap; + +public class RuleActionFunction extends OutputModelObject { + public String name; + public String ctxType; + public int ruleIndex; + + @ModelElement public LinkedHashMap actions = + new LinkedHashMap(); + + public RuleActionFunction(OutputModelFactory factory, Rule r, String ctxType) { + super(factory); + name = r.name; + ruleIndex = r.index; + this.ctxType = ctxType; + } +} diff --git a/tool/src/org/antlr/v4/codegen/model/RuleSempredFunction.java b/tool/src/org/antlr/v4/codegen/model/RuleSempredFunction.java new file mode 100644 index 000000000..1018af3fc --- /dev/null +++ b/tool/src/org/antlr/v4/codegen/model/RuleSempredFunction.java @@ -0,0 +1,39 @@ +/* + [The "BSD license"] + Copyright (c) 2011 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.antlr.v4.codegen.model; + +import org.antlr.v4.codegen.OutputModelFactory; +import org.antlr.v4.tool.Rule; + +public class RuleSempredFunction extends RuleActionFunction { + public RuleSempredFunction(OutputModelFactory factory, Rule r, String ctxType) { + super(factory, r, ctxType); + } +} diff --git a/tool/src/org/antlr/v4/parse/ANTLRParser.g b/tool/src/org/antlr/v4/parse/ANTLRParser.g index b0711532f..dc2c5dda1 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRParser.g +++ b/tool/src/org/antlr/v4/parse/ANTLRParser.g @@ -59,6 +59,7 @@ options { tokens { LEXER; RULE; + PREC_RULE; // flip to this if we find that it's left-recursive RULES; RULEMODIFIERS; RULEACTIONS; diff --git a/tool/src/org/antlr/v4/parse/LeftRecursiveRuleAnalyzer.java b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleAnalyzer.java new file mode 100644 index 000000000..2056b6f33 --- /dev/null +++ b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleAnalyzer.java @@ -0,0 +1,364 @@ +/* + [The "BSD license"] + Copyright (c) 2011 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.antlr.v4.parse; + +import org.antlr.runtime.*; +import org.antlr.runtime.tree.CommonTreeNodeStream; +import org.antlr.v4.Tool; +import org.antlr.v4.codegen.CodeGenerator; +import org.antlr.v4.tool.*; +import org.stringtemplate.v4.*; + +import java.util.*; + +/** */ +public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker { + public static enum ASSOC { left, right }; + + public Tool tool; + public String ruleName; + public LinkedHashMap binaryAlts = new LinkedHashMap(); + public LinkedHashMap ternaryAlts = new LinkedHashMap(); + public LinkedHashMap suffixAlts = new LinkedHashMap(); + public List prefixAlts = new ArrayList(); + public List otherAlts = new ArrayList(); + + public GrammarAST retvals; + + public STGroup recRuleTemplates; + public STGroup codegenTemplates; + public String language; + + public Map altAssociativity = new HashMap(); + + public LeftRecursiveRuleAnalyzer(TokenStream tokens, GrammarAST ruleAST, + Tool tool, String ruleName, String language) + { + super(new CommonTreeNodeStream(new GrammarASTAdaptor(ruleAST.token.getInputStream()), ruleAST)); + ((CommonTreeNodeStream)input).setTokenStream(tokens); + this.tool = tool; + this.ruleName = ruleName; + this.language = language; + loadPrecRuleTemplates(); + } + + public void loadPrecRuleTemplates() { + String templateGroupFile = "org/antlr/v4/tool/templates/LeftRecursiveRules.stg"; + recRuleTemplates = new STGroupFile(templateGroupFile); + if ( recRuleTemplates==null || !recRuleTemplates.isDefined("recRuleName") ) { + tool.errMgr.toolError(ErrorType.MISSING_CODE_GEN_TEMPLATES, "LeftRecursiveRules"); + } + + // use codegen to get correct language templates; that's it though + CodeGenerator gen = new CodeGenerator(tool, null, language); + codegenTemplates = gen.templates; + } + + @Override + public void setReturnValues(GrammarAST t) { + System.out.println(t); + retvals = t; + } + + @Override + public void setTokenPrec(GrammarAST t, int alt) { + ASSOC assoc = ASSOC.left; + if ( ((TerminalAST)t).getOptions()!=null ) { + String a = ((TerminalAST)t).getOption("assoc"); + if ( a!=null ) { + if ( a.equals(ASSOC.right.toString()) ) { + assoc = ASSOC.right; + } + else { + tool.errMgr.toolError(ErrorType.ILLEGAL_OPTION_VALUE, "assoc", assoc); + } + } + } + + if ( altAssociativity.get(alt)!=null && altAssociativity.get(alt)!=assoc ) { + tool.errMgr.toolError(ErrorType.ALL_OPS_NEED_SAME_ASSOC, alt); + } + altAssociativity.put(alt, assoc); + + //System.out.println("op " + alt + ": " + t.getText()+", assoc="+assoc); + } + + @Override + public void binaryAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) { + altTree = altTree.dupTree(); + + stripLeftRecursion(altTree); + + // rewrite e to be e_[rec_arg] + int nextPrec = nextPrecedence(alt); + ST refST = recRuleTemplates.getInstanceOf("recRuleRef"); + refST.add("ruleName", ruleName); + refST.add("arg", nextPrec); + altTree = replaceRuleRefs(altTree, refST.render()); + + String altText = text(altTree); + altText = altText.trim(); + altText += "{}"; // add empty alt to prevent pred hoisting + ST nameST = recRuleTemplates.getInstanceOf("recRuleName"); + nameST.add("ruleName", ruleName); + if ( rewriteTree!=null ) { + rewriteTree = rewriteTree.dupTree(); + rewriteTree = replaceRuleRefs(rewriteTree, "$" + nameST.render()); + } + String rewriteText = text(rewriteTree); + binaryAlts.put(alt, altText + " " + rewriteText); + //System.out.println("binaryAlt " + alt + ": " + altText + ", rewrite=" + rewriteText); + } + + /** Convert e ? e : e -> ? e : e_[nextPrec] */ + @Override + public void ternaryAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) { + altTree = altTree.dupTree(); + + stripLeftRecursion(altTree); + + int nextPrec = nextPrecedence(alt); + ST refST = recRuleTemplates.getInstanceOf("recRuleRef"); + refST.add("ruleName", ruleName); + refST.add("arg", nextPrec); + altTree = replaceLastRuleRef(altTree, refST.render()); + + String altText = text(altTree); + altText = altText.trim(); + altText += "{}"; // add empty alt to prevent pred hoisting + ST nameST = recRuleTemplates.getInstanceOf("recRuleName"); + nameST.add("ruleName", ruleName); + if ( rewriteTree!=null ) { + rewriteTree = rewriteTree.dupTree(); + rewriteTree = replaceRuleRefs(rewriteTree, "$" + nameST.render()); + } + String rewriteText = text(rewriteTree); + ternaryAlts.put(alt, altText + " " + rewriteText); + //System.out.println("ternaryAlt " + alt + ": " + altText + ", rewrite=" + rewriteText); + } + + @Override + public void prefixAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) { + altTree = altTree.dupTree(); + + int nextPrec = precedence(alt); + // rewrite e to be e_[rec_arg] + ST refST = recRuleTemplates.getInstanceOf("recRuleRef"); + refST.add("ruleName", ruleName); + refST.add("arg", nextPrec); + altTree = replaceRuleRefs(altTree, refST.render()); + String altText = text(altTree); + altText = altText.trim(); + altText += "{}"; // add empty alt to prevent pred hoisting + + ST nameST = recRuleTemplates.getInstanceOf("recRuleName"); + nameST.add("ruleName", ruleName); + if ( rewriteTree!=null ) { + rewriteTree = rewriteTree.dupTree(); + rewriteTree = replaceRuleRefs(rewriteTree, nameST.render()); + } + String rewriteText = text(rewriteTree); + + prefixAlts.add(altText + " " + rewriteText); + //System.out.println("prefixAlt " + alt + ": " + altText + ", rewrite=" + rewriteText); + } + + @Override + public void suffixAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) { + altTree = altTree.dupTree(); + stripLeftRecursion(altTree); + ST nameST = recRuleTemplates.getInstanceOf("recRuleName"); + nameST.add("ruleName", ruleName); + if ( rewriteTree!=null ) { + rewriteTree = rewriteTree.dupTree(); + rewriteTree = replaceRuleRefs(rewriteTree, "$" + nameST.render()); + } + String rewriteText = text(rewriteTree); + String altText = text(altTree); + altText = altText.trim(); + suffixAlts.put(alt, altText + " " + rewriteText); +// System.out.println("suffixAlt " + alt + ": " + altText + ", rewrite=" + rewriteText); + } + + @Override + public void otherAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) { + altTree = altTree.dupTree(); + if ( rewriteTree!=null ) rewriteTree = rewriteTree.dupTree(); + stripLeftRecursion(altTree); + String altText = text(altTree); + + String rewriteText = text(rewriteTree); + otherAlts.add(altText + " " + rewriteText); + //System.out.println("otherAlt " + alt + ": " + altText + ", rewrite=" + rewriteText); + } + + // --------- get transformed rules ---------------- + + public String getArtificialPrecStartRule() { + ST ruleST = recRuleTemplates.getInstanceOf("recRuleStart"); + ruleST.add("ruleName", ruleName); + ruleST.add("minPrec", 0); + ruleST.add("userRetvals", retvals); + fillRetValAssignments(ruleST, "recRuleName"); + + System.out.println(ruleST.render()); + return ruleST.render(); + } + + public String getArtificialOpPrecRule() { + ST ruleST = recRuleTemplates.getInstanceOf("recRule"); + ruleST.add("ruleName", ruleName); + // TODO: ruleST.add("buildAST", grammar.hasASTOption()); + ST argDefST = + codegenTemplates.getInstanceOf("recRuleDefArg"); + ruleST.add("precArgDef", argDefST); + ST ruleArgST = + codegenTemplates.getInstanceOf("recRuleArg"); + ruleST.add("argName", ruleArgST); + ST setResultST = + codegenTemplates.getInstanceOf("recRuleSetResultAction"); + ruleST.add("setResultAction", setResultST); + ruleST.add("userRetvals", retvals); + fillRetValAssignments(ruleST, "recPrimaryName"); + + LinkedHashMap opPrecRuleAlts = new LinkedHashMap(); + opPrecRuleAlts.putAll(binaryAlts); + opPrecRuleAlts.putAll(ternaryAlts); + opPrecRuleAlts.putAll(suffixAlts); + for (int alt : opPrecRuleAlts.keySet()) { + String altText = opPrecRuleAlts.get(alt); + ST altST = recRuleTemplates.getInstanceOf("recRuleAlt"); + ST predST = + codegenTemplates.getInstanceOf("recRuleAltPredicate"); + predST.add("opPrec", precedence(alt)); + predST.add("ruleName", ruleName); + altST.add("pred", predST); + altST.add("alt", altText); + ruleST.add("alts", altST); + } + + System.out.println(ruleST.render()); + + return ruleST.render(); + } + + public String getArtificialPrimaryRule() { + ST ruleST = recRuleTemplates.getInstanceOf("recPrimaryRule"); + ruleST.add("ruleName", ruleName); + ruleST.add("alts", prefixAlts); + ruleST.add("alts", otherAlts); + ruleST.add("userRetvals", retvals); + System.out.println(ruleST.render()); + return ruleST.render(); + } + + public GrammarAST replaceRuleRefs(GrammarAST t, String name) { + if ( t==null ) return null; + for (GrammarAST rref : t.getNodesWithType(RULE_REF)) { + if ( rref.getText().equals(ruleName) ) rref.setText(name); + } + return t; + } + + public static boolean hasImmediateRecursiveRuleRefs(GrammarAST t, String ruleName) { + if ( t==null ) return false; + for (GrammarAST rref : t.getNodesWithType(RULE_REF)) { + if ( rref.getText().equals(ruleName) ) return true; + } + return false; + } + + public GrammarAST replaceLastRuleRef(GrammarAST t, String name) { + if ( t==null ) return null; + GrammarAST last = null; + for (GrammarAST rref : t.getNodesWithType(RULE_REF)) { last = rref; } + if ( last !=null && last.getText().equals(ruleName) ) last.setText(name); + return t; + } + + public void stripLeftRecursion(GrammarAST altAST) { + GrammarAST rref = (GrammarAST)altAST.getChild(0); + if ( rref.getType()== ANTLRParser.RULE_REF && + rref.getText().equals(ruleName)) + { + // remove rule ref + altAST.deleteChild(0); + // reset index so it prints properly + GrammarAST newFirstChild = (GrammarAST) altAST.getChild(0); + altAST.setTokenStartIndex(newFirstChild.getTokenStartIndex()); + } + } + + public String text(GrammarAST t) { + if ( t==null ) return ""; + TokenStream tokens = input.getTokenStream(); + CommonToken ta = (CommonToken)tokens.get(t.getTokenStartIndex()); + CommonToken tb = (CommonToken)tokens.get(t.getTokenStopIndex()); + return tokens.toString(ta, tb); + } + + public int precedence(int alt) { + return numAlts-alt+1; + } + + public int nextPrecedence(int alt) { + int p = precedence(alt); + if ( altAssociativity.get(alt)==ASSOC.left ) p++; + return p; + } + + public void fillRetValAssignments(ST ruleST, String srcName) { + if ( retvals==null ) return; + + // complicated since we must be target-independent + AttributeDict args = ScopeParser.parseTypeList(retvals.token.getText()); + + for (String name : args.attributes.keySet()) { + ST setRetValST = + codegenTemplates.getInstanceOf("recRuleSetReturnAction"); + ST ruleNameST = recRuleTemplates.getInstanceOf(srcName); + ruleNameST.add("ruleName", ruleName); + setRetValST.add("src", ruleNameST); + setRetValST.add("name", name); + ruleST.add("userRetvalAssignments",setRetValST); + } + } + + @Override + public String toString() { + return "PrecRuleOperatorCollector{" + + "binaryAlts=" + binaryAlts + + ", ternaryAlts=" + ternaryAlts + + ", suffixAlts=" + suffixAlts + + ", prefixAlts=" + prefixAlts + + ", otherAlts=" + otherAlts + + '}'; + } +} diff --git a/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g new file mode 100644 index 000000000..8cc2441a4 --- /dev/null +++ b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g @@ -0,0 +1,231 @@ +/* + * [The "BSD license"] + * Copyright (c) 2011 Terence Parr + * All rights reserved. + * + * Grammar conversion to ANTLR v3: + * Copyright (c) 2011 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** Find left-recursive rules */ +tree grammar LeftRecursiveRuleWalker; + +options { + tokenVocab=ANTLRParser; + ASTLabelType=GrammarAST; +} + +@header { +package org.antlr.v4.parse; + +import org.antlr.v4.misc.*; +import org.antlr.v4.tool.*; +} + +@members { +private String ruleName; +private int currentOuterAltNumber; // which outer alt of rule? +public int numAlts; // how many alts for this rule total? + +public void setTokenPrec(GrammarAST t, int alt) {} +public void binaryAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) {} +public void ternaryAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) {} +public void prefixAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) {} +public void suffixAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) {} +public void otherAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) {} +public void setReturnValues(GrammarAST t) {} +} + +public +rec_rule returns [boolean isLeftRec] +@init +{ + currentOuterAltNumber = 1; +} + : ^( r=RULE id=ID {ruleName=$id.getText();} + ruleModifier? + (^(ARG ARG_ACTION))? + (^(RET ARG_ACTION))? + ( ^(THROWS .+) )? + ( ^(OPTIONS .*) + | ^(AT ID ACTION) + )* + ruleBlock {$isLeftRec = $ruleBlock.isLeftRec;} + exceptionGroup + ) + // why do this? +// {if ($ruleBlock.isLeftRec) $r.setType(PREC_RULE);} + ; + +ruleModifier + : PUBLIC + | PRIVATE + | PROTECTED + | FRAGMENT + ; + +ruleBlock returns [boolean isLeftRec] +@init{boolean lr=false; this.numAlts = $start.getChildCount();} + : ^( BLOCK + ( + ( o=outerAlternative[null] + | ^( r=ALT_REWRITE o=outerAlternative[(GrammarAST)$r.getChild(1)] rewrite ) + ) + {if ($o.isLeftRec) $isLeftRec = true;} + {currentOuterAltNumber++;} + )+ + ) + ; + +/** An alt is either prefix, suffix, binary, or ternary operation or "other" */ +outerAlternative[GrammarAST rew] returns [boolean isLeftRec] + : (binaryMultipleOp)=> binaryMultipleOp + {binaryAlt($start, $rew, currentOuterAltNumber); $isLeftRec=true;} + | (binary)=> binary + {binaryAlt($start, $rew, currentOuterAltNumber); $isLeftRec=true;} + | (ternary)=> ternary + {ternaryAlt($start, $rew, currentOuterAltNumber); $isLeftRec=true;} + | (prefix)=> prefix + {prefixAlt($start, $rew, currentOuterAltNumber);} + | (suffix)=> suffix + {suffixAlt($start, $rew, currentOuterAltNumber); $isLeftRec=true;} + | ^(ALT element+) // "other" case + {otherAlt($start, $rew, currentOuterAltNumber);} + ; + +binary + : ^( ALT recurseNoLabel op=token recurse ) {setTokenPrec($op.t, currentOuterAltNumber);} + ; + +binaryMultipleOp + : ^( ALT recurseNoLabel ^( BLOCK ( ^( ALT op=token {setTokenPrec($op.t, currentOuterAltNumber);} ) )+ ) recurse ) + ; + +ternary + : ^( ALT recurseNoLabel op=token recurse token recurse ) {setTokenPrec($op.t, currentOuterAltNumber);} + ; + +prefix + : ^( ALT {setTokenPrec((GrammarAST)input.LT(1), currentOuterAltNumber);} + ({!((CommonTree)input.LT(1)).getText().equals(ruleName)}? element)+ + recurse + ) + ; + +suffix : ^( ALT recurseNoLabel {setTokenPrec((GrammarAST)input.LT(1), currentOuterAltNumber);} element+ ) ; + +recurse + : ^(ASSIGN ID recurseNoLabel) + | ^(PLUS_ASSIGN ID recurseNoLabel) + | recurseNoLabel + ; + +recurseNoLabel : {((CommonTree)input.LT(1)).getText().equals(ruleName)}? RULE_REF; + +token returns [GrammarAST t=null] + : ^(ASSIGN ID s=token {$t = $s.t;}) + | ^(PLUS_ASSIGN ID s=token {$t = $s.t;}) + | ^(ROOT s=token {$t = $s.t;}) + | ^(BANG s=token {$t = $s.t;}) + | b=STRING_LITERAL {$t = $b;} + | c=TOKEN_REF {$t = $c;} + ; + +exceptionGroup + : exceptionHandler* finallyClause? + ; + +exceptionHandler + : ^(CATCH ARG_ACTION ACTION) + ; + +finallyClause + : ^(FINALLY ACTION) + ; + +element + : ^(ROOT element) + | ^(BANG element) + | atom + | ^(NOT element) + | ^(RANGE atom atom) + | ^(ASSIGN ID element) + | ^(PLUS_ASSIGN ID element) + | ^(SET setElement+) + | RULE_REF + | ebnf + | tree_ + | FORCED_ACTION + | ACTION + | SEMPRED + | EPSILON + ; + +setElement + : STRING_LITERAL + | TOKEN_REF + ; + +ebnf: block + | ^( OPTIONAL block ) + | ^( CLOSURE block ) + | ^( POSITIVE_CLOSURE block ) + ; + +block + : ^(BLOCK ACTION? alternative+) + ; + +alternative + : ^(ALT_REWRITE alternative rewrite) + | ^(ALT element+) + ; + +tree_ + : ^(TREE_BEGIN element+) + ; + +atom + : ^(RULE_REF ARG_ACTION?) + | ^(TOKEN_REF ARG_ACTION?) + | STRING_LITERAL + | WILDCARD + | ^(DOT ID element) + ; + +ast_suffix + : ROOT + | BANG + ; + +rewrite + : rewrite_result* + ; + +rewrite_result + : ^(ST_RESULT .*) + | ^(RESULT .*) + ; diff --git a/tool/src/org/antlr/v4/tool/Alternative.java b/tool/src/org/antlr/v4/tool/Alternative.java index d837c0f8b..209429268 100644 --- a/tool/src/org/antlr/v4/tool/Alternative.java +++ b/tool/src/org/antlr/v4/tool/Alternative.java @@ -65,6 +65,8 @@ public class Alternative implements AttributeResolver { * and catch/finally (not in an alt). Also tracks predicates, rewrite actions. * We need to examine these actions before code generation so * that we can detect refs to $rule.attr etc... + * + * This tracks per alt */ public List actions = new ArrayList(); diff --git a/tool/src/org/antlr/v4/tool/ErrorType.java b/tool/src/org/antlr/v4/tool/ErrorType.java index 57f785cf7..6c786ffdc 100644 --- a/tool/src/org/antlr/v4/tool/ErrorType.java +++ b/tool/src/org/antlr/v4/tool/ErrorType.java @@ -100,6 +100,7 @@ public enum ErrorType { ARGS_ON_TOKEN_REF("token reference may not have parameters", ErrorSeverity.ERROR), RULE_REF_AMBIG_WITH_RULE_IN_ALT("", ErrorSeverity.ERROR), ILLEGAL_OPTION("illegal option ", ErrorSeverity.WARNING), + ILLEGAL_OPTION_VALUE("illegal option value =", ErrorSeverity.WARNING), LIST_LABEL_INVALID_UNLESS_RETVAL_STRUCT("", ErrorSeverity.ERROR), REWRITE_ELEMENT_NOT_PRESENT_ON_LHS("reference to rewrite element not found to left of ->", ErrorSeverity.ERROR), UNDEFINED_TOKEN_REF_IN_REWRITE("token in rewrite is undefined", ErrorSeverity.ERROR), @@ -136,9 +137,7 @@ public enum ErrorType { AST_OP_IN_ALT_WITH_REWRITE("rule alt uses rewrite syntax and also an AST operator", ErrorSeverity.ERROR), WILDCARD_AS_ROOT("Wildcard invalid as root; wildcard can itself be a tree", ErrorSeverity.ERROR), CONFLICTING_OPTION_IN_TREE_FILTER("option = conflicts with tree grammar filter mode", ErrorSeverity.ERROR), - - AMBIGUITY("", ErrorSeverity.ERROR), - UNREACHABLE_ALTS("", ErrorSeverity.ERROR), + ALL_OPS_NEED_SAME_ASSOC("all operators of alt of left-recursive rule must have same associativity", ErrorSeverity.WARNING), // these next 3 can happen in recursion-limited LL("", *) //RECURSION_OVERFLOW("", ErrorSeverity.ERROR), diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index bb13ffb85..8e9b56f2e 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -644,6 +644,11 @@ public class Grammar implements AttributeResolver { return 0; } + public org.antlr.runtime.TokenStream getTokenStream() { + if ( ast!=null ) return ast.tokens; + return null; + } + public boolean isLexer() { return getType()==ANTLRParser.LEXER; } public boolean isParser() { return getType()==ANTLRParser.PARSER; } public boolean isTreeGrammar() { return getType()==ANTLRParser.TREE; } @@ -684,6 +689,24 @@ public class Grammar implements AttributeResolver { return outputOption!=null && outputOption.equals("AST"); } + /** Manually get language option from tree */ + // TODO: move to general tree visitor/parser class? + public static String getLanguageOption(GrammarRootAST ast) { + GrammarAST options = (GrammarAST)ast.getFirstChildWithType(ANTLRParser.OPTIONS); + String language = "Java"; + if ( options!=null ) { + for (Object o : options.getChildren()) { + GrammarAST c = (GrammarAST)o; + if ( c.getType() == ANTLRParser.ASSIGN && + c.getChild(0).getText().equals("language") ) + { + language = c.getChild(1).getText(); + } + } + } + return language; + } + public static Map getStringLiteralAliasesFromLexerRules(GrammarRootAST ast) { GrammarAST combinedRulesRoot = (GrammarAST)ast.getFirstChildWithType(ANTLRParser.RULES); diff --git a/tool/src/org/antlr/v4/tool/GrammarAST.java b/tool/src/org/antlr/v4/tool/GrammarAST.java index 9b5c4f544..b44756547 100644 --- a/tool/src/org/antlr/v4/tool/GrammarAST.java +++ b/tool/src/org/antlr/v4/tool/GrammarAST.java @@ -45,6 +45,8 @@ public class GrammarAST extends CommonTree { /** If we build an ATN, we make AST node point at left edge of ATN construct */ public ATNState atnState; + public String textOverride; + public GrammarAST() {;} public GrammarAST(Token t) { super(t); } public GrammarAST(GrammarAST node) { super(node); } @@ -128,6 +130,14 @@ public class GrammarAST extends CommonTree { return null; } + public void setType(int type) { + token.setType(type); + } + + public void setText(String text) { + textOverride = text; // don't alt tokens as others might see + } + // @Override // public boolean equals(Object obj) { // return super.equals(obj); @@ -138,6 +148,13 @@ public class GrammarAST extends CommonTree { return new GrammarAST(this); } + public GrammarAST dupTree() { + GrammarAST t = this; + CharStream input = this.token.getInputStream(); + GrammarASTAdaptor adaptor = new GrammarASTAdaptor(input); + return (GrammarAST)adaptor.dupTree(t); + } + @Override public String toString() { return super.toString(); diff --git a/tool/src/org/antlr/v4/tool/GrammarRootAST.java b/tool/src/org/antlr/v4/tool/GrammarRootAST.java index b0fcce4d8..c36367823 100644 --- a/tool/src/org/antlr/v4/tool/GrammarRootAST.java +++ b/tool/src/org/antlr/v4/tool/GrammarRootAST.java @@ -29,7 +29,7 @@ package org.antlr.v4.tool; -import org.antlr.runtime.Token; +import org.antlr.runtime.*; import org.antlr.runtime.tree.Tree; import java.util.*; @@ -43,6 +43,8 @@ public class GrammarRootAST extends GrammarASTWithOptions { }; public int grammarType; // LEXER, PARSER, TREE, GRAMMAR (combined) public boolean hasErrors; + /** Track stream used to create this tree */ + public TokenStream tokens; public GrammarRootAST(GrammarAST node) { super(node); diff --git a/tool/src/org/antlr/v4/tool/GrammarTransformPipeline.java b/tool/src/org/antlr/v4/tool/GrammarTransformPipeline.java index e8708dac5..750db0676 100644 --- a/tool/src/org/antlr/v4/tool/GrammarTransformPipeline.java +++ b/tool/src/org/antlr/v4/tool/GrammarTransformPipeline.java @@ -29,26 +29,106 @@ package org.antlr.v4.tool; +import org.antlr.runtime.*; +import org.antlr.v4.Tool; import org.antlr.v4.parse.*; +import java.util.*; + /** Handle left-recursion and block-set transforms */ public class GrammarTransformPipeline { - //public GrammarAST ast; + public Tool tool; - public GrammarTransformPipeline() { -// this.ast = ast; + public GrammarTransformPipeline(Tool tool) { + this.tool = tool; } - public void process(GrammarAST ast) { + public void process(GrammarRootAST ast) { if ( ast==null ) return; + System.out.println("before: "+ast.toStringTree()); + if ( ast.grammarType==ANTLRParser.PARSER || ast.grammarType==ANTLRParser.COMBINED ) { + translateLeftRecursiveRules(ast); + } + + reduceBlocksToSets(ast); + System.out.println("after: "+ast.toStringTree()); + } + + public void reduceBlocksToSets(GrammarRootAST ast) { org.antlr.runtime.tree.CommonTreeNodeStream nodes = new org.antlr.runtime.tree.CommonTreeNodeStream(ast); GrammarASTAdaptor adaptor = new GrammarASTAdaptor(); BlockSetTransformer transformer = new BlockSetTransformer(nodes); transformer.setTreeAdaptor(adaptor); -// System.out.println("before: "+ast.toStringTree()); transformer.downup(ast); -// System.out.println("after: "+ast.toStringTree()); } + + public void translateLeftRecursiveRules(GrammarRootAST ast) { + String language = Grammar.getLanguageOption(ast); + for (GrammarAST r : ast.getNodesWithType(ANTLRParser.RULE)) { + String ruleName = r.getChild(0).getText(); + if ( !Character.isUpperCase(ruleName.charAt(0)) ) { + if ( LeftRecursiveRuleAnalyzer.hasImmediateRecursiveRuleRefs(r, ruleName) ) { + translateLeftRecursiveRule(ast, r, language); + } + } + } + } + + public void translateLeftRecursiveRule(GrammarRootAST ast, + GrammarAST ruleAST, + String language) + { + //System.out.println(ruleAST.toStringTree()); + TokenStream tokens = ast.tokens; + String ruleName = ruleAST.getChild(0).getText(); + LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker = + new LeftRecursiveRuleAnalyzer(tokens, ruleAST, tool, ruleName, language); + boolean isLeftRec = false; + try { +// System.out.println("TESTING ---------------\n"+ +// leftRecursiveRuleWalker.text(ruleAST)); + isLeftRec = leftRecursiveRuleWalker.rec_rule(); + } + catch (RecognitionException re) { + tool.errMgr.toolError(ErrorType.INTERNAL_ERROR, "bad ast structure", re); + } + if ( !isLeftRec ) return; + + // delete old rule + GrammarAST RULES = (GrammarAST)ast.getFirstChildWithType(ANTLRParser.RULES); + RULES.deleteChild(ruleAST); + + List rules = new ArrayList(); + rules.add( leftRecursiveRuleWalker.getArtificialPrecStartRule() ) ; + rules.add( leftRecursiveRuleWalker.getArtificialOpPrecRule() ); + rules.add( leftRecursiveRuleWalker.getArtificialPrimaryRule() ); + for (String ruleText : rules) { +// System.out.println("created: "+ruleText); + GrammarAST t = parseArtificialRule(ruleText); + // insert into grammar tree + RULES.addChild(t); + System.out.println("added: "+t.toStringTree()); + } + } + + public GrammarAST parseArtificialRule(String ruleText) { + ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText)); + GrammarASTAdaptor adaptor = new GrammarASTAdaptor(); + CommonTokenStream tokens = new CommonTokenStream(lexer); + ToolANTLRParser p = new ToolANTLRParser(tokens, tool); + p.setTreeAdaptor(adaptor); + try { + ParserRuleReturnScope r = p.rule(); + return (GrammarAST)r.getTree(); + } + catch (Exception e) { + tool.errMgr.toolError(ErrorType.INTERNAL_ERROR, + "error parsing rule created during left-recursion detection: "+ruleText, + e); + } + return null; + } + } diff --git a/tool/src/org/antlr/v4/tool/Rule.java b/tool/src/org/antlr/v4/tool/Rule.java index 20843242c..a12ea9705 100644 --- a/tool/src/org/antlr/v4/tool/Rule.java +++ b/tool/src/org/antlr/v4/tool/Rule.java @@ -99,6 +99,15 @@ public class Rule implements AttributeResolver { */ public List exceptionActions = new ArrayList(); + /** Track all executable actions other than named actions like @init + * and catch/finally (not in an alt). Also tracks predicates, rewrite actions. + * We need to examine these actions before code generation so + * that we can detect refs to $rule.attr etc... + * + * This tracks per rule; Alternative objs also track per alt. + */ + public List actions = new ArrayList(); + public ActionAST finallyAction; public int numberOfAlts; @@ -122,14 +131,16 @@ public class Rule implements AttributeResolver { } public void defineActionInAlt(int currentAlt, ActionAST actionAST) { + actions.add(actionAST); alt[currentAlt].actions.add(actionAST); - if ( g.isLexer() || actionAST.getType()== ANTLRParser.FORCED_ACTION ) { + if ( g.isLexer() || actionAST.getType()==ANTLRParser.FORCED_ACTION ) { actionIndex = g.actions.size(); g.actions.put(actionAST, actionIndex); } } public void definePredicateInAlt(int currentAlt, PredAST predAST) { + actions.add(predAST); alt[currentAlt].actions.add(predAST); g.sempreds.put(predAST, g.sempreds.size()); }