From 531afe5af8ca9768d55779c3db87db48ecdacf1e Mon Sep 17 00:00:00 2001 From: Terence Parr Date: Wed, 7 Aug 2013 16:46:52 -0700 Subject: [PATCH] changed grammar to allow on | operator for alt. still allowed on token refs but ignored. Simplified left-recursion elemination rules. see CHANGES.TXT Aug 7, 2013. BREAKING CHANGE. Must alter ternary op alts in grammars. --- CHANGES.txt | 39 ++++++++ .../analysis/LeftRecursiveRuleAnalyzer.java | 93 +++++++------------ tool/src/org/antlr/v4/parse/ANTLRParser.g | 11 ++- .../antlr/v4/parse/LeftRecursiveRuleWalker.g | 59 +++++------- tool/src/org/antlr/v4/tool/Grammar.java | 1 + .../src/org/antlr/v4/tool/ast/GrammarAST.java | 15 +++ .../org/antlr/v4/test/TestLeftRecursion.java | 36 +++---- 7 files changed, 140 insertions(+), 114 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 2cfb40e5f..681074169 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,44 @@ ANTLR v4 Honey Badger +August 7, 2013 + +* [BREAKING CHANGE] Altered left-recursion elimination to be simpler. Now, + we use the following patterns: + + * Binary expressions are expressions which contain a recursive invocation of + the rule as the first and last element of the alternative. + + * Suffix expressions contain a recursive invocation of the rule as the first + element of the alternative, but not as the last element. + + * Prefix expressions contain a recursive invocation of the rule as the last + element of the alternative, but not as the first element. + +There is no such thing as a "ternary" expression--they are just binary +expressions in disguise. + +The right associativity specifiers no longer on the individual tokens because +it's done on alternative basis anyway. The option is now on the individual +alternative; e.g., + + e : e '*' e + | e '+' e + | e '?' e ':' e + | e '=' e + | INT + ; + +If your language uses a right-associative ternary operator, you will need +to update your grammar to include on the alternative operator. + +This also fixes #245 and fixes #268: + +https://github.com/antlr/antlr4/issues/245 +https://github.com/antlr/antlr4/issues/268 + +To smooth the transition, is still allowed on token references +but it is ignored. + June 30, 2013 -- 4.1 release June 24, 2013 diff --git a/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleAnalyzer.java b/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleAnalyzer.java index 77e58cf3a..e78eff5cc 100644 --- a/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleAnalyzer.java +++ b/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleAnalyzer.java @@ -38,11 +38,11 @@ import org.antlr.v4.Tool; import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.parse.GrammarASTAdaptor; import org.antlr.v4.parse.LeftRecursiveRuleWalker; +import org.antlr.v4.runtime.misc.IntervalSet; import org.antlr.v4.runtime.misc.Pair; import org.antlr.v4.tool.ErrorType; import org.antlr.v4.tool.ast.AltAST; import org.antlr.v4.tool.ast.GrammarAST; -import org.antlr.v4.tool.ast.GrammarASTWithOptions; import org.stringtemplate.v4.ST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.STGroupFile; @@ -115,21 +115,19 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker { } @Override - public void setTokenAssoc(GrammarAST t, int alt) { + public void setAltAssoc(AltAST t, int alt) { ASSOC assoc = ASSOC.left; - if ( t instanceof GrammarASTWithOptions ) { - if ( ((GrammarASTWithOptions)t).getOptions()!=null ) { - String a = ((GrammarASTWithOptions)t).getOptionString("assoc"); - if ( a!=null ) { - if ( a.equals(ASSOC.right.toString()) ) { - assoc = ASSOC.right; - } - else if ( a.equals(ASSOC.left.toString()) ) { - assoc = ASSOC.left; - } - else { - tool.errMgr.toolError(ErrorType.ILLEGAL_OPTION_VALUE, "assoc", assoc); - } + if ( t.getOptions()!=null ) { + String a = t.getOptionString("assoc"); + if ( a!=null ) { + if ( a.equals(ASSOC.right.toString()) ) { + assoc = ASSOC.right; + } + else if ( a.equals(ASSOC.left.toString()) ) { + assoc = ASSOC.left; + } + else { + tool.errMgr.toolError(ErrorType.ILLEGAL_OPTION_VALUE, "assoc", assoc); } } } @@ -139,7 +137,7 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker { } altAssociativity.put(alt, assoc); - //System.out.println("op " + alt + ": " + t.getText()+", assoc="+assoc); + System.out.println("setAltAssoc: op " + alt + ": " + t.getText()+", assoc="+assoc); } @Override @@ -169,32 +167,6 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker { //System.out.println("binaryAlt " + alt + ": " + altText + ", rewrite=" + rewriteText); } - /** Convert e ? e : e -> ? e : e_[nextPrec] */ - @Override - public void ternaryAlt(AltAST originalAltTree, int alt) { - AltAST altTree = (AltAST)originalAltTree.dupTree(); - String altLabel = altTree.altLabel!=null ? altTree.altLabel.getText() : null; - - GrammarAST lrlabel = stripLeftRecursion(altTree); - String label = lrlabel != null ? lrlabel.getText() : null; - if ( lrlabel!=null ) { - leftRecursiveRuleRefLabels.add(new Pair(lrlabel,altLabel)); - } - stripAssocOptions(altTree); - stripAltLabel(altTree); - - int nextPrec = nextPrecedence(alt); - altTree = addPrecedenceArgToLastRule(altTree, nextPrec); - - String altText = text(altTree); - altText = altText.trim(); - LeftRecursiveRuleAltInfo a = - new LeftRecursiveRuleAltInfo(alt, altText, label, altLabel, originalAltTree); - a.nextPrec = nextPrec; - ternaryAlts.put(alt, a); - //System.out.println("ternaryAlt " + alt + ": " + altText + ", rewrite=" + rewriteText); - } - @Override public void prefixAlt(AltAST originalAltTree, int alt) { AltAST altTree = (AltAST)originalAltTree.dupTree(); @@ -282,24 +254,18 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker { public AltAST addPrecedenceArgToRules(AltAST t, int prec) { if ( t==null ) return null; - for (GrammarAST rref : t.getNodesWithType(RULE_REF)) { - if ( rref.getText().equals(ruleName) ) { + // get all top-level rule refs from ALT + List outerAltRuleRefs = t.getNodesWithTypePreorderDFS(IntervalSet.of(RULE_REF)); + for (GrammarAST rref : outerAltRuleRefs) { + boolean recursive = rref.getText().equals(ruleName); + boolean rightmost = rref == outerAltRuleRefs.get(outerAltRuleRefs.size()-1); + if ( recursive && rightmost ) { rref.setText(ruleName+"["+prec+"]"); } } return t; } - public AltAST addPrecedenceArgToLastRule(AltAST t, int prec) { - if ( t==null ) return null; - GrammarAST last = null; - for (GrammarAST rref : t.getNodesWithType(RULE_REF)) { last = rref; } - if ( last !=null && last.getText().equals(ruleName) ) { - last.setText(ruleName+"["+prec+"]"); - } - return t; - } - public void stripAssocOptions(GrammarAST t) { if ( t==null ) return; for (GrammarAST options : t.getNodesWithType(ELEMENT_OPTIONS)) { @@ -347,15 +313,21 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker { public GrammarAST stripLeftRecursion(GrammarAST altAST) { GrammarAST lrlabel=null; GrammarAST first = (GrammarAST)altAST.getChild(0); + int leftRecurRuleIndex = 0; + if ( first.getType() == ELEMENT_OPTIONS ) { + first = (GrammarAST)altAST.getChild(1); + leftRecurRuleIndex = 1; + } Tree rref = first.getChild(1); // if label=rule if ( (first.getType()==RULE_REF && first.getText().equals(ruleName)) || (rref!=null && rref.getType()==RULE_REF && rref.getText().equals(ruleName)) ) { if ( first.getType()==ASSIGN ) lrlabel = (GrammarAST)first.getChild(0); - // remove rule ref (first child) - altAST.deleteChild(0); - // reset index so it prints properly - GrammarAST newFirstChild = (GrammarAST)altAST.getChild(0); + // remove rule ref (first child unless options present) + altAST.deleteChild(leftRecurRuleIndex); + // reset index so it prints properly (sets token range of + // ALT to start to right of left recur rule we deleted) + GrammarAST newFirstChild = (GrammarAST)altAST.getChild(leftRecurRuleIndex); altAST.setTokenStartIndex(newFirstChild.getTokenStartIndex()); } return lrlabel; @@ -385,10 +357,11 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker { return numAlts-alt+1; } + // Assumes left assoc public int nextPrecedence(int alt) { int p = precedence(alt); - if ( altAssociativity.get(alt)==ASSOC.left ) p++; - return p; + if ( altAssociativity.get(alt)==ASSOC.right ) return p; + return p+1; } @Override diff --git a/tool/src/org/antlr/v4/parse/ANTLRParser.g b/tool/src/org/antlr/v4/parse/ANTLRParser.g index 2998f7c91..bfcf5c1b8 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRParser.g +++ b/tool/src/org/antlr/v4/parse/ANTLRParser.g @@ -654,8 +654,12 @@ altList // An individual alt with an optional alt option like alternative @init { paraphrases.push("matching alternative"); } -@after { paraphrases.pop(); } - : ruleElementOptions? e+=element+ -> ^(ALT ruleElementOptions? $e+) +@after { + paraphrases.pop(); + Grammar.setNodeOptions($tree, $o.tree); +} + : o=ruleElementOptions? + e+=element+ -> ^(ALT ruleElementOptions? $e+) | -> ^(ALT EPSILON) // empty alt ; @@ -879,7 +883,8 @@ if ( options!=null ) { // Terminals may be adorned with certain options when // reference in the grammar: TOK<,,,> ruleElementOptions - : LT (elementOption (COMMA elementOption)*)? GT -> ^(ELEMENT_OPTIONS[$LT,"ELEMENT_OPTIONS"] elementOption*) + : LT (elementOption (COMMA elementOption)*)? GT + -> ^(ELEMENT_OPTIONS[$LT,"ELEMENT_OPTIONS"] elementOption*) ; // When used with elements we can specify what the tree node type can diff --git a/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g index 24948df37..16980b536 100644 --- a/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g +++ b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g @@ -49,9 +49,8 @@ private String ruleName; private int currentOuterAltNumber; // which outer alt of rule? public int numAlts; // how many alts for this rule total? -public void setTokenAssoc(GrammarAST t, int alt) {} +public void setAltAssoc(AltAST altTree, int alt) {} public void binaryAlt(AltAST altTree, int alt) {} -public void ternaryAlt(AltAST altTree, int alt) {} public void prefixAlt(AltAST altTree, int alt) {} public void suffixAlt(AltAST altTree, int alt) {} public void otherAlt(AltAST altTree, int alt) {} @@ -112,47 +111,37 @@ ruleBlock returns [boolean isLeftRec] /** An alt is either prefix, suffix, binary, or ternary operation or "other" */ outerAlternative returns [boolean isLeftRec] - : (binaryMultipleOp)=> binaryMultipleOp + : (binary)=> binary {binaryAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;} - | (binary)=> binary - {binaryAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;} - | (ternary)=> ternary - {ternaryAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;} | (prefix)=> prefix {prefixAlt((AltAST)$start, currentOuterAltNumber);} | (suffix)=> suffix {suffixAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;} - | ^(ALT element+) // "other" case - {otherAlt((AltAST)$start, currentOuterAltNumber);} - ; - -// (ALT (= a e) (= op (SET '*' '/')) (= b e) {}) (ALT INT {}) (ALT '(' (= x e) ')' {}) -binaryMultipleOp - : ^( ALT recurse bops recurse ACTION? ) - ; - -bops: ^(ASSIGN ID bops) - | ^( BLOCK ( ^( ALT (op=token)+ {setTokenAssoc($op.t, currentOuterAltNumber);} ) )+ ) - | ^(SET (op=token)+ {setTokenAssoc($op.t, currentOuterAltNumber);}) + | nonLeftRecur {otherAlt((AltAST)$start, currentOuterAltNumber);} ; binary - : ^( ALT recurse (op=token)+ {setTokenAssoc($op.t, currentOuterAltNumber);} recurse ACTION? ) - ; - -ternary - : ^( ALT recurse op=token recurse token recurse ACTION? ) {setTokenAssoc($op.t, currentOuterAltNumber);} + : ^( ALT ruleElementOptions? recurse element+ recurse ACTION? ) + {setAltAssoc((AltAST)$ALT,currentOuterAltNumber);} ; prefix - : ^( ALT {setTokenAssoc((GrammarAST)input.LT(1), currentOuterAltNumber);} + : ^( ALT ruleElementOptions? ({!((CommonTree)input.LT(1)).getText().equals(ruleName)}? element)+ recurse ACTION? ) + {setAltAssoc((AltAST)$ALT,currentOuterAltNumber);} ; -suffix : ^( ALT recurse {setTokenAssoc((GrammarAST)input.LT(1), currentOuterAltNumber);} element+ ) ; +suffix + : ^( ALT ruleElementOptions? recurse element+ ) + {setAltAssoc((AltAST)$ALT,currentOuterAltNumber);} + ; +nonLeftRecur + : ^(ALT element+) // no assoc for these; ignore if present + ; + recurse : ^(ASSIGN ID recurseNoLabel) | recurseNoLabel @@ -164,16 +153,16 @@ token returns [GrammarAST t=null] : ^(ASSIGN ID s=token {$t = $s.t;}) | ^(PLUS_ASSIGN ID s=token {$t = $s.t;}) | b=STRING_LITERAL {$t = $b;} - | ^(b=STRING_LITERAL elementOptions) {$t = $b;} - | ^(c=TOKEN_REF elementOptions) {$t = $c;} + | ^(b=STRING_LITERAL ruleElementOptions) {$t = $b;} + | ^(c=TOKEN_REF ruleElementOptions) {$t = $c;} | c=TOKEN_REF {$t = $c;} ; -elementOptions - : ^(ELEMENT_OPTIONS elementOption*) +ruleElementOptions + : ^(ELEMENT_OPTIONS ruleElementOption*) ; -elementOption +ruleElementOption : ID | ^(ASSIGN ID ID) | ^(ASSIGN ID STRING_LITERAL) @@ -211,16 +200,16 @@ block ; alternative - : ^(ALT elementOptions? element+) + : ^(ALT ruleElementOptions? element+) ; atom : ^(RULE_REF ARG_ACTION?) - | ^(STRING_LITERAL elementOptions) + | ^(STRING_LITERAL ruleElementOptions) | STRING_LITERAL - | ^(TOKEN_REF elementOptions) + | ^(TOKEN_REF ruleElementOptions) | TOKEN_REF - | ^(WILDCARD elementOptions) + | ^(WILDCARD ruleElementOptions) | WILDCARD | ^(DOT ID element) ; diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index 67d9f04b9..5f79b354d 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -754,6 +754,7 @@ public class Grammar implements AttributeResolver { * set option assoc=right in TOKEN_REF. */ public static void setNodeOptions(GrammarAST node, GrammarAST options) { + if ( options==null ) return; GrammarASTWithOptions t = (GrammarASTWithOptions)node; if ( t.getChildCount()==0 || options.getChildCount()==0 ) return; for (Object o : options.getChildren()) { diff --git a/tool/src/org/antlr/v4/tool/ast/GrammarAST.java b/tool/src/org/antlr/v4/tool/ast/GrammarAST.java index c0a3f02f5..0dc75db48 100644 --- a/tool/src/org/antlr/v4/tool/ast/GrammarAST.java +++ b/tool/src/org/antlr/v4/tool/ast/GrammarAST.java @@ -115,6 +115,21 @@ public class GrammarAST extends CommonTree { return nodes; } + public List getNodesWithTypePreorderDFS(IntervalSet types) { + ArrayList nodes = new ArrayList(); + getNodesWithTypePreorderDFS_(nodes, types); + return nodes; + } + + public void getNodesWithTypePreorderDFS_(List nodes, IntervalSet types) { + if ( types.contains(this.getType()) ) nodes.add(this); + // walk all children of root. + for (int i= 0; i < getChildCount(); i++) { + GrammarAST child = (GrammarAST)getChild(i); + child.getNodesWithTypePreorderDFS_(nodes, types); + } + } + public AltAST getOutermostAltNode() { if ( this instanceof AltAST && parent.parent instanceof RuleAST ) { return (AltAST)this; diff --git a/tool/test/org/antlr/v4/test/TestLeftRecursion.java b/tool/test/org/antlr/v4/test/TestLeftRecursion.java index 271032d17..ff71f58ed 100644 --- a/tool/test/org/antlr/v4/test/TestLeftRecursion.java +++ b/tool/test/org/antlr/v4/test/TestLeftRecursion.java @@ -33,7 +33,8 @@ package org.antlr.v4.test; import org.antlr.v4.tool.ErrorType; import org.junit.Test; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; /** */ public class TestLeftRecursion extends BaseTest { @@ -115,8 +116,8 @@ public class TestLeftRecursion extends BaseTest { "s @after {System.out.println($ctx.toStringTree(this));} : e EOF ;\n" + // must indicate EOF can follow or 'a' won't match "e : e '*' e" + " | e '+' e" + - " | e '?' e ':' e" + - " | e '=' e" + + " | e '?' e ':' e" + + " | e '=' e" + " | ID" + " ;\n" + "ID : 'a'..'z'+ ;\n" + @@ -198,23 +199,24 @@ public class TestLeftRecursion extends BaseTest { " | e 'instanceof' e\n" + " | e ('==' | '!=') e\n" + " | e '&' e\n" + - " | e '^' e\n" + + " | e '^' e\n" + " | e '|' e\n" + " | e '&&' e\n" + " | e '||' e\n" + " | e '?' e ':' e\n" + - " | e ('='\n" + - " |'+='\n" + - " |'-='\n" + - " |'*='\n" + - " |'/='\n" + - " |'&='\n" + - " |'|='\n" + - " |'^='\n" + - " |'>>='\n" + - " |'>>>='\n" + - " |'<<='\n" + - " |'%=') e\n" + + " |" + + " e ('='\n" + + " |'+='\n" + + " |'-='\n" + + " |'*='\n" + + " |'/='\n" + + " |'&='\n" + + " |'|='\n" + + " |'^='\n" + + " |'>>='\n" + + " |'>>>='\n" + + " |'<<='\n" + + " |'%=') e\n" + " ;\n" + "type: ID \n" + " | ID '[' ']'\n" + @@ -229,6 +231,8 @@ public class TestLeftRecursion extends BaseTest { "(a|b)&c", "(s (e (e ( (e (e a) | (e b)) )) & (e c)) )", "a > b", "(s (e (e a) > (e b)) )", "a >> b", "(s (e (e a) >> (e b)) )", + "a=b=c", "(s (e (e a) = (e (e b) = (e c))) )", + "a^b^c", "(s (e (e a) ^ (e (e b) ^ (e c))) )", "(T)x", "(s (e ( (type T) ) (e x)) )", "new A().b", "(s (e (e new (type A) ( )) . b) )", "(T)t.f()", "(s (e (e ( (type T) ) (e (e t) . f)) ( )) )",