Merge pull request #314 from parrt/reorg-left-recur-elim

Reorg left recur elim
This commit is contained in:
Terence Parr 2013-08-08 16:13:49 -07:00
commit 2637d64320
9 changed files with 136 additions and 115 deletions

View File

@ -1,5 +1,44 @@
ANTLR v4 Honey Badger
August 7, 2013
* [BREAKING CHANGE] Altered left-recursion elimination to be simpler. Now,
we use the following patterns:
* Binary expressions are expressions which contain a recursive invocation of
the rule as the first and last element of the alternative.
* Suffix expressions contain a recursive invocation of the rule as the first
element of the alternative, but not as the last element.
* Prefix expressions contain a recursive invocation of the rule as the last
element of the alternative, but not as the first element.
There is no such thing as a "ternary" expression--they are just binary
expressions in disguise.
The right-associativity specifiers are no longer on the individual tokens
because associativity is applied on a per-alternative basis anyway. The option
is now on the individual alternative; e.g.,
e : e '*' e
| e '+' e
|<assoc=right> e '?' e ':' e
|<assoc=right> e '=' e
| INT
;
If your language uses a right-associative ternary operator, you will need
to update your grammar to include <assoc=right> on the alternative that
contains that operator.
This also fixes #245 and fixes #268:
https://github.com/antlr/antlr4/issues/245
https://github.com/antlr/antlr4/issues/268
To smooth the transition, <assoc=right> is still allowed on token references
but it is ignored.
June 30, 2013 -- 4.1 release
June 24, 2013

View File

@ -38,11 +38,11 @@ import org.antlr.v4.Tool;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.parse.LeftRecursiveRuleWalker;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.Pair;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.ast.AltAST;
import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.GrammarASTWithOptions;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.STGroupFile;
@ -115,21 +115,19 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
}
@Override
public void setTokenPrec(GrammarAST t, int alt) {
public void setAltAssoc(AltAST t, int alt) {
ASSOC assoc = ASSOC.left;
if ( t instanceof GrammarASTWithOptions ) {
if ( ((GrammarASTWithOptions)t).getOptions()!=null ) {
String a = ((GrammarASTWithOptions)t).getOptionString("assoc");
if ( a!=null ) {
if ( a.equals(ASSOC.right.toString()) ) {
assoc = ASSOC.right;
}
else if ( a.equals(ASSOC.left.toString()) ) {
assoc = ASSOC.left;
}
else {
tool.errMgr.toolError(ErrorType.ILLEGAL_OPTION_VALUE, "assoc", assoc);
}
if ( t.getOptions()!=null ) {
String a = t.getOptionString("assoc");
if ( a!=null ) {
if ( a.equals(ASSOC.right.toString()) ) {
assoc = ASSOC.right;
}
else if ( a.equals(ASSOC.left.toString()) ) {
assoc = ASSOC.left;
}
else {
tool.errMgr.toolError(ErrorType.ILLEGAL_OPTION_VALUE, "assoc", assoc);
}
}
}
@ -139,7 +137,7 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
}
altAssociativity.put(alt, assoc);
//System.out.println("op " + alt + ": " + t.getText()+", assoc="+assoc);
System.out.println("setAltAssoc: op " + alt + ": " + t.getText()+", assoc="+assoc);
}
@Override
@ -169,32 +167,6 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
//System.out.println("binaryAlt " + alt + ": " + altText + ", rewrite=" + rewriteText);
}
/** Convert e ? e : e -> ? e : e_[nextPrec]
 *
 *  Works on a dupTree() copy of the alternative: strips the leading
 *  recursive rule reference (remembering its label, if any), strips the
 *  assoc options and the alt label, and passes the copy through
 *  addPrecedenceArgToLastRule() with the next precedence. The trimmed
 *  text of the result is stored, together with the labels and the
 *  original tree, in {@code ternaryAlts} under key {@code alt}.
 *
 *  @param originalAltTree the alternative to record; it is copied before
 *         any stripping and is itself handed to the recorded alt info
 *  @param alt alternative number used as the map key and to compute
 *         the next precedence
 */
@Override
public void ternaryAlt(AltAST originalAltTree, int alt) {
// all stripping below is applied to this copy, not to originalAltTree
AltAST altTree = (AltAST)originalAltTree.dupTree();
String altLabel = altTree.altLabel!=null ? altTree.altLabel.getText() : null;
// removes the leading recursive rule ref; result is its label node or null
GrammarAST lrlabel = stripLeftRecursion(altTree);
String label = lrlabel != null ? lrlabel.getText() : null;
if ( lrlabel!=null ) {
// remember which alt label (possibly null) the recursive-ref label belongs to
leftRecursiveRuleRefLabels.add(new Pair<GrammarAST,String>(lrlabel,altLabel));
}
stripAssocOptions(altTree);
stripAltLabel(altTree);
// precedence passed to the trailing recursive reference
int nextPrec = nextPrecedence(alt);
altTree = addPrecedenceArgToLastRule(altTree, nextPrec);
String altText = text(altTree);
altText = altText.trim();
LeftRecursiveRuleAltInfo a =
new LeftRecursiveRuleAltInfo(alt, altText, label, altLabel, originalAltTree);
a.nextPrec = nextPrec;
ternaryAlts.put(alt, a);
//System.out.println("ternaryAlt " + alt + ": " + altText + ", rewrite=" + rewriteText);
}
@Override
public void prefixAlt(AltAST originalAltTree, int alt) {
AltAST altTree = (AltAST)originalAltTree.dupTree();
@ -282,24 +254,18 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
public AltAST addPrecedenceArgToRules(AltAST t, int prec) {
if ( t==null ) return null;
for (GrammarAST rref : t.getNodesWithType(RULE_REF)) {
if ( rref.getText().equals(ruleName) ) {
// get all top-level rule refs from ALT
List<GrammarAST> outerAltRuleRefs = t.getNodesWithTypePreorderDFS(IntervalSet.of(RULE_REF));
for (GrammarAST rref : outerAltRuleRefs) {
boolean recursive = rref.getText().equals(ruleName);
boolean rightmost = rref == outerAltRuleRefs.get(outerAltRuleRefs.size()-1);
if ( recursive && rightmost ) {
rref.setText(ruleName+"["+prec+"]");
}
}
return t;
}
/** Append a precedence argument to the last rule reference in {@code t},
 *  but only if that reference is to {@code ruleName}.
 *
 *  "Last" means the final node yielded by
 *  {@code t.getNodesWithType(RULE_REF)}; when it matches, its text is
 *  rewritten in place to {@code ruleName[prec]}. No other rule
 *  reference is touched.
 *
 *  @param t alternative to modify in place; may be null
 *  @param prec precedence value placed inside the brackets
 *  @return {@code t} itself, or null if {@code t} was null
 */
public AltAST addPrecedenceArgToLastRule(AltAST t, int prec) {
if ( t==null ) return null;
GrammarAST last = null;
// keep overwriting so that 'last' ends up as the final RULE_REF returned
for (GrammarAST rref : t.getNodesWithType(RULE_REF)) { last = rref; }
if ( last !=null && last.getText().equals(ruleName) ) {
last.setText(ruleName+"["+prec+"]");
}
return t;
}
public void stripAssocOptions(GrammarAST t) {
if ( t==null ) return;
for (GrammarAST options : t.getNodesWithType(ELEMENT_OPTIONS)) {
@ -347,15 +313,21 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
public GrammarAST stripLeftRecursion(GrammarAST altAST) {
GrammarAST lrlabel=null;
GrammarAST first = (GrammarAST)altAST.getChild(0);
int leftRecurRuleIndex = 0;
if ( first.getType() == ELEMENT_OPTIONS ) {
first = (GrammarAST)altAST.getChild(1);
leftRecurRuleIndex = 1;
}
Tree rref = first.getChild(1); // if label=rule
if ( (first.getType()==RULE_REF && first.getText().equals(ruleName)) ||
(rref!=null && rref.getType()==RULE_REF && rref.getText().equals(ruleName)) )
{
if ( first.getType()==ASSIGN ) lrlabel = (GrammarAST)first.getChild(0);
// remove rule ref (first child)
altAST.deleteChild(0);
// reset index so it prints properly
GrammarAST newFirstChild = (GrammarAST)altAST.getChild(0);
// remove rule ref (first child unless options present)
altAST.deleteChild(leftRecurRuleIndex);
// reset index so it prints properly (sets token range of
// ALT to start to right of left recur rule we deleted)
GrammarAST newFirstChild = (GrammarAST)altAST.getChild(leftRecurRuleIndex);
altAST.setTokenStartIndex(newFirstChild.getTokenStartIndex());
}
return lrlabel;
@ -385,10 +357,11 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
return numAlts-alt+1;
}
// Assumes left assoc
public int nextPrecedence(int alt) {
int p = precedence(alt);
if ( altAssociativity.get(alt)==ASSOC.left ) p++;
return p;
if ( altAssociativity.get(alt)==ASSOC.right ) return p;
return p+1;
}
@Override

View File

@ -651,17 +651,16 @@ altList
: alternative (OR alternative)* -> alternative+
;
// An individual alt with an optional rewrite clause for the
// elements of the alt.
// An individual alt with an optional alt option like <assoc=right>
alternative
@init { paraphrases.push("matching alternative"); }
@after { paraphrases.pop(); }
: elements -> elements
| -> ^(ALT<AltAST> EPSILON) // empty alt
;
elements
: e+=element+ -> ^(ALT<AltAST> $e+)
@after {
paraphrases.pop();
Grammar.setNodeOptions($tree, $o.tree);
}
: o=elementOptions?
e+=element+ -> ^(ALT<AltAST> elementOptions? $e+)
| -> ^(ALT<AltAST> EPSILON) // empty alt
;
element
@ -884,7 +883,8 @@ if ( options!=null ) {
// Terminals may be adorned with certain options when
// reference in the grammar: TOK<,,,>
elementOptions
: LT (elementOption (COMMA elementOption)*)? GT -> ^(ELEMENT_OPTIONS[$LT,"ELEMENT_OPTIONS"] elementOption*)
: LT (elementOption (COMMA elementOption)*)? GT
-> ^(ELEMENT_OPTIONS[$LT,"ELEMENT_OPTIONS"] elementOption*)
;
// When used with elements we can specify what the tree node type can

View File

@ -766,7 +766,7 @@ alternative
finishAlt((AltAST)$start);
exitAlternative((AltAST)$start);
}
: ^(ALT element+)
: ^(ALT elementOptions? element+)
| ^(ALT EPSILON)
;

View File

@ -49,9 +49,8 @@ private String ruleName;
private int currentOuterAltNumber; // which outer alt of rule?
public int numAlts; // how many alts for this rule total?
public void setTokenPrec(GrammarAST t, int alt) {}
public void setAltAssoc(AltAST altTree, int alt) {}
public void binaryAlt(AltAST altTree, int alt) {}
public void ternaryAlt(AltAST altTree, int alt) {}
public void prefixAlt(AltAST altTree, int alt) {}
public void suffixAlt(AltAST altTree, int alt) {}
public void otherAlt(AltAST altTree, int alt) {}
@ -112,46 +111,36 @@ ruleBlock returns [boolean isLeftRec]
/** An alt is either prefix, suffix, binary, or ternary operation or "other" */
outerAlternative returns [boolean isLeftRec]
: (binaryMultipleOp)=> binaryMultipleOp
: (binary)=> binary
{binaryAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;}
| (binary)=> binary
{binaryAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;}
| (ternary)=> ternary
{ternaryAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;}
| (prefix)=> prefix
{prefixAlt((AltAST)$start, currentOuterAltNumber);}
| (suffix)=> suffix
{suffixAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;}
| ^(ALT element+) // "other" case
{otherAlt((AltAST)$start, currentOuterAltNumber);}
;
// (ALT (= a e) (= op (SET '*' '/')) (= b e) {}) (ALT INT {}) (ALT '(' (= x e) ')' {})
binaryMultipleOp
: ^( ALT recurse bops recurse ACTION? )
;
bops: ^(ASSIGN ID bops)
| ^( BLOCK ( ^( ALT (op=token)+ {setTokenPrec($op.t, currentOuterAltNumber);} ) )+ )
| ^(SET (op=token)+ {setTokenPrec($op.t, currentOuterAltNumber);})
| nonLeftRecur {otherAlt((AltAST)$start, currentOuterAltNumber);}
;
binary
: ^( ALT recurse (op=token)+ {setTokenPrec($op.t, currentOuterAltNumber);} recurse ACTION? )
;
ternary
: ^( ALT recurse op=token recurse token recurse ACTION? ) {setTokenPrec($op.t, currentOuterAltNumber);}
: ^( ALT elementOptions? recurse element+ recurse ACTION? )
{setAltAssoc((AltAST)$ALT,currentOuterAltNumber);}
;
prefix
: ^( ALT {setTokenPrec((GrammarAST)input.LT(1), currentOuterAltNumber);}
: ^( ALT elementOptions?
({!((CommonTree)input.LT(1)).getText().equals(ruleName)}? element)+
recurse ACTION?
)
{setAltAssoc((AltAST)$ALT,currentOuterAltNumber);}
;
suffix : ^( ALT recurse {setTokenPrec((GrammarAST)input.LT(1), currentOuterAltNumber);} element+ ) ;
suffix
: ^( ALT elementOptions? recurse element+ )
{setAltAssoc((AltAST)$ALT,currentOuterAltNumber);}
;
nonLeftRecur
: ^(ALT element+) // no assoc for these; ignore if <assoc=...> present
;
recurse
: ^(ASSIGN ID recurseNoLabel)
@ -211,7 +200,7 @@ block
;
alternative
: ^(ALT element+)
: ^(ALT elementOptions? element+)
;
atom

View File

@ -754,6 +754,7 @@ public class Grammar implements AttributeResolver {
* set option assoc=right in TOKEN_REF.
*/
public static void setNodeOptions(GrammarAST node, GrammarAST options) {
if ( options==null ) return;
GrammarASTWithOptions t = (GrammarASTWithOptions)node;
if ( t.getChildCount()==0 || options.getChildCount()==0 ) return;
for (Object o : options.getChildren()) {

View File

@ -35,7 +35,7 @@ import org.antlr.v4.analysis.LeftRecursiveRuleAltInfo;
import org.antlr.v4.tool.Alternative;
/** Any ALT (which can be child of ALT_REWRITE node) */
public class AltAST extends GrammarAST {
public class AltAST extends GrammarASTWithOptions {
public Alternative alt;
/** If we transformed this alt from a left-recursive one, need info on it */

View File

@ -115,6 +115,21 @@ public class GrammarAST extends CommonTree {
return nodes;
}
/** Collect this node and every descendant whose type is contained in
 *  {@code types}, in preorder: a node is added before its children, and
 *  children are visited in index order.
 *
 *  @param types set of node types to match
 *  @return a newly created list of the matching nodes; empty if none match
 */
public List<GrammarAST> getNodesWithTypePreorderDFS(IntervalSet types) {
ArrayList<GrammarAST> nodes = new ArrayList<GrammarAST>();
// the recursive worker appends matches to 'nodes'
getNodesWithTypePreorderDFS_(nodes, types);
return nodes;
}
/** Recursive worker for the preorder search: append this node to
 *  {@code nodes} if its type is contained in {@code types}, then recurse
 *  into each child in index order.
 *
 *  @param nodes accumulator that matching nodes are appended to
 *  @param types set of node types to match
 */
public void getNodesWithTypePreorderDFS_(List<GrammarAST> nodes, IntervalSet types) {
// test this node before its children so the result is in preorder
if ( types.contains(this.getType()) ) nodes.add(this);
// walk all children of root.
for (int i= 0; i < getChildCount(); i++) {
// the cast assumes every child is a GrammarAST
GrammarAST child = (GrammarAST)getChild(i);
child.getNodesWithTypePreorderDFS_(nodes, types);
}
}
public AltAST getOutermostAltNode() {
if ( this instanceof AltAST && parent.parent instanceof RuleAST ) {
return (AltAST)this;

View File

@ -33,7 +33,8 @@ package org.antlr.v4.test;
import org.antlr.v4.tool.ErrorType;
import org.junit.Test;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
/** */
public class TestLeftRecursion extends BaseTest {
@ -115,8 +116,8 @@ public class TestLeftRecursion extends BaseTest {
"s @after {System.out.println($ctx.toStringTree(this));} : e EOF ;\n" + // must indicate EOF can follow or 'a<EOF>' won't match
"e : e '*' e" +
" | e '+' e" +
" | e '?'<assoc=right> e ':' e" +
" | e '='<assoc=right> e" +
" |<assoc=right> e '?' e ':' e" +
" |<assoc=right> e '=' e" +
" | ID" +
" ;\n" +
"ID : 'a'..'z'+ ;\n" +
@ -198,23 +199,24 @@ public class TestLeftRecursion extends BaseTest {
" | e 'instanceof' e\n" +
" | e ('==' | '!=') e\n" +
" | e '&' e\n" +
" | e '^'<assoc=right> e\n" +
" |<assoc=right> e '^' e\n" +
" | e '|' e\n" +
" | e '&&' e\n" +
" | e '||' e\n" +
" | e '?' e ':' e\n" +
" | e ('='<assoc=right>\n" +
" |'+='<assoc=right>\n" +
" |'-='<assoc=right>\n" +
" |'*='<assoc=right>\n" +
" |'/='<assoc=right>\n" +
" |'&='<assoc=right>\n" +
" |'|='<assoc=right>\n" +
" |'^='<assoc=right>\n" +
" |'>>='<assoc=right>\n" +
" |'>>>='<assoc=right>\n" +
" |'<<='<assoc=right>\n" +
" |'%='<assoc=right>) e\n" +
" |<assoc=right>" +
" e ('='\n" +
" |'+='\n" +
" |'-='\n" +
" |'*='\n" +
" |'/='\n" +
" |'&='\n" +
" |'|='\n" +
" |'^='\n" +
" |'>>='\n" +
" |'>>>='\n" +
" |'<<='\n" +
" |'%=') e\n" +
" ;\n" +
"type: ID \n" +
" | ID '[' ']'\n" +
@ -229,6 +231,8 @@ public class TestLeftRecursion extends BaseTest {
"(a|b)&c", "(s (e (e ( (e (e a) | (e b)) )) & (e c)) <EOF>)",
"a > b", "(s (e (e a) > (e b)) <EOF>)",
"a >> b", "(s (e (e a) >> (e b)) <EOF>)",
"a=b=c", "(s (e (e a) = (e (e b) = (e c))) <EOF>)",
"a^b^c", "(s (e (e a) ^ (e (e b) ^ (e c))) <EOF>)",
"(T)x", "(s (e ( (type T) ) (e x)) <EOF>)",
"new A().b", "(s (e (e new (type A) ( )) . b) <EOF>)",
"(T)t.f()", "(s (e (e ( (type T) ) (e (e t) . f)) ( )) <EOF>)",