reorg; cleanup. added ast.g ptr setting to nodes created during left-recursion elim.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 8915]
This commit is contained in:
parrt 2011-07-27 17:03:54 -08:00
parent e084c12483
commit 10d7a79324
6 changed files with 292 additions and 321 deletions

View File

@ -244,8 +244,7 @@ public class LexerATNSimulator extends ATNSimulator {
public ATNState getReachableTarget(Transition trans, int t) {
if ( trans instanceof AtomTransition ) {
AtomTransition at = (AtomTransition)trans;
boolean not = trans instanceof NotAtomTransition;
if ( !not && at.label == t || not && at.label!=t ) {
if ( at.label == t ) {
if ( debug ) {
System.out.println("match "+getTokenName(at.label));
}

View File

@ -1,43 +0,0 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime.atn;
public class NotAtomTransition extends AtomTransition {
public NotAtomTransition(int label, ATNState target) {
super(label, target);
}
public NotAtomTransition(ATNState target) {
super(target);
}
public String toString() {
return '~'+super.toString();
}
}

View File

@ -30,8 +30,6 @@
package org.antlr.v4;
import org.antlr.runtime.*;
import org.antlr.runtime.misc.DoubleKeyMap;
import org.antlr.runtime.tree.*;
import org.antlr.v4.analysis.AnalysisPipeline;
import org.antlr.v4.automata.*;
import org.antlr.v4.codegen.CodeGenPipeline;
@ -225,7 +223,7 @@ public class Tool {
public void process(Grammar g) {
g.loadImportedGrammars();
integrateImportedGrammars(g);
GrammarTransformPipeline.integrateImportedGrammars(g);
GrammarTransformPipeline transform = new GrammarTransformPipeline(this);
transform.process(g.ast);
@ -235,7 +233,7 @@ public class Tool {
if ( g.ast!=null && g.ast.grammarType== ANTLRParser.COMBINED &&
!g.ast.hasErrors )
{
lexerAST = extractImplicitLexer(g); // alters g.ast
lexerAST = GrammarTransformPipeline.extractImplicitLexer(g); // alters g.ast
if ( lexerAST!=null ) {
lexerg = new LexerGrammar(this, lexerAST);
lexerg.fileName = g.fileName;
@ -295,11 +293,7 @@ public class Tool {
else g = new Grammar(this, ast);
// ensure each node has pointer to surrounding grammar
TreeVisitor v = new TreeVisitor(new GrammarASTAdaptor());
v.visit(ast, new TreeVisitorAction() {
public Object pre(Object t) { ((GrammarAST)t).g = g; return t; }
public Object post(Object t) { return t; }
});
GrammarTransformPipeline.setGrammarPtr(g, ast);
return g;
}
@ -353,269 +347,6 @@ public class Tool {
return null;
}
/** Merge all the rules, token definitions, and named actions from
    imported grammars into the root grammar tree.  Perform:

    (tokens { X (= Y 'y')) + (tokens { Z )  ->  (tokens { X (= Y 'y') Z)

    (@ members {foo}) + (@ members {bar})  ->  (@ members {foobar})

    (RULES (RULE x y)) + (RULES (RULE z))  ->  (RULES (RULE x y z))

    Rules in root prevent same rule from being appended to RULES node.

    The goal is a complete combined grammar so we can ignore subordinate
    grammars.

    Named actions are keyed by (scope, name).  An AT node with more than
    two children is read as ^(AT scope name ACTION); otherwise it is
    ^(AT name ACTION) and the root grammar's default action scope is used.

    @param rootGrammar grammar whose AST is modified in place; nothing is
           done when it reports no imported grammars
 */
public void integrateImportedGrammars(Grammar rootGrammar) {
    List<Grammar> imports = rootGrammar.getAllImportedGrammars();
    if ( imports==null ) return;

    GrammarAST root = rootGrammar.ast;
    GrammarAST id = (GrammarAST) root.getChild(0);
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(id.token.getInputStream());

    GrammarAST tokensRoot = (GrammarAST)root.getFirstChildWithType(ANTLRParser.TOKENS);

    List<GrammarAST> actionRoots = root.getNodesWithType(ANTLRParser.AT);

    // Compute list of rules in root grammar and ensure we have a RULES node
    GrammarAST RULES = (GrammarAST)root.getFirstChildWithType(ANTLRParser.RULES);
    Set<String> rootRuleNames = new HashSet<String>();
    if ( RULES==null ) { // no rules in root, make RULES node, hook in
        RULES = (GrammarAST)adaptor.create(ANTLRParser.RULES, "RULES");
        RULES.g = rootGrammar;
        root.addChild(RULES);
    }
    else {
        // make list of rules we have in root grammar
        List<GrammarAST> rootRules = RULES.getNodesWithType(ANTLRParser.RULE);
        for (GrammarAST r : rootRules) rootRuleNames.add(r.getChild(0).getText());
    }

    for (Grammar imp : imports) {
        // COPY TOKENS
        GrammarAST imp_tokensRoot = (GrammarAST)imp.ast.getFirstChildWithType(ANTLRParser.TOKENS);
        if ( imp_tokensRoot!=null ) {
            System.out.println("imported tokens: "+imp_tokensRoot.getChildren());
            if ( tokensRoot==null ) {
                tokensRoot = (GrammarAST)adaptor.create(ANTLRParser.TOKENS, "TOKENS");
                tokensRoot.g = rootGrammar;
                root.insertChild(1, tokensRoot); // ^(GRAMMAR ID TOKENS...)
            }
            tokensRoot.addChildren(imp_tokensRoot.getChildren());
        }

        List<GrammarAST> imp_actionRoots = imp.ast.getNodesWithType(ANTLRParser.AT);

        // COPY ACTIONS
        if ( imp_actionRoots!=null ) {
            // Build the combined list only after the null check so a null
            // import list can never reach addAll().
            List<GrammarAST> all_actionRoots = new ArrayList<GrammarAST>();
            if ( actionRoots!=null ) all_actionRoots.addAll(actionRoots);
            all_actionRoots.addAll(imp_actionRoots);

            DoubleKeyMap<String, String, GrammarAST> namedActions =
                new DoubleKeyMap<String, String, GrammarAST>();

            System.out.println("imported actions: "+imp_actionRoots);
            for (GrammarAST at : all_actionRoots) {
                String scopeName = rootGrammar.getDefaultActionScope();
                GrammarAST name, action;
                if ( at.getChildCount()>2 ) { // must have a scope
                    // ^(AT scope name ACTION): the scope is child 0, not
                    // child 1 (which is the action name).
                    GrammarAST scope = (GrammarAST)at.getChild(0);
                    scopeName = scope.getText();
                    name = (GrammarAST)at.getChild(1);
                    action = (GrammarAST)at.getChild(2);
                }
                else {
                    name = (GrammarAST)at.getChild(0);
                    action = (GrammarAST)at.getChild(1);
                }
                GrammarAST prevAction = namedActions.get(scopeName, name.getText());
                if ( prevAction==null ) {
                    namedActions.put(scopeName, name.getText(), action);
                }
                else {
                    if ( prevAction.g == at.g ) {
                        // same grammar defined the same action twice
                        errMgr.grammarError(ErrorType.ACTION_REDEFINITION,
                                            at.g.fileName, name.token, name.getText());
                    }
                    else {
                        // different grammars: concatenate the action bodies,
                        // stripping the surrounding curlies of each
                        String s1 = prevAction.getText();
                        s1 = s1.substring(1, s1.length()-1);
                        String s2 = action.getText();
                        s2 = s2.substring(1, s2.length()-1);
                        String combinedAction = "{"+s1 + '\n'+ s2+"}";
                        prevAction.token.setText(combinedAction);
                    }
                }
            }
            // at this point, we have complete list of combined actions,
            // some of which are already living in root grammar.
            // Merge in any actions not in root grammar into root's tree.
            for (String scopeName : namedActions.keySet()) {
                for (String name : namedActions.keySet(scopeName)) {
                    GrammarAST action = namedActions.get(scopeName, name);
                    System.out.println(action.g.name+" "+scopeName+":"+name+"="+action.getText());
                    if ( action.g != rootGrammar ) {
                        root.insertChild(1, action.getParent());
                    }
                }
            }
        }

        // COPY RULES
        List<GrammarAST> rules = imp.ast.getNodesWithType(ANTLRParser.RULE);
        if ( rules!=null ) {
            for (GrammarAST r : rules) {
                System.out.println("imported rule: "+r.toStringTree());
                String name = r.getChild(0).getText();
                boolean rootAlreadyHasRule = rootRuleNames.contains(name);
                if ( !rootAlreadyHasRule ) {
                    RULES.addChild(r); // merge in if not overridden
                    rootRuleNames.add(name);
                }
            }
        }

        // options sections in imported grammars are reported as errors
        GrammarAST optionsRoot = (GrammarAST)imp.ast.getFirstChildWithType(ANTLRParser.OPTIONS);
        if ( optionsRoot!=null ) {
            errMgr.grammarError(ErrorType.OPTIONS_IN_DELEGATE,
                                optionsRoot.g.fileName, optionsRoot.token, imp.name);
        }
    }
    System.out.println("Grammar: "+rootGrammar.ast.toStringTree());
}
/** Build lexer grammar from combined grammar that looks like:
*
* (COMBINED_GRAMMAR A
* (tokens { X (= Y 'y'))
* (OPTIONS (= x 'y'))
* (@ members {foo})
* (@ lexer header {package jj;})
* (RULES (RULE .+)))
*
* Lexer rules and lexer-scoped actions are duplicated into the new tree
* and the originals are then deleted from the combined tree. Options are
* copied (except those named in Grammar.doNotCopyOptionsToLexer) and are
* not deleted from the combined tree.
* We'll have this Grammar share token symbols later; don't generate
* tokenVocab or tokens{} section.
*
* Side-effects: it removes children from GRAMMAR & RULES nodes
* in combined AST. Anything cut out is dup'd before
* adding to lexer to avoid "who's ur daddy" issues
*
* @param combinedGrammar grammar whose AST is split; its AST is altered
* @return the new lexer AST; the lexer AST without a RULES child when the
*         combined AST has no RULES node; or null when there are no
*         lexical rules, no literal aliases, and no string literals
*/
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
GrammarRootAST combinedAST = combinedGrammar.ast;
//System.out.println("before="+combinedAST.toStringTree());
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
List<GrammarAST> elements = combinedAST.getChildren();
// MAKE A GRAMMAR ROOT and ID
// lexer name is the text of the combined AST's first child plus "Lexer"
String lexerName = combinedAST.getChild(0).getText()+"Lexer";
GrammarRootAST lexerAST =
new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR,"LEXER_GRAMMAR"));
lexerAST.grammarType = ANTLRParser.LEXER;
lexerAST.token.setInputStream(combinedAST.token.getInputStream());
lexerAST.addChild((GrammarAST)adaptor.create(ANTLRParser.ID, lexerName));
// MOVE OPTIONS
// (options are dup'd into the lexer; nothing is deleted from the combined tree here)
GrammarAST optionsRoot =
(GrammarAST)combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( optionsRoot!=null ) {
GrammarAST lexerOptionsRoot = (GrammarAST)adaptor.dupNode(optionsRoot);
lexerAST.addChild(lexerOptionsRoot);
List<GrammarAST> options = optionsRoot.getChildren();
for (GrammarAST o : options) {
String optionName = o.getChild(0).getText();
if ( !Grammar.doNotCopyOptionsToLexer.contains(optionName) ) {
lexerOptionsRoot.addChild((Tree)adaptor.dupTree(o));
}
}
}
// MOVE lexer:: actions
// collect first, delete afterwards, so the child list is not changed while iterating
List<GrammarAST> actionsWeMoved = new ArrayList<GrammarAST>();
for (GrammarAST e : elements) {
if ( e.getType()==ANTLRParser.AT ) {
// first child of a scoped action is the scope name
if ( e.getChild(0).getText().equals("lexer") ) {
lexerAST.addChild((Tree)adaptor.dupTree(e));
actionsWeMoved.add(e);
}
}
}
for (GrammarAST r : actionsWeMoved) {
combinedAST.deleteChild( r );
}
GrammarAST combinedRulesRoot =
(GrammarAST)combinedAST.getFirstChildWithType(ANTLRParser.RULES);
// no RULES node in the combined grammar: return what we have so far
if ( combinedRulesRoot==null ) return lexerAST;
// MOVE lexer rules
GrammarAST lexerRulesRoot =
(GrammarAST)adaptor.create(ANTLRParser.RULES, "RULES");
lexerAST.addChild(lexerRulesRoot);
List<GrammarAST> rulesWeMoved = new ArrayList<GrammarAST>();
List<GrammarASTWithOptions> rules = combinedRulesRoot.getChildren();
for (GrammarASTWithOptions r : rules) {
String ruleName = r.getChild(0).getText();
// a rule whose name starts with an uppercase letter is treated as a lexer rule
if ( Character.isUpperCase(ruleName.charAt(0)) ) {
lexerRulesRoot.addChild((Tree)adaptor.dupTree(r));
rulesWeMoved.add(r);
}
}
int nLexicalRules = rulesWeMoved.size();
for (GrammarAST r : rulesWeMoved) {
combinedRulesRoot.deleteChild( r );
}
// Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
Map<String,String> litAliases =
Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
if ( nLexicalRules==0 && (litAliases==null||litAliases.size()==0) &&
combinedGrammar.stringLiteralToTypeMap.size()==0 )
{
// no rules, tokens{}, or 'literals' in grammar
return null;
}
Set<String> stringLiterals = combinedGrammar.getStringLiterals();
// add strings from combined grammar (and imported grammars) into lexer
// put them first as they are keywords; must resolve ambigs to these rules
// System.out.println("strings from parser: "+stringLiterals);
for (String lit : stringLiterals) {
if ( litAliases!=null && litAliases.containsKey(lit) ) continue; // already has rule
// create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>))
String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
// can't use wizard; need special node types
GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
AltAST alt = new AltAST(ANTLRParser.ALT);
TerminalAST slit = new TerminalAST(new org.antlr.runtime.CommonToken(ANTLRParser.STRING_LITERAL, lit));
alt.addChild(slit);
blk.addChild(alt);
CommonToken idToken = new CommonToken(ANTLRParser.ID, rname);
litRule.addChild(new TerminalAST(idToken));
litRule.addChild(blk);
lexerRulesRoot.getChildren().add(0, litRule); // add first
lexerRulesRoot.freshenParentAndChildIndexes(); // reset indexes and set litRule parent
}
// TODO: take out after stable if slow
lexerAST.sanityCheckParentAndChildIndexes();
combinedAST.sanityCheckParentAndChildIndexes();
// System.out.println(combinedAST.toTokenString());
// lexerAST.freshenParentAndChildIndexesDeeply();
// combinedAST.freshenParentAndChildIndexesDeeply();
System.out.println("after extract implicit lexer ="+combinedAST.toStringTree());
System.out.println("lexer ="+lexerAST.toStringTree());
return lexerAST;
}
public void generateATNs(Grammar g) {
DOTGenerator dotGenerator = new DOTGenerator(g);
List<Grammar> grammars = new ArrayList<Grammar>();

View File

@ -51,6 +51,9 @@ setAlt
// (BLOCK (ALT (+ (BLOCK (ALT INT) (ALT ID)))))
// Wrap the block set under an EBNF suffix in a new BLOCK/ALT pair:
// ^(ebnfSuffix blockSet) becomes ^(ebnfSuffix ^(BLOCK ^(ALT blockSet))).
// The @after action passes the rule's result tree to setGrammarPtr with grammar g.
ebnfBlockSet
@after {
GrammarTransformPipeline.setGrammarPtr(g, $tree);
}
: ^(ebnfSuffix blockSet) -> ^(ebnfSuffix ^(BLOCK<BlockAST> ^(ALT blockSet)))
;
@ -64,6 +67,9 @@ ebnfSuffix
blockSet
@init {
boolean inLexer = Character.isUpperCase(currentRuleName.charAt(0));
}
@after {
GrammarTransformPipeline.setGrammarPtr(g, $tree);
}
: {!inContext("RULE")}? // if not rule block and > 1 alt
^(BLOCK ^(ALT setElement[inLexer]) ( ^(ALT setElement[inLexer]) )+)
@ -71,6 +77,9 @@ boolean inLexer = Character.isUpperCase(currentRuleName.charAt(0));
;
setElement[boolean inLexer]
@after {
GrammarTransformPipeline.setGrammarPtr(g, $tree);
}
: {!rewriteElems.contains($start.getText())}?
( a=STRING_LITERAL {!inLexer || CharSupport.getCharValueFromGrammarCharLiteral($a.getText())!=-1}?
| {!inLexer}?=> TOKEN_REF

View File

@ -244,7 +244,6 @@ public class DOTGenerator {
String label = String.valueOf(atom.label);
if ( isLexer ) label = "'"+getEdgeLabel(String.valueOf((char)atom.label))+"'";
else if ( grammar!=null ) label = grammar.getTokenDisplayName(atom.label);
if ( edge instanceof NotAtomTransition ) label = "~"+label;
edgeST.add("label", getEdgeLabel(label));
}
else if ( edge instanceof SetTransition ) {

View File

@ -30,6 +30,8 @@
package org.antlr.v4.tool;
import org.antlr.runtime.*;
import org.antlr.runtime.misc.DoubleKeyMap;
import org.antlr.runtime.tree.*;
import org.antlr.v4.Tool;
import org.antlr.v4.parse.*;
@ -82,6 +84,7 @@ public class GrammarTransformPipeline {
{
//System.out.println(ruleAST.toStringTree());
TokenStream tokens = ast.tokens;
Grammar g = ast.g;
String ruleName = ruleAST.getChild(0).getText();
LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker =
new LeftRecursiveRuleAnalyzer(tokens, ruleAST, tool, ruleName, language);
@ -106,14 +109,14 @@ public class GrammarTransformPipeline {
rules.add( leftRecursiveRuleWalker.getArtificialPrimaryRule() );
for (String ruleText : rules) {
// System.out.println("created: "+ruleText);
GrammarAST t = parseArtificialRule(ruleText);
GrammarAST t = parseArtificialRule(g, ruleText);
// insert into grammar tree
RULES.addChild(t);
System.out.println("added: "+t.toStringTree());
}
}
public GrammarAST parseArtificialRule(String ruleText) {
public GrammarAST parseArtificialRule(final Grammar g, String ruleText) {
ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText));
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
CommonTokenStream tokens = new CommonTokenStream(lexer);
@ -121,7 +124,9 @@ public class GrammarTransformPipeline {
p.setTreeAdaptor(adaptor);
try {
ParserRuleReturnScope r = p.rule();
return (GrammarAST)r.getTree();
GrammarAST tree = (GrammarAST) r.getTree();
setGrammarPtr(g, tree);
return tree;
}
catch (Exception e) {
tool.errMgr.toolError(ErrorType.INTERNAL_ERROR,
@ -131,4 +136,275 @@ public class GrammarTransformPipeline {
return null;
}
/** Point every node visited in {@code tree} at grammar {@code g}.
 *  Each visited node is cast to GrammarAST and has its {@code g} field
 *  assigned on the way down; nothing is done on the way back up.
 */
public static void setGrammarPtr(final Grammar g, GrammarAST tree) {
    TreeVisitorAction stampGrammar = new TreeVisitorAction() {
        public Object pre(Object node) {
            GrammarAST ast = (GrammarAST)node;
            ast.g = g;
            return node;
        }
        public Object post(Object node) {
            return node;
        }
    };
    TreeVisitor walker = new TreeVisitor(new GrammarASTAdaptor());
    walker.visit(tree, stampGrammar);
}
/** Merge all the rules, token definitions, and named actions from
    imported grammars into the root grammar tree.  Perform:

    (tokens { X (= Y 'y')) + (tokens { Z )  ->  (tokens { X (= Y 'y') Z)

    (@ members {foo}) + (@ members {bar})  ->  (@ members {foobar})

    (RULES (RULE x y)) + (RULES (RULE z))  ->  (RULES (RULE x y z))

    Rules in root prevent same rule from being appended to RULES node.

    The goal is a complete combined grammar so we can ignore subordinate
    grammars.

    Named actions are keyed by (scope, name).  An AT node with more than
    two children is read as ^(AT scope name ACTION); otherwise it is
    ^(AT name ACTION) and the root grammar's default action scope is used.
    Errors are reported through rootGrammar.tool.errMgr.

    @param rootGrammar grammar whose AST is modified in place; nothing is
           done when it reports no imported grammars
 */
public static void integrateImportedGrammars(Grammar rootGrammar) {
    List<Grammar> imports = rootGrammar.getAllImportedGrammars();
    if ( imports==null ) return;

    GrammarAST root = rootGrammar.ast;
    GrammarAST id = (GrammarAST) root.getChild(0);
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(id.token.getInputStream());

    GrammarAST tokensRoot = (GrammarAST)root.getFirstChildWithType(ANTLRParser.TOKENS);

    List<GrammarAST> actionRoots = root.getNodesWithType(ANTLRParser.AT);

    // Compute list of rules in root grammar and ensure we have a RULES node
    GrammarAST RULES = (GrammarAST)root.getFirstChildWithType(ANTLRParser.RULES);
    Set<String> rootRuleNames = new HashSet<String>();
    if ( RULES==null ) { // no rules in root, make RULES node, hook in
        RULES = (GrammarAST)adaptor.create(ANTLRParser.RULES, "RULES");
        RULES.g = rootGrammar;
        root.addChild(RULES);
    }
    else {
        // make list of rules we have in root grammar
        List<GrammarAST> rootRules = RULES.getNodesWithType(ANTLRParser.RULE);
        for (GrammarAST r : rootRules) rootRuleNames.add(r.getChild(0).getText());
    }

    for (Grammar imp : imports) {
        // COPY TOKENS
        GrammarAST imp_tokensRoot = (GrammarAST)imp.ast.getFirstChildWithType(ANTLRParser.TOKENS);
        if ( imp_tokensRoot!=null ) {
            System.out.println("imported tokens: "+imp_tokensRoot.getChildren());
            if ( tokensRoot==null ) {
                tokensRoot = (GrammarAST)adaptor.create(ANTLRParser.TOKENS, "TOKENS");
                tokensRoot.g = rootGrammar;
                root.insertChild(1, tokensRoot); // ^(GRAMMAR ID TOKENS...)
            }
            tokensRoot.addChildren(imp_tokensRoot.getChildren());
        }

        List<GrammarAST> imp_actionRoots = imp.ast.getNodesWithType(ANTLRParser.AT);

        // COPY ACTIONS
        if ( imp_actionRoots!=null ) {
            // Build the combined list only after the null check so a null
            // import list can never reach addAll().
            List<GrammarAST> all_actionRoots = new ArrayList<GrammarAST>();
            if ( actionRoots!=null ) all_actionRoots.addAll(actionRoots);
            all_actionRoots.addAll(imp_actionRoots);

            DoubleKeyMap<String, String, GrammarAST> namedActions =
                new DoubleKeyMap<String, String, GrammarAST>();

            System.out.println("imported actions: "+imp_actionRoots);
            for (GrammarAST at : all_actionRoots) {
                String scopeName = rootGrammar.getDefaultActionScope();
                GrammarAST name, action;
                if ( at.getChildCount()>2 ) { // must have a scope
                    // ^(AT scope name ACTION): the scope is child 0, not
                    // child 1 (which is the action name).
                    GrammarAST scope = (GrammarAST)at.getChild(0);
                    scopeName = scope.getText();
                    name = (GrammarAST)at.getChild(1);
                    action = (GrammarAST)at.getChild(2);
                }
                else {
                    name = (GrammarAST)at.getChild(0);
                    action = (GrammarAST)at.getChild(1);
                }
                GrammarAST prevAction = namedActions.get(scopeName, name.getText());
                if ( prevAction==null ) {
                    namedActions.put(scopeName, name.getText(), action);
                }
                else {
                    if ( prevAction.g == at.g ) {
                        // same grammar defined the same action twice
                        rootGrammar.tool.errMgr.grammarError(ErrorType.ACTION_REDEFINITION,
                                            at.g.fileName, name.token, name.getText());
                    }
                    else {
                        // different grammars: concatenate the action bodies,
                        // stripping the surrounding curlies of each
                        String s1 = prevAction.getText();
                        s1 = s1.substring(1, s1.length()-1);
                        String s2 = action.getText();
                        s2 = s2.substring(1, s2.length()-1);
                        String combinedAction = "{"+s1 + '\n'+ s2+"}";
                        prevAction.token.setText(combinedAction);
                    }
                }
            }
            // at this point, we have complete list of combined actions,
            // some of which are already living in root grammar.
            // Merge in any actions not in root grammar into root's tree.
            for (String scopeName : namedActions.keySet()) {
                for (String name : namedActions.keySet(scopeName)) {
                    GrammarAST action = namedActions.get(scopeName, name);
                    System.out.println(action.g.name+" "+scopeName+":"+name+"="+action.getText());
                    if ( action.g != rootGrammar ) {
                        root.insertChild(1, action.getParent());
                    }
                }
            }
        }

        // COPY RULES
        List<GrammarAST> rules = imp.ast.getNodesWithType(ANTLRParser.RULE);
        if ( rules!=null ) {
            for (GrammarAST r : rules) {
                System.out.println("imported rule: "+r.toStringTree());
                String name = r.getChild(0).getText();
                boolean rootAlreadyHasRule = rootRuleNames.contains(name);
                if ( !rootAlreadyHasRule ) {
                    RULES.addChild(r); // merge in if not overridden
                    rootRuleNames.add(name);
                }
            }
        }

        // options sections in imported grammars are reported as errors
        GrammarAST optionsRoot = (GrammarAST)imp.ast.getFirstChildWithType(ANTLRParser.OPTIONS);
        if ( optionsRoot!=null ) {
            rootGrammar.tool.errMgr.grammarError(ErrorType.OPTIONS_IN_DELEGATE,
                                optionsRoot.g.fileName, optionsRoot.token, imp.name);
        }
    }
    System.out.println("Grammar: "+rootGrammar.ast.toStringTree());
}
/** Build lexer grammar from combined grammar that looks like:
*
* (COMBINED_GRAMMAR A
* (tokens { X (= Y 'y'))
* (OPTIONS (= x 'y'))
* (@ members {foo})
* (@ lexer header {package jj;})
* (RULES (RULE .+)))
*
* Lexer rules and lexer-scoped actions are duplicated into the new tree
* and the originals are then deleted from the combined tree. Options are
* copied (except those named in Grammar.doNotCopyOptionsToLexer) and are
* not deleted from the combined tree.
* We'll have this Grammar share token symbols later; don't generate
* tokenVocab or tokens{} section.
*
* Side-effects: it removes children from GRAMMAR & RULES nodes
* in combined AST. Anything cut out is dup'd before
* adding to lexer to avoid "who's ur daddy" issues
*
* @param combinedGrammar grammar whose AST is split; its AST is altered
* @return the new lexer AST; the lexer AST without a RULES child when the
*         combined AST has no RULES node; or null when there are no
*         lexical rules, no literal aliases, and no string literals
*/
public static GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
GrammarRootAST combinedAST = combinedGrammar.ast;
//System.out.println("before="+combinedAST.toStringTree());
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
List<GrammarAST> elements = combinedAST.getChildren();
// MAKE A GRAMMAR ROOT and ID
// lexer name is the text of the combined AST's first child plus "Lexer"
String lexerName = combinedAST.getChild(0).getText()+"Lexer";
GrammarRootAST lexerAST =
new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR,"LEXER_GRAMMAR"));
lexerAST.grammarType = ANTLRParser.LEXER;
lexerAST.token.setInputStream(combinedAST.token.getInputStream());
lexerAST.addChild((GrammarAST)adaptor.create(ANTLRParser.ID, lexerName));
// MOVE OPTIONS
// (options are dup'd into the lexer; nothing is deleted from the combined tree here)
GrammarAST optionsRoot =
(GrammarAST)combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( optionsRoot!=null ) {
GrammarAST lexerOptionsRoot = (GrammarAST)adaptor.dupNode(optionsRoot);
lexerAST.addChild(lexerOptionsRoot);
List<GrammarAST> options = optionsRoot.getChildren();
for (GrammarAST o : options) {
String optionName = o.getChild(0).getText();
if ( !Grammar.doNotCopyOptionsToLexer.contains(optionName) ) {
lexerOptionsRoot.addChild((Tree)adaptor.dupTree(o));
}
}
}
// MOVE lexer:: actions
// collect first, delete afterwards, so the child list is not changed while iterating
List<GrammarAST> actionsWeMoved = new ArrayList<GrammarAST>();
for (GrammarAST e : elements) {
if ( e.getType()==ANTLRParser.AT ) {
// first child of a scoped action is the scope name
if ( e.getChild(0).getText().equals("lexer") ) {
lexerAST.addChild((Tree)adaptor.dupTree(e));
actionsWeMoved.add(e);
}
}
}
for (GrammarAST r : actionsWeMoved) {
combinedAST.deleteChild( r );
}
GrammarAST combinedRulesRoot =
(GrammarAST)combinedAST.getFirstChildWithType(ANTLRParser.RULES);
// no RULES node in the combined grammar: return what we have so far
if ( combinedRulesRoot==null ) return lexerAST;
// MOVE lexer rules
GrammarAST lexerRulesRoot =
(GrammarAST)adaptor.create(ANTLRParser.RULES, "RULES");
lexerAST.addChild(lexerRulesRoot);
List<GrammarAST> rulesWeMoved = new ArrayList<GrammarAST>();
List<GrammarASTWithOptions> rules = combinedRulesRoot.getChildren();
for (GrammarASTWithOptions r : rules) {
String ruleName = r.getChild(0).getText();
// a rule whose name starts with an uppercase letter is treated as a lexer rule
if ( Character.isUpperCase(ruleName.charAt(0)) ) {
lexerRulesRoot.addChild((Tree)adaptor.dupTree(r));
rulesWeMoved.add(r);
}
}
int nLexicalRules = rulesWeMoved.size();
for (GrammarAST r : rulesWeMoved) {
combinedRulesRoot.deleteChild( r );
}
// Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
Map<String,String> litAliases =
Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
if ( nLexicalRules==0 && (litAliases==null||litAliases.size()==0) &&
combinedGrammar.stringLiteralToTypeMap.size()==0 )
{
// no rules, tokens{}, or 'literals' in grammar
return null;
}
Set<String> stringLiterals = combinedGrammar.getStringLiterals();
// add strings from combined grammar (and imported grammars) into lexer
// put them first as they are keywords; must resolve ambigs to these rules
// System.out.println("strings from parser: "+stringLiterals);
for (String lit : stringLiterals) {
if ( litAliases!=null && litAliases.containsKey(lit) ) continue; // already has rule
// create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>))
String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
// can't use wizard; need special node types
GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
AltAST alt = new AltAST(ANTLRParser.ALT);
TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit));
alt.addChild(slit);
blk.addChild(alt);
CommonToken idToken = new CommonToken(ANTLRParser.ID, rname);
litRule.addChild(new TerminalAST(idToken));
litRule.addChild(blk);
lexerRulesRoot.getChildren().add(0, litRule); // add first
lexerRulesRoot.freshenParentAndChildIndexes(); // reset indexes and set litRule parent
}
// TODO: take out after stable if slow
lexerAST.sanityCheckParentAndChildIndexes();
combinedAST.sanityCheckParentAndChildIndexes();
// System.out.println(combinedAST.toTokenString());
// lexerAST.freshenParentAndChildIndexesDeeply();
// combinedAST.freshenParentAndChildIndexesDeeply();
System.out.println("after extract implicit lexer ="+combinedAST.toStringTree());
System.out.println("lexer ="+lexerAST.toStringTree());
return lexerAST;
}
}