From 6fa5e52d5ee043686a211242a2d756122917f2d7 Mon Sep 17 00:00:00 2001 From: parrt Date: Mon, 26 Dec 2011 15:14:49 -0800 Subject: [PATCH] added => skip, channel(99), more, mode(xx), push(xx), pop lexer syntax. separated lexer rules from others in parser / AST now. [git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9749] --- tool/playground/T.g | 6 +- .../org/antlr/v4/automata/ATNSerializer.java | 4 +- tool/src/org/antlr/v4/parse/ANTLRLexer.g | 7 +- tool/src/org/antlr/v4/parse/ANTLRParser.g | 70 ++- tool/src/org/antlr/v4/parse/ASTVerifier.g | 424 ------------------ tool/src/org/antlr/v4/parse/ATNBuilder.g | 8 +- .../org/antlr/v4/parse/BlockSetTransformer.g | 2 +- .../org/antlr/v4/parse/GrammarASTAdaptor.java | 2 +- .../org/antlr/v4/parse/GrammarTreeVisitor.g | 47 +- .../v4/parse/LeftRecursiveRuleAnalyzer.java | 5 +- .../antlr/v4/parse/LeftRecursiveRuleWalker.g | 9 +- .../v4/semantics/BasicSemanticChecks.java | 7 + .../antlr/v4/semantics/SymbolCollector.java | 29 +- tool/src/org/antlr/v4/tool/Grammar.java | 17 +- .../v4/tool/GrammarTransformPipeline.java | 2 +- 15 files changed, 174 insertions(+), 465 deletions(-) delete mode 100644 tool/src/org/antlr/v4/parse/ASTVerifier.g diff --git a/tool/playground/T.g b/tool/playground/T.g index 178c9f210..cf2fe7c03 100644 --- a/tool/playground/T.g +++ b/tool/playground/T.g @@ -1,5 +1,3 @@ grammar T; -s@after {dumpDFA();} - : ID | ID {;} ; -ID : 'a'..'z'+ ; -WS : (' '|'\t'|'\n')+ {skip();} ; +s : a ; +a : 'x' ; diff --git a/tool/src/org/antlr/v4/automata/ATNSerializer.java b/tool/src/org/antlr/v4/automata/ATNSerializer.java index 2ed8225e0..a7bc01727 100644 --- a/tool/src/org/antlr/v4/automata/ATNSerializer.java +++ b/tool/src/org/antlr/v4/automata/ATNSerializer.java @@ -275,6 +275,8 @@ public class ATNSerializer { } public static String getDecoded(Grammar g, ATN atn) { - return new ATNSerializer(g, atn).decode(Utils.toCharArray(getSerialized(g, atn))); + List serialized = getSerialized(g, atn); + char[] data = Utils.toCharArray(serialized); + return new ATNSerializer(g, atn).decode(data); } } diff --git a/tool/src/org/antlr/v4/parse/ANTLRLexer.g b/tool/src/org/antlr/v4/parse/ANTLRLexer.g index f0cff348c..1d02d0f44 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRLexer.g +++ b/tool/src/org/antlr/v4/parse/ANTLRLexer.g @@ -392,7 +392,7 @@ NESTED_ACTION // Keywords // -------- // keywords used to specify ANTLR v3 grammars. Keywords may not be used as -// labels for rules or in any other context where they woudl be ambiguous +// labels for rules or in any other context where they would be ambiguous // with the keyword vs some other identifier // OPTIONS and TOKENS must also consume the opening brace that captures // their option block, as this is teh easiest way to parse it separate @@ -418,6 +418,11 @@ CATCH : 'catch' ; FINALLY : 'finally' ; TEMPLATE : 'template' ; MODE : 'mode' ; +CHANNEL : 'channel' ; +PUSH : 'push' ; +SKIP : 'skip' ; +MORE : 'more' ; +POP : 'pop' ; // ----------- // Punctuation diff --git a/tool/src/org/antlr/v4/parse/ANTLRParser.g b/tool/src/org/antlr/v4/parse/ANTLRParser.g index 7a880f526..e326f3746 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRParser.g +++ b/tool/src/org/antlr/v4/parse/ANTLRParser.g @@ -93,6 +93,9 @@ tokens { ST_RESULT; // distinguish between ST and tree rewrites RESULT; ALT_REWRITE; // indicate ALT is rewritten + + // lexer action stuff + LEXER_ALT_ACTION; DOWN_TOKEN; // AST node representing DOWN node in tree parser code gen UP_TOKEN; @@ -320,7 +323,7 @@ actionScopeName | PARSER -> ID[$PARSER] ; -mode: MODE id SEMI sync (rule sync)+ -> ^(MODE id rule+) ; +mode: MODE id SEMI sync (lexerRule sync)+ -> ^(MODE id lexerRule+) ; rules : sync (rule sync)* @@ -342,6 +345,10 @@ sync } : ; +rule: parserRule + | lexerRule + ; + // The specification of an EBNF rule in ANTLR style, with all the // rule level parameters, declarations, actions, rewrite specs and so // on. @@ -351,7 +358,7 @@ sync // verification of the AST determine if things are repeated or if a // particular functional element is not valid in the context of the // grammar type, such as using returns in lexer rules and so on. -rule +parserRule @init { paraphrases.push("matching a rule"); } @after { paraphrases.pop(); @@ -373,7 +380,7 @@ rule // parser or lexer rules, the semantic verification phase will // reject any rules that make no sense, such as lexer rules in // a pure parser or tree parser. - id + RULE_REF // Immediately following the rulename, there may be a specification // of input parameters for the rule. We do not do anything with the @@ -414,7 +421,7 @@ rule exceptionGroup - -> ^( RULE id DOC_COMMENT? ruleModifiers? ARG_ACTION? + -> ^( RULE RULE_REF DOC_COMMENT? ruleModifiers? ARG_ACTION? ruleReturns? throwsSpec? locals? rulePrequels? ruleBlock exceptionGroup* ) ; @@ -538,8 +545,59 @@ ruleAltList ; labeledAlt - : alternative (POUND id {((AltAST)$alternative.tree).altLabel=$id.tree;})? - -> alternative + : alternative (POUND id {((AltAST)$alternative.tree).altLabel=$id.tree;})? + ; + + +lexerRule +@init { paraphrases.push("matching a lexer rule"); } +@after { + paraphrases.pop(); +} + : DOC_COMMENT? FRAGMENT? + TOKEN_REF COLON lexerRuleBlock SEMI + -> ^( RULE TOKEN_REF DOC_COMMENT? + ^(RULEMODIFIERS FRAGMENT)? lexerRuleBlock + ) + ; + +lexerRuleBlock +@init {Token colon = input.LT(-1);} + : lexerRuleAltList -> ^(BLOCK[colon,"BLOCK"] lexerRuleAltList) + ; + catch [ResyncToEndOfRuleBlock e] { + // just resyncing; ignore error + retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null); + } + +lexerRuleAltList + : lexerAlt (OR lexerAlt)* -> lexerAlt+ + ; + +lexerAlt + : elements + ( lexerActions -> ^(LEXER_ALT_ACTION elements lexerActions) + | -> elements + ) + ; + +// channel=HIDDEN, skip, more, mode(INSIDE), push(INSIDE), pop +lexerActions + : IMPLIES lexerAction (COMMA lexerAction)* -> lexerAction+ + ; + +lexerAction + : CHANNEL LPAREN lexerActionExpr RPAREN -> ^(CHANNEL lexerActionExpr) + | MODE LPAREN lexerActionExpr RPAREN -> ^(MODE lexerActionExpr) + | PUSH LPAREN lexerActionExpr RPAREN -> ^(PUSH lexerActionExpr) + | SKIP + | MORE + | POP + ; + +lexerActionExpr + : ID + | INT ; altList diff --git a/tool/src/org/antlr/v4/parse/ASTVerifier.g b/tool/src/org/antlr/v4/parse/ASTVerifier.g deleted file mode 100644 index 0694db98d..000000000 --- a/tool/src/org/antlr/v4/parse/ASTVerifier.g +++ /dev/null @@ -1,424 +0,0 @@ -/* - [The "BSD license"] - Copyright (c) 2010 Terence Parr - All rights reserved. - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/** The definitive ANTLR v3 tree grammar to parse ANTLR v4 grammars. - * Parses trees created in ANTLRParser.g. - - OBSOLETE; See tree visitor grammar. - */ -tree grammar ASTVerifier; -options { - language = Java; - tokenVocab = ANTLRParser; - ASTLabelType = GrammarAST; -} - -// Include the copyright in this source and also the generated source -@header { -/* - [The "BSD license"] - Copyright (c) 2005-2009 Terence Parr - All rights reserved. - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -package org.antlr.v4.parse; -import org.antlr.v4.tool.*; -import org.antlr.v4.tool.ast.*; -} - -@members { -public String getErrorMessage(RecognitionException e, - String[] tokenNames) -{ - List stack = getRuleInvocationStack(e, this.getClass().getName()); - String msg = null; - String inputContext = - input.LT(-3) == null ? "" : ((Tree)input.LT(-3)).getText()+" "+ - input.LT(-2) == null ? "" : ((Tree)input.LT(-2)).getText()+" "+ - input.LT(-1) == null ? "" : ((Tree)input.LT(-1)).getText()+" >>>"+ - input.LT(1) == null ? "" : ((Tree)input.LT(1)).getText()+"<<< "+ - input.LT(2) == null ? "" : ((Tree)input.LT(2)).getText()+" "+ - input.LT(3) == null ? "" : ((Tree)input.LT(3)).getText(); - if ( e instanceof NoViableAltException ) { - NoViableAltException nvae = (NoViableAltException)e; - msg = " no viable alt; token="+e.token+ - " (decision="+nvae.decisionNumber+ - " state "+nvae.stateNumber+")"+ - " decision=<<"+nvae.grammarDecisionDescription+">>"; - } - else { - msg = super.getErrorMessage(e, tokenNames); - } - return stack+" "+msg+"\ncontext=..."+inputContext+"..."; -} -public String getTokenErrorDisplay(Token t) { - return t.toString(); -} -public void traceIn(String ruleName, int ruleIndex) { - System.out.print("enter "+ruleName+" "+ - ((GrammarAST)input.LT(1)).token+" "+ - ((GrammarAST)input.LT(2)).token+" "+ - ((GrammarAST)input.LT(3)).token+" "+ - ((GrammarAST)input.LT(4)).token); - if ( state.backtracking>0 ) { - System.out.print(" backtracking="+state.backtracking); - } - System.out.println(); -} - protected void mismatch(IntStream input, int ttype, BitSet follow) - throws RecognitionException { - throw new MismatchedTokenException(ttype, input); - } - public void recoverFromMismatchedToken(IntStream input, - RecognitionException e, BitSet follow) - throws RecognitionException - - { - throw e; - } -} - -// Alter code generation so catch-clauses get replace with // this action. -@rulecatch { catch (RecognitionException e) { -throw e; -} -} - -grammarSpec - : ^(GRAMMAR ID DOC_COMMENT? prequelConstruct* rules mode*) - ; - -prequelConstruct - : optionsSpec - | delegateGrammars - | tokensSpec - | attrScope - | action - ; - -optionsSpec - : ^(OPTIONS option*) - ; - -option - : ^(ASSIGN ID optionValue) - ; - -optionValue returns [String v] -@init {$v = $start.token.getText();} - : ID - | STRING_LITERAL - | INT - | STAR - ; - -delegateGrammars - : ^(IMPORT delegateGrammar+) - ; - -delegateGrammar - : ^(ASSIGN ID ID) - | ID - ; - -tokensSpec - : ^(TOKENS tokenSpec+) - ; - -tokenSpec - : ^(ASSIGN ID STRING_LITERAL) - | ID - ; - -attrScope - : ^(SCOPE ID ACTION) - ; - -action - : ^(AT ID? ID ACTION) - ; - -rules - : ^(RULES rule*) - ; - -mode: ^( MODE ID rule+ ) ; - -rule: ^( RULE ID DOC_COMMENT? ruleModifiers? ARG_ACTION? - ruleReturns? rulePrequel* altListAsBlock exceptionGroup - ) - ; - -exceptionGroup - : exceptionHandler* finallyClause? - ; - -exceptionHandler - : ^(CATCH ARG_ACTION ACTION) - ; - -finallyClause - : ^(FINALLY ACTION) - ; - -rulePrequel - : throwsSpec - | ruleScopeSpec - | optionsSpec - | ruleAction - ; - -ruleReturns - : ^(RETURNS ARG_ACTION) - ; -throwsSpec - : ^(THROWS ID+) - ; - -ruleScopeSpec - : ^(SCOPE ACTION) - | ^(SCOPE ID+) - ; - -ruleAction - : ^(AT ID ACTION) - ; - -ruleModifiers - : ^(RULEMODIFIERS ruleModifier+) - ; - -ruleModifier - : PUBLIC - | PRIVATE - | PROTECTED - | FRAGMENT - ; - -altList - : alternative+ - ; - -altListAsBlock - : ^(BLOCK altList) - ; - -alternative - : ^(ALT_REWRITE alternative rewrite) - | ^(ALT EPSILON) - | elements - ; - -elements - : ^(ALT element+) - ; - -element - : labeledElement - | atom - | subrule - | ACTION - | SEMPRED - | GATED_SEMPRED - | treeSpec - | ^(ROOT astOperand) - | ^(BANG astOperand) - | ^(NOT blockSet) - | ^(NOT block) - ; - -astOperand - : atom - | ^(NOT blockSet) - | ^(NOT block) - ; - -labeledElement - : ^((ASSIGN|PLUS_ASSIGN) ID element) - ; - -treeSpec - : ^(TREE_BEGIN element+) - ; - -subrule - : ^(blockSuffix block) - | block - ; - -blockSuffix - : ebnfSuffix - | ROOT - | IMPLIES - | BANG - ; - -ebnfSuffix - : OPTIONAL - | CLOSURE - | POSITIVE_CLOSURE - ; - -atom: range - | ^(DOT ID terminal) - | ^(DOT ID ruleref) - | ^(WILDCARD elementOptions) - | WILDCARD - | terminal - | blockSet - | ruleref - ; - -blockSet - : ^(SET setElement+) - ; - -setElement - : STRING_LITERAL - | TOKEN_REF - ; - -block - : ^(BLOCK optionsSpec? ruleAction* ACTION? altList) - ; - -ruleref - : ^(RULE_REF ARG_ACTION?) - ; - -range - : ^(RANGE STRING_LITERAL STRING_LITERAL) - ; - -terminal - : ^(STRING_LITERAL elementOptions) - | STRING_LITERAL - | ^(TOKEN_REF elementOptions) - | TOKEN_REF - ; - -elementOptions - : ^(ELEMENT_OPTIONS elementOption+) - ; - -elementOption - : ID - | ^(ASSIGN ID ID) - | ^(ASSIGN ID STRING_LITERAL) - ; - -rewrite - : predicatedRewrite* nakedRewrite - ; - -predicatedRewrite - : ^(ST_RESULT SEMPRED rewriteAlt) - | ^(RESULT SEMPRED rewriteAlt) - ; - -nakedRewrite - : ^(ST_RESULT rewriteAlt) - | ^(RESULT rewriteAlt) - ; - -rewriteAlt - : rewriteTemplate - | rewriteTreeAlt - | ETC - | EPSILON - ; - -rewriteTreeAlt - : ^(REWRITE_SEQ rewriteTreeElement+) - ; - -rewriteTreeElement - : rewriteTreeAtom - | rewriteTree - | rewriteTreeEbnf - ; - -rewriteTreeAtom - : ^(TOKEN_REF elementOptions ARG_ACTION) - | ^(TOKEN_REF elementOptions) - | ^(TOKEN_REF ARG_ACTION) - | TOKEN_REF - | RULE_REF - | ^(STRING_LITERAL elementOptions) - | STRING_LITERAL - | LABEL - | ACTION - ; - -rewriteTreeEbnf - : ^(ebnfSuffix ^(REWRITE_BLOCK rewriteTreeAlt)) - ; -rewriteTree - : ^(TREE_BEGIN rewriteTreeAtom rewriteTreeElement* ) - ; - -rewriteTemplate - : ^(TEMPLATE rewriteTemplateArgs? DOUBLE_QUOTE_STRING_LITERAL) - | ^(TEMPLATE rewriteTemplateArgs? DOUBLE_ANGLE_STRING_LITERAL) - | rewriteTemplateRef - | rewriteIndirectTemplateHead - | ACTION - ; - -rewriteTemplateRef - : ^(TEMPLATE ID rewriteTemplateArgs?) - ; - -rewriteIndirectTemplateHead - : ^(TEMPLATE ACTION rewriteTemplateArgs?) - ; - -rewriteTemplateArgs - : ^(ARGLIST rewriteTemplateArg+) - ; - -rewriteTemplateArg - : ^(ARG ID ACTION) - ; diff --git a/tool/src/org/antlr/v4/parse/ATNBuilder.g b/tool/src/org/antlr/v4/parse/ATNBuilder.g index 64bd1ad0e..87982e216 100644 --- a/tool/src/org/antlr/v4/parse/ATNBuilder.g +++ b/tool/src/org/antlr/v4/parse/ATNBuilder.g @@ -83,10 +83,10 @@ block[GrammarAST ebnfRoot] returns [ATNFactory.Handle p] alternative returns [ATNFactory.Handle p] @init {List els = new ArrayList();} - : ^(ALT_REWRITE a=alternative .*) {$p = $a.p;} - | ^(ALT EPSILON) {$p = factory.epsilon($EPSILON);} - | ^(ALT (e=element {els.add($e.p);})+) - {$p = factory.alt(els);} + : ^(ALT_REWRITE a=alternative .*) {$p = $a.p;} + | ^(LEXER_ALT_ACTION a=alternative .*) {$p = $a.p;} + | ^(ALT EPSILON) {$p = factory.epsilon($EPSILON);} + | ^(ALT (e=element {els.add($e.p);})+) {$p = factory.alt(els);} ; element returns [ATNFactory.Handle p] diff --git a/tool/src/org/antlr/v4/parse/BlockSetTransformer.g b/tool/src/org/antlr/v4/parse/BlockSetTransformer.g index a6fe1f8f1..95cc03d47 100644 --- a/tool/src/org/antlr/v4/parse/BlockSetTransformer.g +++ b/tool/src/org/antlr/v4/parse/BlockSetTransformer.g @@ -32,7 +32,7 @@ public BlockSetTransformer(TreeNodeStream input, Grammar g) { } topdown - : ^(RULE ID {currentRuleName=$ID.text;} .+) + : ^(RULE (id=TOKEN_REF|id=RULE_REF) {currentRuleName=$id.text;} .+) | setAlt | ebnfBlockSet | blockSet diff --git a/tool/src/org/antlr/v4/parse/GrammarASTAdaptor.java b/tool/src/org/antlr/v4/parse/GrammarASTAdaptor.java index ff1af6a18..5888de984 100644 --- a/tool/src/org/antlr/v4/parse/GrammarASTAdaptor.java +++ b/tool/src/org/antlr/v4/parse/GrammarASTAdaptor.java @@ -62,7 +62,7 @@ public class GrammarASTAdaptor extends CommonTreeAdaptor { else { t = (GrammarAST)super.create(tokenType, text); } - ((CommonToken)t.token).setInputStream(input); + t.token.setInputStream(input); return t; } diff --git a/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g b/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g index 97027b8ee..a52ea8395 100644 --- a/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g +++ b/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g @@ -138,7 +138,10 @@ public void discoverRule(RuleAST rule, GrammarAST ID, List modifiers GrammarAST options, GrammarAST locals, List actions, GrammarAST block) { } -public void finishRule(GrammarAST rule, GrammarAST ID, GrammarAST block) { } +public void finishRule(RuleAST rule, GrammarAST ID, GrammarAST block) { } +public void discoverLexerRule(RuleAST rule, GrammarAST ID, List modifiers, + GrammarAST block) { } +public void finishLexerRule(RuleAST rule, GrammarAST ID, GrammarAST block) { } public void ruleCatch(GrammarAST arg, ActionAST action) { } public void finallyAction(ActionAST action) { } /** outermost alt */ @@ -252,10 +255,27 @@ action ; rules - : ^(RULES {discoverRules($RULES);} rule* {finishRules($RULES);}) + : ^(RULES {discoverRules($RULES);} (rule|lexerRule)* {finishRules($RULES);}) ; -mode : ^( MODE ID {currentModeName=$ID.text; modeDef($MODE, $ID);} rule+ ) ; +mode : ^( MODE ID {currentModeName=$ID.text; modeDef($MODE, $ID);} lexerRule+ ) ; + +lexerRule +@init { +List mods = new ArrayList(); +currentOuterAltNumber=0; +} + : ^( RULE TOKEN_REF + {currentRuleName=$TOKEN_REF.text; currentRuleAST=$RULE;} + DOC_COMMENT? (^(RULEMODIFIERS m=FRAGMENT {mods.add($m);}))? + {discoverLexerRule((RuleAST)$RULE, $TOKEN_REF, mods, (GrammarAST)input.LT(1));} + ruleBlock + { + finishLexerRule((RuleAST)$RULE, $TOKEN_REF, $ruleBlock.start); + currentRuleName=null; currentRuleAST=null; + } + ) + ; rule @init { @@ -263,7 +283,7 @@ List mods = new ArrayList(); List actions = new ArrayList(); // track roots currentOuterAltNumber=0; } - : ^( RULE ID {currentRuleName=$ID.text; currentRuleAST=$RULE;} + : ^( RULE RULE_REF {currentRuleName=$RULE_REF.text; currentRuleAST=$RULE;} DOC_COMMENT? (^(RULEMODIFIERS (m=ruleModifier{mods.add($m.start);})+))? ARG_ACTION? ret=ruleReturns? @@ -272,13 +292,13 @@ currentOuterAltNumber=0; ( opts=optionsSpec | a=ruleAction {actions.add($a.start);} )* - {discoverRule((RuleAST)$RULE, $ID, mods, (ActionAST)$ARG_ACTION, + {discoverRule((RuleAST)$RULE, $RULE_REF, mods, (ActionAST)$ARG_ACTION, $ret.start!=null?(ActionAST)$ret.start.getChild(0):null, $thr.start, $opts.start, $loc.start!=null?(ActionAST)$loc.start.getChild(0):null, actions, (GrammarAST)input.LT(1));} ruleBlock exceptionGroup - {finishRule($RULE, $ID, $ruleBlock.start); currentRuleName=null; currentRuleAST=null;} + {finishRule((RuleAST)$RULE, $RULE_REF, $ruleBlock.start); currentRuleName=null; currentRuleAST=null;} ) ; @@ -347,10 +367,25 @@ outerAlternative alternative : ^(ALT_REWRITE alternative {inRewrite=true;} rewrite {inRewrite=false;}) + | ^(LEXER_ALT_ACTION alternative lexerAction*) | ^(ALT element+) | ^(ALT EPSILON) ; +lexerAction + : ^(CHANNEL lexerActionExpr) + | ^(MODE lexerActionExpr) + | ^(PUSH lexerActionExpr) + | SKIP + | MORE + | POP + ; + +lexerActionExpr + : ID + | INT + ; + element : labeledElement | atom diff --git a/tool/src/org/antlr/v4/parse/LeftRecursiveRuleAnalyzer.java b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleAnalyzer.java index ac41ee763..25efb5b74 100644 --- a/tool/src/org/antlr/v4/parse/LeftRecursiveRuleAnalyzer.java +++ b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleAnalyzer.java @@ -319,11 +319,12 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker { } /** - * Match (RULE ID (BLOCK (ALT .*) (ALT RULE_REF[self] .*) (ALT .*))) + * Match (RULE RULE_REF (BLOCK (ALT .*) (ALT RULE_REF[self] .*) (ALT .*))) */ public static boolean hasImmediateRecursiveRuleRefs(GrammarAST t, String ruleName) { if ( t==null ) return false; - for (GrammarAST rref : t.getNodesWithType(RULE_REF)) { + GrammarAST blk = (GrammarAST)t.getFirstChildWithType(BLOCK); + for (GrammarAST rref : blk.getNodesWithType(RULE_REF)) { if ( rref.getChildIndex()==0 && rref.getText().equals(ruleName) ) return true; } return false; diff --git a/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g index d637ac85e..460c83fc5 100644 --- a/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g +++ b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g @@ -69,8 +69,8 @@ rec_rule returns [boolean isLeftRec] { currentOuterAltNumber = 1; } - : ^( r=RULE id=ID {ruleName=$id.getText();} - ruleModifier? + : ^( r=RULE id=RULE_REF {ruleName=$id.getText();} + DOC_COMMENT? ruleModifier? // (ARG_ACTION)? shouldn't allow args, right? (^(RETURNS a=ARG_ACTION {setReturnValues($a);}))? // ( ^(THROWS .+) )? don't allow @@ -81,8 +81,6 @@ rec_rule returns [boolean isLeftRec] ruleBlock {$isLeftRec = $ruleBlock.isLeftRec;} exceptionGroup ) - // why do this? -// {if ($ruleBlock.isLeftRec) $r.setType(PREC_RULE);} ; exceptionGroup @@ -101,7 +99,6 @@ ruleModifier : PUBLIC | PRIVATE | PROTECTED - | FRAGMENT ; ruleBlock returns [boolean isLeftRec] @@ -134,7 +131,7 @@ outerAlternative[GrammarAST rew] returns [boolean isLeftRec] ; binary - : ^( ALT recurseNoLabel (op=token)+ {setTokenPrec($op.t, currentOuterAltNumber);} recurse ) + : ^( ALT recurseNoLabel (op=token)+ {setTokenPrec($op.t, currentOuterAltNumber);} recurse ) ; binaryMultipleOp diff --git a/tool/src/org/antlr/v4/semantics/BasicSemanticChecks.java b/tool/src/org/antlr/v4/semantics/BasicSemanticChecks.java index 6da736a18..265c9183e 100644 --- a/tool/src/org/antlr/v4/semantics/BasicSemanticChecks.java +++ b/tool/src/org/antlr/v4/semantics/BasicSemanticChecks.java @@ -213,6 +213,13 @@ public class BasicSemanticChecks extends GrammarTreeVisitor { checkInvalidRuleDef(ID.token); } + @Override + public void discoverLexerRule(RuleAST rule, GrammarAST ID, List modifiers, + GrammarAST block) + { + checkInvalidRuleDef(ID.token); + } + @Override public void ruleRef(GrammarAST ref, ActionAST arg) { checkInvalidRuleRef(ref.token); diff --git a/tool/src/org/antlr/v4/semantics/SymbolCollector.java b/tool/src/org/antlr/v4/semantics/SymbolCollector.java index cb61e5be2..3a1a42a43 100644 --- a/tool/src/org/antlr/v4/semantics/SymbolCollector.java +++ b/tool/src/org/antlr/v4/semantics/SymbolCollector.java @@ -29,11 +29,18 @@ package org.antlr.v4.semantics; -import org.antlr.v4.parse.*; -import org.antlr.v4.tool.*; +import org.antlr.v4.parse.GrammarTreeVisitor; +import org.antlr.v4.parse.ScopeParser; +import org.antlr.v4.tool.AttributeDict; +import org.antlr.v4.tool.Grammar; +import org.antlr.v4.tool.LabelElementPair; +import org.antlr.v4.tool.Rule; import org.antlr.v4.tool.ast.*; -import java.util.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; /** Collects (create) rules, terminals, strings, actions, scopes etc... from AST * side-effects: sets resolver field of asts for actions and @@ -106,8 +113,8 @@ public class SymbolCollector extends GrammarTreeVisitor { { int numAlts = block.getChildCount(); Rule r = new Rule(g, ID.getText(), rule, numAlts); - if ( g.isLexer() ) r.mode = currentModeName; - if ( modifiers.size()>0 ) r.modifiers = modifiers; +// if ( g.isLexer() ) r.mode = currentModeName; +// if ( modifiers.size()>0 ) r.modifiers = modifiers; rules.add(r); currentRule = r; @@ -138,6 +145,18 @@ public class SymbolCollector extends GrammarTreeVisitor { } } + @Override + public void discoverLexerRule(RuleAST rule, GrammarAST ID, List modifiers, + GrammarAST block) + { + int numAlts = block.getChildCount(); + Rule r = new Rule(g, ID.getText(), rule, numAlts); + r.mode = currentModeName; + if ( modifiers.size()>0 ) r.modifiers = modifiers; + rules.add(r); + currentRule = r; + } + @Override public void discoverAltWithRewrite(AltAST alt) { discoverAlt(alt); } diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index 302053050..4b951cc6c 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -29,6 +29,7 @@ package org.antlr.v4.tool; +import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.TreeVisitor; import org.antlr.runtime.tree.TreeVisitorAction; import org.antlr.runtime.tree.TreeWizard; @@ -744,11 +745,21 @@ public class Grammar implements AttributeResolver { for (GrammarASTWithOptions r : ruleNodes) { //tool.log("grammar", r.toStringTree()); - String ruleName = r.getChild(0).getText(); - if ( Character.isUpperCase(ruleName.charAt(0)) ) { + Tree name = r.getChild(0); + if ( name.getType()==ANTLRParser.TOKEN_REF ) { Map nodes = new HashMap(); boolean isLitRule = - wiz.parse(r, "(RULE %name:ID (BLOCK (ALT %lit:STRING_LITERAL)))", nodes); + wiz.parse(r, "(RULE %name:TOKEN_REF (BLOCK (ALT %lit:STRING_LITERAL)))", nodes); + if ( isLitRule ) { + GrammarAST litNode = (GrammarAST)nodes.get("lit"); + GrammarAST nameNode = (GrammarAST)nodes.get("name"); + lexerRuleToStringLiteral.put(litNode.getText(), nameNode.getText()); + continue; + } + nodes = new HashMap(); + // try with doc comment in there + isLitRule = + wiz.parse(r, "(RULE %name:TOKEN_REF DOC_COMMENT (BLOCK (ALT %lit:STRING_LITERAL)))", nodes); if ( isLitRule ) { GrammarAST litNode = (GrammarAST)nodes.get("lit"); GrammarAST nameNode = (GrammarAST)nodes.get("name"); diff --git a/tool/src/org/antlr/v4/tool/GrammarTransformPipeline.java b/tool/src/org/antlr/v4/tool/GrammarTransformPipeline.java index bb3f7e56c..274b254e3 100644 --- a/tool/src/org/antlr/v4/tool/GrammarTransformPipeline.java +++ b/tool/src/org/antlr/v4/tool/GrammarTransformPipeline.java @@ -422,7 +422,7 @@ public class GrammarTransformPipeline { TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit)); alt.addChild(slit); blk.addChild(alt); - CommonToken idToken = new CommonToken(ANTLRParser.ID, rname); + CommonToken idToken = new CommonToken(ANTLRParser.TOKEN_REF, rname); litRule.addChild(new TerminalAST(idToken)); litRule.addChild(blk); lexerRulesRoot.insertChild(0, litRule); // add first