diff --git a/build.xml b/build.xml index e447a3394..5f70a56e0 100644 --- a/build.xml +++ b/build.xml @@ -114,6 +114,8 @@ dir="${basedir}/tool/src/@{srcpath}"> + + @@ -164,12 +166,7 @@ - - - - - - + diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/pattern/Chunk.java b/runtime/Java/src/org/antlr/v4/runtime/tree/pattern/Chunk.java index ad7a96036..53eaf80bb 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/tree/pattern/Chunk.java +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/pattern/Chunk.java @@ -3,7 +3,7 @@ package org.antlr.v4.runtime.tree.pattern; /** A chunk is either a token reference, a rule reference, or some plaintext * within a tree pattern. Function split() in the pattern matcher returns * a list of chunks in preparation for creating a token stream by tokenize(). - * From there, we get a parse tree from with compile(). + * From there, we get a parse tree from the pattern with compile(). */ abstract class Chunk { } diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/pattern/ParseTreePatternMatcher.java b/runtime/Java/src/org/antlr/v4/runtime/tree/pattern/ParseTreePatternMatcher.java index 8f2e971ee..78dba88bc 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/tree/pattern/ParseTreePatternMatcher.java +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/pattern/ParseTreePatternMatcher.java @@ -267,6 +267,7 @@ public class ParseTreePatternMatcher { } else { System.err.println("invalid tag: "+tagChunk.tag); + throw new IllegalArgumentException("invalid tag: "+tagChunk.tag+" in "+pattern); } } else { diff --git a/tool/src/org/antlr/v4/parse/ANTLRLexer.g b/tool/src/org/antlr/v4/parse/ANTLRLexer.g deleted file mode 100644 index fc46ca517..000000000 --- a/tool/src/org/antlr/v4/parse/ANTLRLexer.g +++ /dev/null @@ -1,782 +0,0 @@ -/* - * [The "BSD license"] - * Copyright (c) 2012 Terence Parr - * Copyright (c) 2012 Sam Harwell - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// File : A3Lexer.g -// Author : Jim Idle (jimi@temporal-wave.com) -// Copyright : Free BSD - See @header clause below -// Version : First implemented as part of ANTLR 3.2 this is the self -// hosting ANTLR 3 Lexer. -// -// Description -// ----------- -// This is the definitive lexer grammar for parsing ANTLR V3.x.x grammars. All other -// gramnmars are derived from this grammar via source code control integration (perforce) -// or by the gdiff tool. -// -// This grammar and its associated grmmmars A3Parser.g and A3Walker.g exhibit the following -// traits, which are recommended for all production quality grammars: -// -// 1) They are separate grammars, not composite grammars; -// 2) They implement all supporting methods in a superclass (at least this is recommended -// for language targets that support inheritence; -// 3) All errors are pushed as far down the parsing chain as possible, which means -// that the lexer tries to defer error reporting to the parser, and the parser -// tries to defer error reporting to a semantic phase consisting of a single -// walk of the AST. The reason for this is that the error messages produced -// from later phases of the parse will generally have better context and so -// be more useful to the end user. Consider the message: "Syntax error at 'options'" -// vs: "You cannot specify two options{} sections in a single grammar file". -// 4) The lexer is 'programmed' to catch common mistakes such as unterminated literals -// and report them specifically and not just issue confusing lexer mismatch errors. -// - -/** Read in an ANTLR grammar and build an AST. Try not to do - * any actions, just build the tree. - * - * The phases are: - * - * A3Lexer.g (this file) - * A3Parser.g - * A3Verify.g (derived from A3Walker.g) - * assign.types.g - * define.g - * buildnfa.g - * antlr.print.g (optional) - * codegen.g - * - * Terence Parr - * University of San Francisco - * 2005 - * Jim Idle (this v3 grammar) - * Temporal Wave LLC - * 2009 - */ -lexer grammar ANTLRLexer; - -// ============================================================================== -// Note that while this grammar does not care about order of constructs -// that don't really matter, such as options before @header etc, it must first -// be parsed by the original v2 parser, before it replaces it. That parser does -// care about order of structures. Hence we are constrained by the v2 parser -// for at least the first bootstrap release that causes this parser to replace -// the v2 version. -// ============================================================================== - -// ------- -// Options -// -// V3 option directives to tell the tool what we are asking of it for this -// grammar. -// -options { - - // Target language is Java, which is the default but being specific - // here as this grammar is also meant as a good example grammar for - // for users. - // - language = Java; - - // The super class that this lexer should expect to inherit from, and - // which contains any and all support routines for the lexer. This is - // commented out in this baseline (definitive or normative grammar) - // - see the ANTLR tool implementation for hints on how to use the super - // class - // - //superclass = AbstractA3Lexer; -} - -tokens { SEMPRED; TOKEN_REF; RULE_REF; LEXER_CHAR_SET; ARG_ACTION; } - -// Include the copyright in this source and also the generated source -// -@lexer::header { -/* - [The "BSD licence"] - Copyright (c) 2005-2009 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -package org.antlr.v4.parse; -import org.antlr.v4.tool.*; -} - - -@members { - public CommonTokenStream tokens; // track stream we push to; need for context info - public boolean isLexerRule = false; - - public void grammarError(ErrorType etype, org.antlr.runtime.Token token, Object... args) { } - - /** scan backwards from current point in this.tokens list - * looking for the start of the rule or subrule. - * Return token or null if for some reason we can't find the start. - */ - public Token getRuleOrSubruleStartToken() { - if ( tokens==null ) return null; - int i = tokens.index(); - int n = tokens.size(); - if ( i>=n ) i = n-1; // seems index == n as we lex - while ( i>=0 && i ' $ANTLR' SRC - | ~(NLCHARS)* - ) - - | // Multi-line comment, which may be a documentation comment - // if it starts /** (note that we protect against accidentaly - // recognizing a comment /**/ as a documentation comment - // - '*' ( - { input.LA(2) != '/'}?=> '*' { $type = DOC_COMMENT; } - | { true }?=> // Required to cover all alts with predicates - ) - - // Should we support embedded multiline comments here? - // - ( - // Pick out end of multiline comment and exit the loop - // if we find it. - // - { !(input.LA(1) == '*' && input.LA(2) == '/') }? - - // Anything else other than the non-greedy match of - // the comment close sequence - // - . - )* - ( - // Look for the comment terminator, but if it is accidentally - // unterminated, then we will hit EOF, which will trigger the - // epsilon alt and hence we can issue an error message relative - // to the start of the unterminated multi-line comment - // - '*/' - - | // Unterminated comment! - // - { - // ErrorManager.msg(Msg.UNTERMINATED_DOC_COMMENT, startLine, offset, $pos, startLine, offset, $pos, (Object)null); - } - ) - - | // There was nothing that made sense following the opening '/' and so - // we issue an error regarding the malformed comment - // - { - // TODO: Insert error message relative to comment start - // - } - ) - { - // Unless we had a documentation comment, then we do not wish to - // pass the comments in to the parser. If you are writing a formatter - // then you will want to preserve the comments off channel, but could - // just skip and save token space if not. - // - if ($type != DOC_COMMENT) { - - $channel=2; // Comments are on channel 2 - } - } - ; - -ARG_OR_CHARSET -options {k=1;} - : {isLexerRule}?=> LEXER_CHAR_SET {$type=LEXER_CHAR_SET;} - | {!isLexerRule}?=> ARG_ACTION - { - $type=ARG_ACTION; - // Set the token text to our gathered string minus outer [ ] - String t = $text; - t = t.substring(1,t.length()-1); - setText(t); - } - ; - -fragment -LEXER_CHAR_SET - : '[' - ( '\\' ~('\r'|'\n') - | ~('\r'|'\n'|'\\'|']') - )* - ']' - ; - -// -------------- -// Argument specs -// -// Certain argument lists, such as those specifying call parameters -// to a rule invocation, or input parameters to a rule specification -// are contained within square brackets. In the lexer we consume them -// all at once and sort them out later in the grammar analysis. -// -fragment -ARG_ACTION - : '[' - ( - ARG_ACTION - - | ('"')=>ACTION_STRING_LITERAL - - | ('\'')=>ACTION_CHAR_LITERAL - - | ~('['|']') - )* - - ']' - ; - -// ------- -// Actions -// -// Other than making sure to distinguish between { and } embedded -// within what we have assumed to be literals in the action code, the -// job of the lexer is merely to gather the code within the action -// (delimited by {}) and pass it to the parser as a single token. -// We know that this token will be asked for its text somewhere -// in the upcoming parse, so setting the text here to exclude -// the delimiting {} is no additional overhead. -// -ACTION - : NESTED_ACTION - ( '?' {$type = SEMPRED;} - ( (WSNLCHARS* '=>') => WSNLCHARS* '=>' // v3 gated sempred - { - Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1); - t.setLine(state.tokenStartLine); - t.setText(state.text); - t.setCharPositionInLine(state.tokenStartCharPositionInLine); - grammarError(ErrorType.V3_GATED_SEMPRED, t); - } - )? - )? - ; - -// ---------------- -// Action structure -// -// Many language targets use {} as block delimiters and so we -// must recursively match {} delimited blocks to balance the -// braces. Additionally, we must make some assumptions about -// literal string representation in the target language. We assume -// that they are delimited by ' or " and so consume these -// in their own alts so as not to inadvertantly match {}. -// This rule calls itself on matching a { -// -fragment -NESTED_ACTION -@init { - - // Record the start line and offsets as if we need to report an - // unterminated block, then we want to show the start of the comment - // we think is broken, not the end, where people will have to try and work - // it out themselves. - // - int startLine = getLine(); - int offset = getCharPositionInLine(); -} - - : // Action and other blocks start with opening { - // - '{' - ( - // And now we can match one of a number of embedded - // elements within the action until we find a - // } that balances the opening {. If we do not find - // the balanced } then we will hit EOF and can issue - // an error message about the brace that we belive to - // be mismatched. This won't be foolproof but we will - // be able to at least report an error against the - // opening brace that we feel is in error and this will - // guide the user to the correction as best we can. - // - - - // An embedded {} block - // - NESTED_ACTION - - | // What appears to be a literal - // - ACTION_CHAR_LITERAL - - | // We have assumed that the target language has C/Java - // type comments. - // - COMMENT - - | // What appears to be a literal - // - ACTION_STRING_LITERAL - - | // What appears to be an escape sequence - // - ACTION_ESC - - | // Some other single character that is not - // handled above - // - ~('\\'|'"'|'\''|'/'|'{'|'}') - - )* - - ( - // Correctly balanced closing brace - // - '}' - - | // Looks like have an imblanced {} block, report - // with respect to the opening brace. - // - { - // TODO: Report imbalanced {} - System.out.println("Block starting at line " + startLine + " offset " + (offset+1) + " contains imbalanced {} or is missing a }"); - } - ) - ; - - -// Keywords -// -------- -// keywords used to specify ANTLR v3 grammars. Keywords may not be used as -// labels for rules or in any other context where they would be ambiguous -// with the keyword vs some other identifier -// OPTIONS and TOKENS must also consume the opening brace that captures -// their option block, as this is teh easiest way to parse it separate -// to an ACTION block, despite it usingthe same {} delimiters. -// -OPTIONS : 'options' WSNLCHARS* '{' ; -TOKENS_SPEC : 'tokens' WSNLCHARS* '{' ; - -IMPORT : 'import' ; -FRAGMENT : 'fragment' ; -LEXER : 'lexer' ; -PARSER : 'parser' ; -GRAMMAR : 'grammar' ; -TREE_GRAMMAR : 'tree' WSNLCHARS* 'grammar' ; -PROTECTED : 'protected' ; -PUBLIC : 'public' ; -PRIVATE : 'private' ; -RETURNS : 'returns' ; -LOCALS : 'locals' ; -THROWS : 'throws' ; -CATCH : 'catch' ; -FINALLY : 'finally' ; -MODE : 'mode' ; - -// ----------- -// Punctuation -// -// Character sequences used as separators, delimters, operators, etc -// -COLON : ':' - { - // scan backwards, looking for a RULE_REF or TOKEN_REF. - // which would indicate the start of a rule definition. - // If we see a LPAREN, then it's the start of the subrule. - // this.tokens is the token string we are pushing into, so - // just loop backwards looking for a rule definition. Then - // we set isLexerRule. - Token t = getRuleOrSubruleStartToken(); - if ( t!=null ) { - if ( t.getType()==RULE_REF ) isLexerRule = false; - else if ( t.getType()==TOKEN_REF ) isLexerRule = true; - // else must be subrule; don't alter context - } - } - ; -COLONCOLON : '::' ; -COMMA : ',' ; -SEMI : ';' ; -LPAREN : '(' ; -RPAREN : ')' ; -RARROW : '->' ; -LT : '<' ; -GT : '>' ; -ASSIGN : '=' ; -QUESTION : '?' ; -SYNPRED : '=>' - { - Token t = new CommonToken(input, state.type, state.channel, - state.tokenStartCharIndex, getCharIndex()-1); - t.setLine(state.tokenStartLine); - t.setText(state.text); - t.setCharPositionInLine(state.tokenStartCharPositionInLine); - grammarError(ErrorType.V3_SYNPRED, t); - $channel=HIDDEN; - } - ; -STAR : '*' ; -PLUS : '+' ; -PLUS_ASSIGN : '+=' ; -OR : '|' ; -DOLLAR : '$' ; -DOT : '.' ; // can be WILDCARD or DOT in qid or imported rule ref -RANGE : '..' ; -AT : '@' ; -POUND : '#' ; -NOT : '~' ; -RBRACE : '}' ; - -/** Allow unicode rule/token names */ -ID : a=NameStartChar NameChar* - { - if ( Grammar.isTokenName($a.text) ) $type = TOKEN_REF; - else $type = RULE_REF; - } - ; - -fragment -NameChar : NameStartChar - | '0'..'9' - | '_' - | '\u00B7' - | '\u0300'..'\u036F' - | '\u203F'..'\u2040' - ; - -fragment -NameStartChar - : 'A'..'Z' | 'a'..'z' - | '\u00C0'..'\u00D6' - | '\u00D8'..'\u00F6' - | '\u00F8'..'\u02FF' - | '\u0370'..'\u037D' - | '\u037F'..'\u1FFF' - | '\u200C'..'\u200D' - | '\u2070'..'\u218F' - | '\u2C00'..'\u2FEF' - | '\u3001'..'\uD7FF' - | '\uF900'..'\uFDCF' - | '\uFDF0'..'\uFFFD' - ; // ignores | ['\u10000-'\uEFFFF] ; - -// ---------------------------- -// Literals embedded in actions -// -// Note that we have made the assumption that the language used within -// actions uses the fairly standard " and ' delimiters for literals and -// that within these literals, characters are escaped using the \ character. -// There are some languages which do not conform to this in all cases, such -// as by using /string/ and so on. We will have to deal with such cases if -// if they come up in targets. -// - -// Within actions, or other structures that are not part of the ANTLR -// syntax, we may encounter literal characters. Within these, we do -// not want to inadvertantly match things like '}' and so we eat them -// specifically. While this rule is called CHAR it allows for the fact that -// some languages may use/allow ' as the string delimiter. -// -fragment -ACTION_CHAR_LITERAL - : '\'' (('\\')=>ACTION_ESC | ~'\'' )* '\'' - ; - -// Within actions, or other structures that are not part of the ANTLR -// syntax, we may encounter literal strings. Within these, we do -// not want to inadvertantly match things like '}' and so we eat them -// specifically. -// -fragment -ACTION_STRING_LITERAL - : '"' (('\\')=>ACTION_ESC | ~'"')* '"' - ; - -// Within literal strings and characters that are not part of the ANTLR -// syntax, we must allow for escaped character sequences so that we do not -// inadvertantly recognize the end of a string or character when the terminating -// delimiter has been esacped. -// -fragment -ACTION_ESC - : '\\' . - ; - -// ------- -// Integer -// -// Obviously (I hope) match an aribtrary long sequence of digits. -// -INT : ('0'..'9')+ - ; - -// ----------- -// Source spec -// -// A fragment rule for picking up information about an origrinating -// file from which the grammar we are parsing has been generated. This allows -// ANTLR to report errors against the originating file and not the generated -// file. -// -fragment -SRC : 'src' WSCHARS+ file=ACTION_STRING_LITERAL WSCHARS+ line=INT - { - // TODO: Add target specific code to change the source file name and current line number - // - } - ; - -// -------------- -// Literal string -// -// ANTLR makes no disticintion between a single character literal and a -// multi-character string. All literals are single quote delimited and -// may contain unicode escape sequences of the form \uxxxx, where x -// is a valid hexadecimal number (as per Java basically). -STRING_LITERAL - : '\'' ( ( ESC_SEQ | ~('\\'|'\''|'\r'|'\n') ) )* - ( '\'' - | // Unterminated string literal - { - Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1); - t.setLine(state.tokenStartLine); - t.setText(state.text); - t.setCharPositionInLine(state.tokenStartCharPositionInLine); - grammarError(ErrorType.UNTERMINATED_STRING_LITERAL, t); - } - ) - ; - -// A valid hex digit specification -// -fragment -HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ; - -// Any kind of escaped character that we can embed within ANTLR -// literal strings. -// -fragment -ESC_SEQ - : '\\' - ( - // The standard escaped character set such as tab, newline, - // etc. - // - 'b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\' - - | // A Java style Unicode escape sequence - // - UNICODE_ESC - - | // An illegal escape seqeunce - // - { - // TODO: Issue error message - // - } - ) - ; - -fragment -UNICODE_ESC - -@init { - - // Flag to tell us whether we have a valid number of - // hex digits in the escape sequence - // - int hCount = 0; -} - : 'u' // Leadin for unicode escape sequence - - // We now require 4 hex digits. Note though - // that we accept any number of characters - // and issue an error if we do not get 4. We cannot - // use an inifinite count such as + because this - // might consume too many, so we lay out the lexical - // options and issue an error at the invalid paths. - // - ( - ( - HEX_DIGIT { hCount++; } - ( - HEX_DIGIT { hCount++; } - ( - HEX_DIGIT { hCount++; } - ( - // Four valid hex digits, we are good - // - HEX_DIGIT { hCount++; } - - | // Three valid digits - ) - - | // Two valid digits - ) - - | // One valid digit - ) - ) - | // No valid hex digits at all - ) - - // Now check the digit count and issue an error if we need to - // - { - if (hCount != 4) { - - // TODO: Issue error message - } - } - ; - -// ---------- -// Whitespace -// -// Characters and character constructs that are of no import -// to the parser and are used to make the grammar easier to read -// for humans. -// -WS - : ( - ' ' - | '\t' - | '\r' - | '\n' - | '\f' - )+ - {$channel=HIDDEN;} - ; - -// A fragment rule for use in recognizing end of line in -// rules like COMMENT. -// -fragment -NLCHARS - : '\n' | '\r' - ; - -// A fragment rule for recognizing traditional whitespace -// characters within lexer rules. -// -fragment -WSCHARS - : ' ' | '\t' | '\f' - ; - -// A fragment rule for recognizing both traditional whitespace and -// end of line markers, when we don't care to distinguish but don't -// want any action code going on. -// -fragment -WSNLCHARS - : ' ' | '\t' | '\f' | '\n' | '\r' - ; - -// ----------------- -// Illegal Character -// -// This is an illegal character trap which is always the last rule in the -// lexer specification. It matches a single character of any value and being -// the last rule in the file will match when no other rule knows what to do -// about the character. It is reported as an error but is not passed on to the -// parser. This means that the parser to deal with the gramamr file anyway -// but we will not try to analyse or code generate from a file with lexical -// errors. -// -ERRCHAR - : . - { - Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1); - t.setLine(state.tokenStartLine); - t.setText(state.text); - t.setCharPositionInLine(state.tokenStartCharPositionInLine); - String msg = getTokenErrorDisplay(t) + " came as a complete surprise to me"; - grammarError(ErrorType.SYNTAX_ERROR, t, msg); - skip(); - } - ; diff --git a/tool/src/org/antlr/v4/parse/ANTLRParser.g b/tool/src/org/antlr/v4/parse/ANTLRParser.g deleted file mode 100644 index 7f5675c81..000000000 --- a/tool/src/org/antlr/v4/parse/ANTLRParser.g +++ /dev/null @@ -1,924 +0,0 @@ -/* - * [The "BSD license"] - * Copyright (c) 2012 Terence Parr - * Copyright (c) 2012 Sam Harwell - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** The definitive ANTLR v3 grammar to parse ANTLR v4 grammars. - * The grammar builds ASTs that are sniffed by subsequent stages. - */ -parser grammar ANTLRParser; - -options { - // Target language is Java, which is the default but being specific - // here as this grammar is also meant as a good example grammar for - // for users. - language = Java; - - // The output of this grammar is going to be an AST upon which - // we run a semantic checking phase, then the rest of the analysis - // including final code generation. - output = AST; - - // The vocabulary (tokens and their int token types) we are using - // for the parser. This is generated by the lexer. The vocab will be extended - // to include the imaginary tokens below. - tokenVocab = ANTLRLexer; - - ASTLabelType = GrammarAST; -} - -// Imaginary Tokens -// -// Imaginary tokens do not exist as far as the lexer is concerned, and it cannot -// generate them. However we sometimes need additional 'tokens' to use as root -// nodes for the AST we are generating. The tokens section is where we -// specify any such tokens -tokens { - RULE; - PREC_RULE; // flip to this if we find that it's left-recursive - RULES; - RULEMODIFIERS; - RULEACTIONS; - BLOCK; - OPTIONAL; - CLOSURE; - POSITIVE_CLOSURE; - RANGE; - SET; - CHAR_RANGE; - EPSILON; - ALT; - ALTLIST; - ID; - ARG; - ARGLIST; - RET; - COMBINED; - INITACTION; - LABEL; // $x used in rewrite rules - TEMPLATE; - WILDCARD; - // A generic node indicating a list of something when we don't - // really need to distinguish what we have a list of as the AST - // will 'kinow' by context. - // - LIST; - ELEMENT_OPTIONS; // TOKEN - RESULT; - - // lexer action stuff - LEXER_ALT_ACTION; - LEXER_ACTION_CALL; // ID(foo) -} - -// Include the copyright in this source and also the generated source -// -@header { -/* - [The "BSD licence"] - Copyright (c) 2005-20012 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -package org.antlr.v4.parse; - -import org.antlr.v4.tool.*; -import org.antlr.v4.tool.ast.*; - -import java.util.ArrayDeque; -import java.util.Deque; -} - -@members { -Deque paraphrases = new ArrayDeque(); -public void grammarError(ErrorType etype, org.antlr.runtime.Token token, Object... args) { } -} - -// The main entry point for parsing a V3 grammar from top to toe. This is -// the method call from whence to obtain the AST for the parse. -// -grammarSpec -@after { -GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS); -if ( options!=null ) { - Grammar.setNodeOptions($tree, options); -} -} - : - // The grammar itself can have a documenation comment, which is the - // first terminal in the file. - // - DOC_COMMENT? - - // Next we should see the type and name of the grammar file that - // we are about to parse. - // - grammarType id SEMI - - // There now follows zero or more declaration sections that should - // be given to us before the rules are declared - // -// A number of things can be declared/stated before the grammar rules -// 'proper' are parsed. These include grammar imports (delegate), grammar -// options, imaginary token declarations, global scope declarations, -// and actions such as @header. In this rule we allow any number of -// these constructs in any order so that the grammar author is not -// constrained by some arbitrary order of declarations that nobody -// can remember. In the next phase of the parse, we verify that these -// constructs are valid, not repeated and so on. - sync ( prequelConstruct sync )* - - // We should now see at least one ANTLR EBNF style rule - // declaration. If the rules are missing we will let the - // semantic verification phase tell the user about it. - // - rules - - modeSpec* - - // And we force ANTLR to process everything it finds in the input - // stream by specifying hte need to match End Of File before the - // parse is complete. - // - EOF - - // Having parsed everything in the file and accumulated the relevant - // subtrees, we can now rewrite everything into the main AST form - // that our tree walkers are expecting. - // - - -> ^(grammarType // The grammar type is our root AST node - id // We need to identify the grammar of course - DOC_COMMENT? // We may or may not have a global documentation comment for the file - prequelConstruct* // The set of declarations we accumulated - rules // And of course, we need the set of rules we discovered - modeSpec* - ) - ; - -grammarType -@after { - if ( $tg!=null ) throw new v3TreeGrammarException(tg); - if ( $t!=null ) ((GrammarRootAST)$tree).grammarType = $t.type; - else ((GrammarRootAST)$tree).grammarType=COMBINED; -} - : ( t=LEXER g=GRAMMAR -> GRAMMAR[$g, "LEXER_GRAMMAR", getTokenStream()] - | // A standalone parser specification - t=PARSER g=GRAMMAR -> GRAMMAR[$g, "PARSER_GRAMMAR", getTokenStream()] - - // A combined lexer and parser specification - | g=GRAMMAR -> GRAMMAR[$g, "COMBINED_GRAMMAR", getTokenStream()] - | tg=TREE_GRAMMAR - - ) - ; - -// This is the list of all constructs that can be declared before -// the set of rules that compose the grammar, and is invoked 0..n -// times by the grammarPrequel rule. -prequelConstruct - : // A list of options that affect analysis and/or code generation - optionsSpec - - | // A list of grammars to which this grammar will delegate certain - // parts of the parsing sequence - a set of imported grammars - delegateGrammars - - | // The declaration of any token types we need that are not already - // specified by a preceeding grammar, such as when a parser declares - // imaginary tokens with which to construct the AST, or a rewriting - // tree parser adds further imaginary tokens to ones defined in a prior - // {tree} parser. - tokensSpec - - | // A declaration of language target implemented constructs. All such - // action sections start with '@' and are given to the language target's - // StringTemplate group. For instance @parser::header and @lexer::header - // are gathered here. - action - ; - -// A list of options that affect analysis and/or code generation -optionsSpec - : OPTIONS (option SEMI)* RBRACE -> ^(OPTIONS[$OPTIONS, "OPTIONS"] option*) - ; - -option - : id ASSIGN^ optionValue - ; - -// ------------ -// Option Value -// -// The actual value of an option - Doh! -// -optionValue - : // If the option value is a single word that conforms to the - // lexical rules of token or rule names, then the user may skip quotes - // and so on. Many option values meet this description - qid - | STRING_LITERAL - | ACTION - | INT - ; - -// A list of grammars to which this grammar will delegate certain -// parts of the parsing sequence - a set of imported grammars -delegateGrammars - : IMPORT delegateGrammar (COMMA delegateGrammar)* SEMI -> ^(IMPORT delegateGrammar+) - ; - -// A possibly named grammar file that should be imported to this gramamr -// and delgated to for the rules it specifies -delegateGrammar - : id ASSIGN^ id - | id - ; - -tokensSpec - : TOKENS_SPEC id (COMMA id)* RBRACE -> ^(TOKENS_SPEC id+) - | TOKENS_SPEC RBRACE -> - | TOKENS_SPEC^ v3tokenSpec+ RBRACE! - {grammarError(ErrorType.V3_TOKENS_SYNTAX, $TOKENS_SPEC);} - ; - -v3tokenSpec - : id - ( ASSIGN lit=STRING_LITERAL - { - grammarError(ErrorType.V3_ASSIGN_IN_TOKENS, $id.start, - $id.text, $lit.getText()); - } - -> id // ignore assignment - | -> id - ) - SEMI - ; - -// A declaration of a language target specifc section, -// such as @header, @includes and so on. We do not verify these -// sections, they are just passed on to the language target. -/** Match stuff like @parser::members {int i;} */ -action - : AT (actionScopeName COLONCOLON)? id ACTION -> ^(AT actionScopeName? id ACTION) - ; - -/** Sometimes the scope names will collide with keywords; allow them as - * ids for action scopes. - */ -actionScopeName - : id - | LEXER -> ID[$LEXER] - | PARSER -> ID[$PARSER] - ; - -modeSpec - : MODE id SEMI sync (lexerRule sync)* -> ^(MODE id lexerRule*) - ; - -rules - : sync (rule sync)* - // Rewrite with an enclosing node as this is good for counting - // the number of rules and an easy marker for the walker to detect - // that there are no rules. - ->^(RULES rule*) - ; - -sync -@init { - BitSet followSet = computeErrorRecoverySet(); - if ( input.LA(1)!=Token.EOF && !followSet.member(input.LA(1)) ) { - reportError(new NoViableAltException("",0,0,input)); - beginResync(); - consumeUntil(input, followSet); - endResync(); - } -} : - ; - -rule: parserRule - | lexerRule - ; - -// The specification of an EBNF rule in ANTLR style, with all the -// rule level parameters, declarations, actions, rewrite specs and so -// on. -// -// Note that here we allow any number of rule declaration sections (such -// as scope, returns, etc) in any order and we let the upcoming semantic -// verification of the AST determine if things are repeated or if a -// particular functional element is not valid in the context of the -// grammar type, such as using returns in lexer rules and so on. -parserRule -@init { paraphrases.push("matching a rule"); } -@after { - paraphrases.pop(); - GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS); - if ( options!=null ) { - Grammar.setNodeOptions($tree, options); - } -} - : // A rule may start with an optional documentation comment - DOC_COMMENT? - - // Next comes the rule name. Here we do not distinguish between - // parser or lexer rules, the semantic verification phase will - // reject any rules that make no sense, such as lexer rules in - // a pure parser or tree parser. - RULE_REF - - // Immediately following the rulename, there may be a specification - // of input parameters for the rule. We do not do anything with the - // parameters here except gather them for future phases such as - // semantic verifcation, type assignment etc. We require that - // the input parameters are the next syntactically significant element - // following the rule id. - ARG_ACTION? - - ruleReturns? - - throwsSpec? - - localsSpec? - - // Now, before the rule specification itself, which is introduced - // with a COLON, we may have zero or more configuration sections. - // As usual we just accept anything that is syntactically valid for - // one form of the rule or another and let the semantic verification - // phase throw out anything that is invalid. -// At the rule level, a programmer may specify a number of sections, such -// as scope declarations, rule return elements, @ sections (which may be -// language target specific) and so on. We allow any number of these in any -// order here and as usual rely onthe semantic verification phase to reject -// anything invalid using its addinotal context information. Here we are -// context free and just accept anything that is a syntactically correct -// construct. -// - rulePrequels - - COLON - - // The rule is, at the top level, just a list of alts, with - // finer grained structure defined within the alts. - ruleBlock - - SEMI - - exceptionGroup - - -> ^( RULE RULE_REF DOC_COMMENT? ARG_ACTION? - ruleReturns? throwsSpec? localsSpec? rulePrequels? ruleBlock exceptionGroup* - ) - ; - -// Many language targets support exceptions and the rule will -// generally be able to throw the language target equivalent -// of a recognition exception. The grammar programmar can -// specify a list of exceptions to catch or a generic catch all -// and the target language code generation template is -// responsible for generating code that makes sense. -exceptionGroup - : exceptionHandler* finallyClause? - ; - -// Specifies a handler for a particular type of exception -// thrown by a rule -exceptionHandler - : CATCH ARG_ACTION ACTION -> ^(CATCH ARG_ACTION ACTION) - ; - -finallyClause - : FINALLY ACTION -> ^(FINALLY ACTION) - ; - -rulePrequels -@init { paraphrases.push("matching rule preamble"); } -@after { paraphrases.pop(); } - : sync (rulePrequel sync)* -> rulePrequel* - ; - -// An individual rule level configuration as referenced by the ruleActions -// rule above. -// -rulePrequel - : optionsSpec - | ruleAction - ; - -// A rule can return elements that it constructs as it executes. -// The return values are specified in a 'returns' prequel element, -// which contains COMMA separated declarations, where the declaration -// is target language specific. Here we see the returns declaration -// as a single lexical action element, to be processed later. -// -ruleReturns - : RETURNS^ ARG_ACTION - ; - -// -------------- -// Exception spec -// -// Some target languages, such as Java and C# support exceptions -// and they are specified as a prequel element for each rule that -// wishes to throw its own exception type. Note that the name of the -// exception is just a single word, so the header section of the grammar -// must specify the correct import statements (or language equivalent). -// Target languages that do not support exceptions just safely ignore -// them. -// -throwsSpec - : THROWS qid (COMMA qid)* -> ^(THROWS qid+) - ; - -// locals [Cat x, float g] -localsSpec : LOCALS^ ARG_ACTION ; - -// @ Sections are generally target language specific things -// such as local variable declarations, code to run before the -// rule starts and so on. Fir instance most targets support the -// @init {} section where declarations and code can be placed -// to run before the rule is entered. The C target also has -// an @declarations {} section, where local variables are declared -// in order that the generated code is C89 copmliant. -// -/** Match stuff like @init {int i;} */ -ruleAction - : AT id ACTION -> ^(AT id ACTION) - ; - -// A set of alts, rewritten as a BLOCK for generic processing -// in tree walkers. Used by the rule 'rule' so that the list of -// alts for a rule appears as a BLOCK containing the alts and -// can be processed by the generic BLOCK rule. Note that we -// use a separate rule so that the BLOCK node has start and stop -// boundaries set correctly by rule post processing of rewrites. -ruleBlock -@init {Token colon = input.LT(-1);} - : ruleAltList -> ^(BLOCK[colon,"BLOCK"] ruleAltList) - ; - catch [ResyncToEndOfRuleBlock e] { - // just resyncing; ignore error - retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null); - } - -ruleAltList - : labeledAlt (OR labeledAlt)* -> labeledAlt+ - ; - -labeledAlt - : alternative - ( POUND! id! {((AltAST)$alternative.tree).altLabel=$id.tree;} - )? - ; - -lexerRule -@init { paraphrases.push("matching a lexer rule"); } -@after { - paraphrases.pop(); -} - : DOC_COMMENT? FRAGMENT? - TOKEN_REF COLON lexerRuleBlock SEMI - -> ^( RULE TOKEN_REF DOC_COMMENT? - ^(RULEMODIFIERS FRAGMENT)? lexerRuleBlock - ) - ; - -lexerRuleBlock -@init {Token colon = input.LT(-1);} - : lexerAltList -> ^(BLOCK[colon,"BLOCK"] lexerAltList) - ; - catch [ResyncToEndOfRuleBlock e] { - // just resyncing; ignore error - retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null); - } - -lexerAltList - : lexerAlt (OR lexerAlt)* -> lexerAlt+ - ; - -lexerAlt - : lexerElements - ( lexerCommands -> ^(LEXER_ALT_ACTION lexerElements lexerCommands) - | -> lexerElements - ) - | -> ^(ALT EPSILON) // empty alt - ; - -lexerElements - : lexerElement+ -> ^(ALT lexerElement+) - ; - -lexerElement -@init { - paraphrases.push("looking for lexer rule element"); - int m = input.mark(); -} -@after { paraphrases.pop(); } - : labeledLexerElement - ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK[$labeledLexerElement.start,"BLOCK"] ^(ALT labeledLexerElement) ) ) - | -> labeledLexerElement - ) - | lexerAtom - ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK[$lexerAtom.start,"BLOCK"] ^(ALT lexerAtom) ) ) - | -> lexerAtom - ) - | lexerBlock - ( ebnfSuffix -> ^(ebnfSuffix lexerBlock) - | -> lexerBlock - ) - | actionElement // actions only allowed at end of outer alt actually, - // but preds can be anywhere - ; - catch [RecognitionException re] { - retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re); - int ttype = input.get(input.range()).getType(); // seems to be next token - // look for anything that really belongs at the start of the rule minus the initial ID - if ( ttype==COLON || ttype==RETURNS || ttype==CATCH || ttype==FINALLY || ttype==AT || ttype==EOF ) { - RecognitionException missingSemi = - new v4ParserException("unterminated rule (missing ';') detected at '"+ - input.LT(1).getText()+" "+input.LT(2).getText()+"'", input); - reportError(missingSemi); - if ( ttype==EOF ) { - input.seek(input.index()+1); - } - else if ( ttype==CATCH || ttype==FINALLY ) { - input.seek(input.range()); // ignore what's before rule trailer stuff - } - else if ( ttype==RETURNS || ttype==AT ) { // scan back looking for ID of rule header - int p = input.index(); - Token t = input.get(p); - while ( t.getType()!=RULE_REF && t.getType()!=TOKEN_REF ) { - p--; - t = input.get(p); - } - input.seek(p); - } - throw new ResyncToEndOfRuleBlock(); // make sure it goes back to rule block level to recover - } - reportError(re); - recover(input,re); - } - -labeledLexerElement - : id (ass=ASSIGN|ass=PLUS_ASSIGN) - ( lexerAtom -> ^($ass id lexerAtom) - | lexerBlock -> ^($ass id lexerBlock) - ) - ; - - -lexerBlock -@after { -GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS); -if ( options!=null ) { - Grammar.setNodeOptions($tree, options); -} -} - : LPAREN - ( optionsSpec COLON )? - lexerAltList - RPAREN - -> ^(BLOCK[$LPAREN,"BLOCK"] optionsSpec? lexerAltList ) - ; - -// channel=HIDDEN, skip, more, mode(INSIDE), push(INSIDE), pop -lexerCommands - : RARROW lexerCommand (COMMA lexerCommand)* -> lexerCommand+ - ; - -lexerCommand - : lexerCommandName LPAREN lexerCommandExpr RPAREN -> ^(LEXER_ACTION_CALL lexerCommandName lexerCommandExpr) - | lexerCommandName - ; - -lexerCommandExpr - : id - | INT - ; - -lexerCommandName - : id - | MODE ->ID[$MODE] - ; - -altList - : alternative (OR alternative)* -> alternative+ - ; - -// An individual alt with an optional alt option like -alternative -@init { paraphrases.push("matching alternative"); } -@after { - paraphrases.pop(); - Grammar.setNodeOptions($tree, $o.tree); -} - : o=elementOptions? - e+=element+ -> ^(ALT elementOptions? $e+) - | -> ^(ALT EPSILON) // empty alt - ; - -element -@init { - paraphrases.push("looking for rule element"); - int m = input.mark(); -} -@after { paraphrases.pop(); } - : labeledElement - ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK[$labeledElement.start,"BLOCK"] ^(ALT labeledElement ) )) - | -> labeledElement - ) - | atom - ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK[$atom.start,"BLOCK"] ^(ALT atom) ) ) - | -> atom - ) - | ebnf - | actionElement - ; - catch [RecognitionException re] { - retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re); - int ttype = input.get(input.range()).getType(); - // look for anything that really belongs at the start of the rule minus the initial ID - if ( ttype==COLON || ttype==RETURNS || ttype==CATCH || ttype==FINALLY || ttype==AT ) { - RecognitionException missingSemi = - new v4ParserException("unterminated rule (missing ';') detected at '"+ - input.LT(1).getText()+" "+input.LT(2).getText()+"'", input); - reportError(missingSemi); - if ( ttype==CATCH || ttype==FINALLY ) { - input.seek(input.range()); // ignore what's before rule trailer stuff - } - if ( ttype==RETURNS || ttype==AT ) { // scan back looking for ID of rule header - int p = input.index(); - Token t = input.get(p); - while ( t.getType()!=RULE_REF && t.getType()!=TOKEN_REF ) { - p--; - t = input.get(p); - } - input.seek(p); - } - throw new ResyncToEndOfRuleBlock(); // make sure it goes back to rule block level to recover - } - reportError(re); - recover(input,re); - } - -actionElement -@after { - GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS); - if ( options!=null ) { - Grammar.setNodeOptions($tree, options); - } -} - : ACTION - | ACTION elementOptions -> ^(ACTION elementOptions) - | SEMPRED - | SEMPRED elementOptions -> ^(SEMPRED elementOptions) - ; - -labeledElement - : id (ass=ASSIGN|ass=PLUS_ASSIGN) - ( atom -> ^($ass id atom) - | block -> ^($ass id block) - ) - ; - -// A block of gramamr structure optionally followed by standard EBNF -// notation, or ANTLR specific notation. I.E. ? + ^ and so on -ebnf - : block - // And now we see if we have any of the optional suffixs and rewrite - // the AST for this rule accordingly - ( blockSuffix -> ^(blockSuffix block) - | -> block - ) - ; - -// The standard EBNF suffixes with additional components that make -// sense only to ANTLR, in the context of a grammar block. -blockSuffix - : ebnfSuffix // Standard EBNF - ; - -ebnfSuffix - : QUESTION nongreedy=QUESTION? -> OPTIONAL[$start, $nongreedy] - | STAR nongreedy=QUESTION? -> CLOSURE[$start, $nongreedy] - | PLUS nongreedy=QUESTION? -> POSITIVE_CLOSURE[$start, $nongreedy] - ; - -lexerAtom - : range - | terminal - | RULE_REF - | notSet - | wildcard - | LEXER_CHAR_SET - ; - -atom - : // Qualified reference delegate.rule. This must be - // lexically contiguous (no spaces either side of the DOT) - // otherwise it is two references with a wildcard in between - // and not a qualified reference. - /* - { - input.LT(1).getCharPositionInLine()+input.LT(1).getText().length()== - input.LT(2).getCharPositionInLine() && - input.LT(2).getCharPositionInLine()+1==input.LT(3).getCharPositionInLine() - }? - id DOT ruleref -> ^(DOT id ruleref) - - | - */ - range // Range x..y - only valid in lexers - | terminal - | ruleref - | notSet - | wildcard - ; - catch [RecognitionException re] { throw re; } // pass upwards to element - -wildcard -@after { - GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS); - if ( options!=null ) { - Grammar.setNodeOptions($tree, options); - } -} - : // Wildcard '.' means any character in a lexer, any - // token in parser and any node or subtree in a tree parser - // Because the terminal rule is allowed to be the node - // specification for the start of a tree rule, we must - // later check that wildcard was not used for that. - DOT elementOptions? - -> ^(WILDCARD[$DOT] elementOptions?) - ; - -// -------------------- -// Inverted element set -// -// A set of characters (in a lexer) or terminal tokens, if a parser, -// that are then used to create the inverse set of them. -notSet - : NOT setElement -> ^(NOT[$NOT] ^(SET[$setElement.start,"SET"] setElement)) - | NOT blockSet -> ^(NOT[$NOT] blockSet) - ; - -blockSet -@init { - Token t; - boolean ebnf = false; -} - : LPAREN setElement (OR setElement)* RPAREN - -> ^(SET[$LPAREN,"SET"] setElement+ ) - ; - -setElement - : TOKEN_REF - | STRING_LITERAL - | range - | LEXER_CHAR_SET - ; - -// ------------- -// Grammar Block -// -// Anywhere where an element is valid, the grammar may start a new block -// of alts by surrounding that block with ( ). A new block may also have a set -// of options, which apply only to that block. -// -block -@after { -GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS); -if ( options!=null ) { - Grammar.setNodeOptions($tree, options); -} -} - : LPAREN - ( optionsSpec? ra+=ruleAction* COLON )? - altList - RPAREN - -> ^(BLOCK[$LPAREN,"BLOCK"] optionsSpec? $ra* altList ) - ; - -// ---------------- -// Parser rule ref -// -// Reference to a parser rule with optional arguments and optional -// directive to become the root node or ignore the tree produced -// -ruleref -@after { -GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS); -if ( options!=null ) { - Grammar.setNodeOptions($tree, options); -} -} - : RULE_REF ARG_ACTION? elementOptions? -> ^(RULE_REF ARG_ACTION? elementOptions?) - ; - catch [RecognitionException re] { throw re; } // pass upwards to element - -// --------------- -// Character Range -// -// Specifies a range of characters. Valid for lexer rules only, but -// we do not check that here, the tree walkers shoudl do that. -// Note also that the parser also allows through more than just -// character literals so that we can produce a much nicer semantic -// error about any abuse of the .. operator. -// -range - : STRING_LITERAL RANGE^ STRING_LITERAL - ; - -terminal -@after { -GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS); -if ( options!=null ) { - Grammar.setNodeOptions($tree, options); -} -} - : TOKEN_REF elementOptions? -> ^(TOKEN_REF elementOptions?) - | STRING_LITERAL elementOptions? -> ^(STRING_LITERAL elementOptions?) - ; - -// Terminals may be adorned with certain options when -// reference in the grammar: TOK<,,,> -elementOptions - : LT (elementOption (COMMA elementOption)*)? GT - -> ^(ELEMENT_OPTIONS[$LT,"ELEMENT_OPTIONS"] elementOption*) - ; - -// When used with elements we can specify what the tree node type can -// be and also assign settings of various options (which we do not check here) -elementOption - : // This format indicates the default element option - qid - | id ASSIGN^ optionValue - ; - -// The name of the grammar, and indeed some other grammar elements may -// come through to the parser looking like a rule reference or a token -// reference, hence this rule is used to pick up whichever it is and rewrite -// it as a generic ID token. -id -@init { paraphrases.push("looking for an identifier"); } -@after { paraphrases.pop(); } - : RULE_REF ->ID[$RULE_REF] - | TOKEN_REF ->ID[$TOKEN_REF] - ; - -qid -@init { paraphrases.push("looking for a qualified identifier"); } -@after { paraphrases.pop(); } - : id (DOT id)* -> ID[$qid.start, $text] - ; - -alternativeEntry : alternative EOF ; // allow gunit to call alternative and see EOF afterwards -elementEntry : element EOF ; -ruleEntry : rule EOF ; -blockEntry : block EOF ; diff --git a/tool/src/org/antlr/v4/parse/ATNBuilder.g b/tool/src/org/antlr/v4/parse/ATNBuilder.g deleted file mode 100644 index d98a43a29..000000000 --- a/tool/src/org/antlr/v4/parse/ATNBuilder.g +++ /dev/null @@ -1,200 +0,0 @@ -/* - * [The "BSD license"] - * Copyright (c) 2012 Terence Parr - * Copyright (c) 2012 Sam Harwell - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -tree grammar ATNBuilder; -options { - language = Java; - tokenVocab = ANTLRParser; - ASTLabelType = GrammarAST; -// filter = true; -} - -// Include the copyright in this source and also the generated source -@header { -/* - [The "BSD license"] - Copyright (c) 2010 Terence Parr - All rights reserved. - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -package org.antlr.v4.parse; -import org.antlr.v4.tool.*; -import org.antlr.v4.tool.ast.*; -import org.antlr.v4.automata.ATNFactory; -} - -@members { - ATNFactory factory; - public ATNBuilder(TreeNodeStream input, ATNFactory factory) { - this(input); - this.factory = factory; - } -} - -dummy : block[null] ; // avoid error about no start rule - -ruleBlock[GrammarAST ebnfRoot] returns [ATNFactory.Handle p] -@init { - List alts = new ArrayList(); - int alt = 1; - factory.setCurrentOuterAlt(alt); -} - : ^(BLOCK - (^(OPTIONS .*))? - ( a=alternative - {alts.add($a.p); factory.setCurrentOuterAlt(++alt);} - )+ - ) - {$p = factory.block((BlockAST)$BLOCK, ebnfRoot, alts);} - ; - -block[GrammarAST ebnfRoot] returns [ATNFactory.Handle p] -@init {List alts = new ArrayList();} - : ^(BLOCK (^(OPTIONS .*))? (a=alternative {alts.add($a.p);})+) - {$p = factory.block((BlockAST)$BLOCK, ebnfRoot, alts);} - ; - -alternative returns [ATNFactory.Handle p] -@init {List els = new ArrayList();} - : ^(LEXER_ALT_ACTION a=alternative lexerCommands) - {$p = factory.lexerAltCommands($a.p,$lexerCommands.p);} - | ^(ALT EPSILON) {$p = factory.epsilon($EPSILON);} - | ^(ALT (e=element {els.add($e.p);})+) {$p = factory.alt(els);} - ; - -lexerCommands returns [ATNFactory.Handle p] -@init {StringBuilder cmds = new StringBuilder();} - : (c=lexerCommand {cmds.append($c.cmd).append(' ');})+ - { - $p = factory.action(cmds.toString()); - } - ; - -lexerCommand returns [String cmd] - : ^(LEXER_ACTION_CALL ID lexerCommandExpr) - {$cmd = factory.lexerCallCommand($ID, $lexerCommandExpr.start);} - | ID - {$cmd = factory.lexerCommand($ID);} - ; - -lexerCommandExpr - : ID - | INT - ; - -element returns [ATNFactory.Handle p] - : labeledElement {$p = $labeledElement.p;} - | atom {$p = $atom.p;} - | subrule {$p = $subrule.p;} - | ACTION {$p = factory.action((ActionAST)$ACTION);} - | SEMPRED {$p = factory.sempred((PredAST)$SEMPRED);} - | ^(ACTION .) {$p = factory.action((ActionAST)$ACTION);} - | ^(SEMPRED .) {$p = factory.sempred((PredAST)$SEMPRED);} - | ^(NOT b=blockSet[true]) {$p = $b.p;} - | LEXER_CHAR_SET {$p = factory.charSetLiteral($start);} - ; - -astOperand returns [ATNFactory.Handle p] - : atom {$p = $atom.p;} - | ^(NOT blockSet[true]) {$p = $blockSet.p;} - ; - -labeledElement returns [ATNFactory.Handle p] - : ^(ASSIGN ID element) {$p = factory.label($element.p);} - | ^(PLUS_ASSIGN ID element) {$p = factory.listLabel($element.p);} - ; - -subrule returns [ATNFactory.Handle p] - : ^(OPTIONAL block[$start]) {$p = $block.p;} - | ^(CLOSURE block[$start]) {$p = $block.p;} - | ^(POSITIVE_CLOSURE block[$start]) {$p = $block.p;} - | block[null] {$p = $block.p;} - ; - -blockSet[boolean invert] returns [ATNFactory.Handle p] -@init {List alts = new ArrayList();} - : ^(SET (setElement {alts.add($setElement.start);})+) {$p = factory.set($start, alts, $invert);} - ; - -/** Don't combine with atom otherwise it will build spurious ATN nodes */ -setElement - : STRING_LITERAL - | TOKEN_REF - | ^(RANGE a=STRING_LITERAL b=STRING_LITERAL) - | LEXER_CHAR_SET - ; - -atom returns [ATNFactory.Handle p] - : range {$p = $range.p;} - | ^(DOT ID terminal) {$p = $terminal.p;} - | ^(DOT ID ruleref) {$p = $ruleref.p;} - | ^(WILDCARD .) {$p = factory.wildcard($start);} - | WILDCARD {$p = factory.wildcard($start);} - | blockSet[false] {$p = $blockSet.p;} - | terminal {$p = $terminal.p;} - | ruleref {$p = $ruleref.p;} - ; - -ruleref returns [ATNFactory.Handle p] - : ^(RULE_REF ARG_ACTION? ^(ELEMENT_OPTIONS .*)) {$p = factory.ruleRef($RULE_REF);} - | ^(RULE_REF ARG_ACTION?) {$p = factory.ruleRef($RULE_REF);} - | RULE_REF {$p = factory.ruleRef($RULE_REF);} - ; - -range returns [ATNFactory.Handle p] - : ^(RANGE a=STRING_LITERAL b=STRING_LITERAL) {$p = factory.range($a,$b);} - ; - -terminal returns [ATNFactory.Handle p] - : ^(STRING_LITERAL .) {$p = factory.stringLiteral((TerminalAST)$start);} - | STRING_LITERAL {$p = factory.stringLiteral((TerminalAST)$start);} - | ^(TOKEN_REF ARG_ACTION .) {$p = factory.tokenRef((TerminalAST)$start);} - | ^(TOKEN_REF .) {$p = factory.tokenRef((TerminalAST)$start);} - | TOKEN_REF {$p = factory.tokenRef((TerminalAST)$start);} - ; diff --git a/tool/src/org/antlr/v4/parse/ActionSplitter.g b/tool/src/org/antlr/v4/parse/ActionSplitter.g deleted file mode 100644 index 5feeb955d..000000000 --- a/tool/src/org/antlr/v4/parse/ActionSplitter.g +++ /dev/null @@ -1,125 +0,0 @@ -/* - * [The "BSD license"] - * Copyright (c) 2012 Terence Parr - * Copyright (c) 2012 Sam Harwell - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -lexer grammar ActionSplitter; - -options { filter=true; } - -@header { -package org.antlr.v4.parse; -import org.antlr.v4.tool.*; -import org.antlr.v4.tool.ast.*; -} - -@members { -ActionSplitterListener delegate; - -public ActionSplitter(CharStream input, ActionSplitterListener delegate) { - this(input, new RecognizerSharedState()); - this.delegate = delegate; -} - -/** force filtering (and return tokens). triggers all above actions. */ -public List getActionTokens() { - List chunks = new ArrayList(); - Token t = nextToken(); - while ( t.getType()!=Token.EOF ) { - chunks.add(t); - t = nextToken(); - } - return chunks; -} - -private boolean isIDStartChar(int c) { - return c == '_' || Character.isLetter(c); -} -} - -// ignore comments right away - -COMMENT - : '/*' ( options {greedy=false;} : . )* '*/' {delegate.text($text);} - ; - -LINE_COMMENT - : '//' ~('\n'|'\r')* '\r'? '\n' {delegate.text($text);} - ; - -SET_NONLOCAL_ATTR - : '$' x=ID '::' y=ID WS? '=' expr=ATTR_VALUE_EXPR ';' - { - delegate.setNonLocalAttr($text, $x, $y, $expr); - } - ; - -NONLOCAL_ATTR - : '$' x=ID '::' y=ID {delegate.nonLocalAttr($text, $x, $y);} - ; - -QUALIFIED_ATTR - : '$' x=ID '.' y=ID {input.LA(1)!='('}? {delegate.qualifiedAttr($text, $x, $y);} - ; - -SET_ATTR - : '$' x=ID WS? '=' expr=ATTR_VALUE_EXPR ';' - { - delegate.setAttr($text, $x, $expr); - } - ; - -ATTR - : '$' x=ID {delegate.attr($text, $x);} - ; - -// Anything else is just random text -TEXT -@init {StringBuilder buf = new StringBuilder();} -@after {delegate.text(buf.toString());} - : ( c=~('\\'| '$') {buf.append((char)$c);} - | '\\$' {buf.append('$');} - | '\\' c=~('$') {buf.append('\\').append((char)$c);} - | {!isIDStartChar(input.LA(2))}? => '$' {buf.append('$');} - )+ - ; - -fragment -ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')* - ; - -/** Don't allow an = as first char to prevent $x == 3; kind of stuff. */ -fragment -ATTR_VALUE_EXPR - : ~'=' (~';')* - ; - -fragment -WS : (' '|'\t'|'\n'|'\r')+ - ; - diff --git a/tool/src/org/antlr/v4/parse/BlockSetTransformer.g b/tool/src/org/antlr/v4/parse/BlockSetTransformer.g deleted file mode 100644 index 885bae987..000000000 --- a/tool/src/org/antlr/v4/parse/BlockSetTransformer.g +++ /dev/null @@ -1,115 +0,0 @@ -/* - * [The "BSD license"] - * Copyright (c) 2012 Terence Parr - * Copyright (c) 2012 Sam Harwell - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -tree grammar BlockSetTransformer; -options { - language = Java; - tokenVocab = ANTLRParser; - ASTLabelType = GrammarAST; - output = AST; - filter = true; -} - -@header { -package org.antlr.v4.parse; -import org.antlr.v4.misc.Utils; -import org.antlr.v4.misc.*; -import org.antlr.v4.tool.*; -import org.antlr.v4.tool.ast.*; -import java.util.List; -import java.util.Set; -import java.util.HashSet; -import java.util.ArrayList; -import org.antlr.v4.runtime.misc.IntervalSet; -} - -@members { -public String currentRuleName; -public GrammarAST currentAlt; -public Grammar g; -public BlockSetTransformer(TreeNodeStream input, Grammar g) { - this(input, new RecognizerSharedState()); - this.g = g; -} -} - -topdown - : ^(RULE (id=TOKEN_REF|id=RULE_REF) {currentRuleName=$id.text;} .+) - | setAlt - | ebnfBlockSet - | blockSet - ; - -setAlt - : {inContext("RULE BLOCK")}? - ALT {currentAlt = $start;} - ; - -// (BLOCK (ALT (+ (BLOCK (ALT INT) (ALT ID))))) -ebnfBlockSet -@after { - GrammarTransformPipeline.setGrammarPtr(g, $tree); -} - : ^(ebnfSuffix blockSet) -> ^(ebnfSuffix ^(BLOCK ^(ALT blockSet))) - ; - -ebnfSuffix -@after {$tree = (GrammarAST)adaptor.dupNode($start);} - : OPTIONAL - | CLOSURE - | POSITIVE_CLOSURE - ; - -blockSet -@init { -boolean inLexer = Grammar.isTokenName(currentRuleName); -} -@after { - GrammarTransformPipeline.setGrammarPtr(g, $tree); -} - : {inContext("RULE")}? // top-level: rule block and > 1 alt - ^(BLOCK ^(alt=ALT {((AltAST)$alt).altLabel==null}? setElement[inLexer]) ( ^(ALT setElement[inLexer]) )+) - -> ^(BLOCK[$BLOCK.token] ^(ALT[$BLOCK.token,"ALT"] ^(SET[$BLOCK.token, "SET"] setElement+))) - | {!inContext("RULE")}? // if not rule block and > 1 alt - ^(BLOCK ^(ALT setElement[inLexer]) ( ^(ALT setElement[inLexer]) )+) - -> ^(SET[$BLOCK.token, "SET"] setElement+) - ; - -setElement[boolean inLexer] -@after { - GrammarTransformPipeline.setGrammarPtr(g, $tree); -} - : ( a=STRING_LITERAL {!inLexer || CharSupport.getCharValueFromGrammarCharLiteral($a.getText())!=-1}? - | {!inLexer}?=> TOKEN_REF - | {inLexer}?=> ^(RANGE a=STRING_LITERAL b=STRING_LITERAL) - {CharSupport.getCharValueFromGrammarCharLiteral($a.getText())!=-1 && - CharSupport.getCharValueFromGrammarCharLiteral($b.getText())!=-1}? - ) - ; diff --git a/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g b/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g deleted file mode 100644 index 2f4253725..000000000 --- a/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g +++ /dev/null @@ -1,995 +0,0 @@ -/* - * [The "BSD license"] - * Copyright (c) 2012 Terence Parr - * Copyright (c) 2012 Sam Harwell - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** The definitive ANTLR v3 tree grammar to walk/visit ANTLR v4 grammars. - * Parses trees created by ANTLRParser.g. - * - * Rather than have multiple tree grammars, one for each visit, I'm - * creating this generic visitor that knows about context. All of the - * boilerplate pattern recognition is done here. Then, subclasses can - * override the methods they care about. This prevents a lot of the same - * context tracking stuff like "set current alternative for current - * rule node" that is repeated in lots of tree filters. - */ -tree grammar GrammarTreeVisitor; -options { - language = Java; - tokenVocab = ANTLRParser; - ASTLabelType = GrammarAST; -} - -// Include the copyright in this source and also the generated source -@header { -/* - [The "BSD license"] - Copyright (c) 2011 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -package org.antlr.v4.parse; -import org.antlr.v4.tool.*; -import org.antlr.v4.tool.ast.*; -import java.lang.reflect.Method; -} - -@members { -public String grammarName; -public GrammarAST currentRuleAST; -public String currentModeName = LexerGrammar.DEFAULT_MODE_NAME; -public String currentRuleName; -//public GrammarAST currentRuleBlock; -public GrammarAST currentOuterAltRoot; -public int currentOuterAltNumber = 1; // 1..n -public int rewriteEBNFLevel = 0; - -public GrammarTreeVisitor() { this(null); } - -public ErrorManager getErrorManager() { return null; } - -public void visitGrammar(GrammarAST t) { visit(t, "grammarSpec"); } -public void visit(GrammarAST t, String ruleName) { - CommonTreeNodeStream nodes = new CommonTreeNodeStream(new GrammarASTAdaptor(), t); - setTreeNodeStream(nodes); - try { - Method m = getClass().getMethod(ruleName); - m.invoke(this); - } - catch (Exception e) { - ErrorManager errMgr = getErrorManager(); - if ( errMgr==null ) { - System.err.println("can't find rule "+ruleName+ - " or tree structure error: "+t.toStringTree() - ); - e.printStackTrace(System.err); - } - else errMgr.toolError(ErrorType.INTERNAL_ERROR, e); - } -} - -public void discoverGrammar(GrammarRootAST root, GrammarAST ID) { } -public void finishPrequels(GrammarAST firstPrequel) { } -public void finishGrammar(GrammarRootAST root, GrammarAST ID) { } - -public void grammarOption(GrammarAST ID, GrammarAST valueAST) { } -public void ruleOption(GrammarAST ID, GrammarAST valueAST) { } -public void blockOption(GrammarAST ID, GrammarAST valueAST) { } -public void defineToken(GrammarAST ID) { } -public void globalNamedAction(GrammarAST scope, GrammarAST ID, ActionAST action) { } -public void importGrammar(GrammarAST label, GrammarAST ID) { } - -public void modeDef(GrammarAST m, GrammarAST ID) { } - -public void discoverRules(GrammarAST rules) { } -public void finishRules(GrammarAST rule) { } -public void discoverRule(RuleAST rule, GrammarAST ID, List modifiers, - ActionAST arg, ActionAST returns, GrammarAST thrws, - GrammarAST options, ActionAST locals, - List actions, - GrammarAST block) { } -public void finishRule(RuleAST rule, GrammarAST ID, GrammarAST block) { } -public void discoverLexerRule(RuleAST rule, GrammarAST ID, List modifiers, - GrammarAST block) { } -public void finishLexerRule(RuleAST rule, GrammarAST ID, GrammarAST block) { } -public void ruleCatch(GrammarAST arg, ActionAST action) { } -public void finallyAction(ActionAST action) { } -public void discoverOuterAlt(AltAST alt) { } -public void finishOuterAlt(AltAST alt) { } -public void discoverAlt(AltAST alt) { } -public void finishAlt(AltAST alt) { } - -public void ruleRef(GrammarAST ref, ActionAST arg) { } -public void tokenRef(TerminalAST ref) { } -public void elementOption(GrammarASTWithOptions t, GrammarAST ID, GrammarAST valueAST) { } -public void stringRef(TerminalAST ref) { } -public void wildcardRef(GrammarAST ref) { } -public void actionInAlt(ActionAST action) { } -public void sempredInAlt(PredAST pred) { } -public void label(GrammarAST op, GrammarAST ID, GrammarAST element) { } -public void lexerCallCommand(int outerAltNumber, GrammarAST ID, GrammarAST arg) { } -public void lexerCommand(int outerAltNumber, GrammarAST ID) { } - -protected void enterGrammarSpec(GrammarAST tree) { } -protected void exitGrammarSpec(GrammarAST tree) { } - -protected void enterPrequelConstructs(GrammarAST tree) { } -protected void exitPrequelConstructs(GrammarAST tree) { } - -protected void enterPrequelConstruct(GrammarAST tree) { } -protected void exitPrequelConstruct(GrammarAST tree) { } - -protected void enterOptionsSpec(GrammarAST tree) { } -protected void exitOptionsSpec(GrammarAST tree) { } - -protected void enterOption(GrammarAST tree) { } -protected void exitOption(GrammarAST tree) { } - -protected void enterOptionValue(GrammarAST tree) { } -protected void exitOptionValue(GrammarAST tree) { } - -protected void enterDelegateGrammars(GrammarAST tree) { } -protected void exitDelegateGrammars(GrammarAST tree) { } - -protected void enterDelegateGrammar(GrammarAST tree) { } -protected void exitDelegateGrammar(GrammarAST tree) { } - -protected void enterTokensSpec(GrammarAST tree) { } -protected void exitTokensSpec(GrammarAST tree) { } - -protected void enterTokenSpec(GrammarAST tree) { } -protected void exitTokenSpec(GrammarAST tree) { } - -protected void enterAction(GrammarAST tree) { } -protected void exitAction(GrammarAST tree) { } - -protected void enterRules(GrammarAST tree) { } -protected void exitRules(GrammarAST tree) { } - -protected void enterMode(GrammarAST tree) { } -protected void exitMode(GrammarAST tree) { } - -protected void enterLexerRule(GrammarAST tree) { } -protected void exitLexerRule(GrammarAST tree) { } - -protected void enterRule(GrammarAST tree) { } -protected void exitRule(GrammarAST tree) { } - -protected void enterExceptionGroup(GrammarAST tree) { } -protected void exitExceptionGroup(GrammarAST tree) { } - -protected void enterExceptionHandler(GrammarAST tree) { } -protected void exitExceptionHandler(GrammarAST tree) { } - -protected void enterFinallyClause(GrammarAST tree) { } -protected void exitFinallyClause(GrammarAST tree) { } - -protected void enterLocals(GrammarAST tree) { } -protected void exitLocals(GrammarAST tree) { } - -protected void enterRuleReturns(GrammarAST tree) { } -protected void exitRuleReturns(GrammarAST tree) { } - -protected void enterThrowsSpec(GrammarAST tree) { } -protected void exitThrowsSpec(GrammarAST tree) { } - -protected void enterRuleAction(GrammarAST tree) { } -protected void exitRuleAction(GrammarAST tree) { } - -protected void enterRuleModifier(GrammarAST tree) { } -protected void exitRuleModifier(GrammarAST tree) { } - -protected void enterLexerRuleBlock(GrammarAST tree) { } -protected void exitLexerRuleBlock(GrammarAST tree) { } - -protected void enterRuleBlock(GrammarAST tree) { } -protected void exitRuleBlock(GrammarAST tree) { } - -protected void enterLexerOuterAlternative(AltAST tree) { } -protected void exitLexerOuterAlternative(AltAST tree) { } - -protected void enterOuterAlternative(AltAST tree) { } -protected void exitOuterAlternative(AltAST tree) { } - -protected void enterLexerAlternative(GrammarAST tree) { } -protected void exitLexerAlternative(GrammarAST tree) { } - -protected void enterLexerElements(GrammarAST tree) { } -protected void exitLexerElements(GrammarAST tree) { } - -protected void enterLexerElement(GrammarAST tree) { } -protected void exitLexerElement(GrammarAST tree) { } - -protected void enterLabeledLexerElement(GrammarAST tree) { } -protected void exitLabeledLexerElement(GrammarAST tree) { } - -protected void enterLexerBlock(GrammarAST tree) { } -protected void exitLexerBlock(GrammarAST tree) { } - -protected void enterLexerAtom(GrammarAST tree) { } -protected void exitLexerAtom(GrammarAST tree) { } - -protected void enterActionElement(GrammarAST tree) { } -protected void exitActionElement(GrammarAST tree) { } - -protected void enterAlternative(AltAST tree) { } -protected void exitAlternative(AltAST tree) { } - -protected void enterLexerCommand(GrammarAST tree) { } -protected void exitLexerCommand(GrammarAST tree) { } - -protected void enterLexerCommandExpr(GrammarAST tree) { } -protected void exitLexerCommandExpr(GrammarAST tree) { } - -protected void enterElement(GrammarAST tree) { } -protected void exitElement(GrammarAST tree) { } - -protected void enterAstOperand(GrammarAST tree) { } -protected void exitAstOperand(GrammarAST tree) { } - -protected void enterLabeledElement(GrammarAST tree) { } -protected void exitLabeledElement(GrammarAST tree) { } - -protected void enterSubrule(GrammarAST tree) { } -protected void exitSubrule(GrammarAST tree) { } - -protected void enterLexerSubrule(GrammarAST tree) { } -protected void exitLexerSubrule(GrammarAST tree) { } - -protected void enterBlockSuffix(GrammarAST tree) { } -protected void exitBlockSuffix(GrammarAST tree) { } - -protected void enterEbnfSuffix(GrammarAST tree) { } -protected void exitEbnfSuffix(GrammarAST tree) { } - -protected void enterAtom(GrammarAST tree) { } -protected void exitAtom(GrammarAST tree) { } - -protected void enterBlockSet(GrammarAST tree) { } -protected void exitBlockSet(GrammarAST tree) { } - -protected void enterSetElement(GrammarAST tree) { } -protected void exitSetElement(GrammarAST tree) { } - -protected void enterBlock(GrammarAST tree) { } -protected void exitBlock(GrammarAST tree) { } - -protected void enterRuleref(GrammarAST tree) { } -protected void exitRuleref(GrammarAST tree) { } - -protected void enterRange(GrammarAST tree) { } -protected void exitRange(GrammarAST tree) { } - -protected void enterTerminal(GrammarAST tree) { } -protected void exitTerminal(GrammarAST tree) { } - -protected void enterElementOptions(GrammarAST tree) { } -protected void exitElementOptions(GrammarAST tree) { } - -protected void enterElementOption(GrammarAST tree) { } -protected void exitElementOption(GrammarAST tree) { } - - @Override - public void traceIn(String ruleName, int ruleIndex) { - System.err.println("enter "+ruleName+": "+input.LT(1)); - } - - @Override - public void traceOut(String ruleName, int ruleIndex) { - System.err.println("exit "+ruleName+": "+input.LT(1)); - } -} - -grammarSpec -@init { - enterGrammarSpec($start); -} -@after { - exitGrammarSpec($start); -} - : ^( GRAMMAR ID {grammarName=$ID.text;} DOC_COMMENT? - {discoverGrammar((GrammarRootAST)$GRAMMAR, $ID);} - prequelConstructs - {finishPrequels($prequelConstructs.firstOne);} - rules mode* - {finishGrammar((GrammarRootAST)$GRAMMAR, $ID);} - ) - ; - -prequelConstructs returns [GrammarAST firstOne=null] -@init { - enterPrequelConstructs($start); -} -@after { - exitPrequelConstructs($start); -} - : {$firstOne=$start;} prequelConstruct+ - | - ; - -prequelConstruct -@init { - enterPrequelConstructs($start); -} -@after { - exitPrequelConstructs($start); -} - : optionsSpec - | delegateGrammars - | tokensSpec - | action - ; - -optionsSpec -@init { - enterOptionsSpec($start); -} -@after { - exitOptionsSpec($start); -} - : ^(OPTIONS option*) - ; - -option -@init { - enterOption($start); - boolean rule = inContext("RULE ..."); - boolean block = inContext("BLOCK ..."); -} -@after { - exitOption($start); -} - : ^(a=ASSIGN ID v=optionValue) - { - if ( block ) blockOption($ID, $v.start); // most specific first - else if ( rule ) ruleOption($ID, $v.start); - else grammarOption($ID, $v.start); - } - ; - -optionValue returns [String v] -@init { - enterOptionValue($start); - $v = $start.token.getText(); -} -@after { - exitOptionValue($start); -} - : ID - | STRING_LITERAL - | INT - ; - -delegateGrammars -@init { - enterDelegateGrammars($start); -} -@after { - exitDelegateGrammars($start); -} - : ^(IMPORT delegateGrammar+) - ; - -delegateGrammar -@init { - enterDelegateGrammar($start); -} -@after { - exitDelegateGrammar($start); -} - : ^(ASSIGN label=ID id=ID) {importGrammar($label, $id);} - | id=ID {importGrammar(null, $id);} - ; - -tokensSpec -@init { - enterTokensSpec($start); -} -@after { - exitTokensSpec($start); -} - : ^(TOKENS_SPEC tokenSpec+) - ; - -tokenSpec -@init { - enterTokenSpec($start); -} -@after { - exitTokenSpec($start); -} - : ID {defineToken($ID);} - ; - -action -@init { - enterAction($start); -} -@after { - exitAction($start); -} - : ^(AT sc=ID? name=ID ACTION) {globalNamedAction($sc, $name, (ActionAST)$ACTION);} - ; - -rules -@init { - enterRules($start); -} -@after { - exitRules($start); -} - : ^(RULES {discoverRules($RULES);} (rule|lexerRule)* {finishRules($RULES);}) - ; - -mode -@init { - enterMode($start); -} -@after { - exitMode($start); -} - : ^( MODE ID {currentModeName=$ID.text; modeDef($MODE, $ID);} lexerRule* ) - ; - -lexerRule -@init { - enterLexerRule($start); - List mods = new ArrayList(); - currentOuterAltNumber=0; -} -@after { - exitLexerRule($start); -} - : ^( RULE TOKEN_REF - {currentRuleName=$TOKEN_REF.text; currentRuleAST=$RULE;} - DOC_COMMENT? (^(RULEMODIFIERS m=FRAGMENT {mods.add($m);}))? - {discoverLexerRule((RuleAST)$RULE, $TOKEN_REF, mods, (GrammarAST)input.LT(1));} - lexerRuleBlock - { - finishLexerRule((RuleAST)$RULE, $TOKEN_REF, $lexerRuleBlock.start); - currentRuleName=null; currentRuleAST=null; - } - ) - ; - -rule -@init { - enterRule($start); - List mods = new ArrayList(); - List actions = new ArrayList(); // track roots - currentOuterAltNumber=0; -} -@after { - exitRule($start); -} - : ^( RULE RULE_REF {currentRuleName=$RULE_REF.text; currentRuleAST=$RULE;} - DOC_COMMENT? (^(RULEMODIFIERS (m=ruleModifier{mods.add($m.start);})+))? - ARG_ACTION? - ret=ruleReturns? - thr=throwsSpec? - loc=locals? - ( opts=optionsSpec - | a=ruleAction {actions.add($a.start);} - )* - {discoverRule((RuleAST)$RULE, $RULE_REF, mods, (ActionAST)$ARG_ACTION, - $ret.start!=null?(ActionAST)$ret.start.getChild(0):null, - $thr.start, $opts.start, - $loc.start!=null?(ActionAST)$loc.start.getChild(0):null, - actions, (GrammarAST)input.LT(1));} - ruleBlock exceptionGroup - {finishRule((RuleAST)$RULE, $RULE_REF, $ruleBlock.start); currentRuleName=null; currentRuleAST=null;} - ) - ; - -exceptionGroup -@init { - enterExceptionGroup($start); -} -@after { - exitExceptionGroup($start); -} - : exceptionHandler* finallyClause? - ; - -exceptionHandler -@init { - enterExceptionHandler($start); -} -@after { - exitExceptionHandler($start); -} - : ^(CATCH ARG_ACTION ACTION) {ruleCatch($ARG_ACTION, (ActionAST)$ACTION);} - ; - -finallyClause -@init { - enterFinallyClause($start); -} -@after { - exitFinallyClause($start); -} - : ^(FINALLY ACTION) {finallyAction((ActionAST)$ACTION);} - ; - -locals -@init { - enterLocals($start); -} -@after { - exitLocals($start); -} - : ^(LOCALS ARG_ACTION) - ; - -ruleReturns -@init { - enterRuleReturns($start); -} -@after { - exitRuleReturns($start); -} - : ^(RETURNS ARG_ACTION) - ; - -throwsSpec -@init { - enterThrowsSpec($start); -} -@after { - exitThrowsSpec($start); -} - : ^(THROWS ID+) - ; - -ruleAction -@init { - enterRuleAction($start); -} -@after { - exitRuleAction($start); -} - : ^(AT ID ACTION) - ; - -ruleModifier -@init { - enterRuleModifier($start); -} -@after { - exitRuleModifier($start); -} - : PUBLIC - | PRIVATE - | PROTECTED - | FRAGMENT - ; - -lexerRuleBlock -@init { - enterLexerRuleBlock($start); -} -@after { - exitLexerRuleBlock($start); -} - : ^( BLOCK - ( { - currentOuterAltRoot = (GrammarAST)input.LT(1); - currentOuterAltNumber++; - } - lexerOuterAlternative - )+ - ) - ; - -ruleBlock -@init { - enterRuleBlock($start); -} -@after { - exitRuleBlock($start); -} - : ^( BLOCK - ( { - currentOuterAltRoot = (GrammarAST)input.LT(1); - currentOuterAltNumber++; - } - outerAlternative - )+ - ) - ; - -lexerOuterAlternative -@init { - enterLexerOuterAlternative((AltAST)$start); - discoverOuterAlt((AltAST)$start); -} -@after { - finishOuterAlt((AltAST)$start); - exitLexerOuterAlternative((AltAST)$start); -} - : lexerAlternative - ; - - -outerAlternative -@init { - enterOuterAlternative((AltAST)$start); - discoverOuterAlt((AltAST)$start); -} -@after { - finishOuterAlt((AltAST)$start); - exitOuterAlternative((AltAST)$start); -} - : alternative - ; - -lexerAlternative -@init { - enterLexerAlternative($start); -} -@after { - exitLexerAlternative($start); -} - : ^(LEXER_ALT_ACTION lexerElements lexerCommand+) - | lexerElements - ; - -lexerElements -@init { - enterLexerElements($start); -} -@after { - exitLexerElements($start); -} - : ^(ALT lexerElement+) - ; - -lexerElement -@init { - enterLexerElement($start); -} -@after { - exitLexerElement($start); -} - : labeledLexerElement - | lexerAtom - | lexerSubrule - | ACTION {actionInAlt((ActionAST)$ACTION);} - | SEMPRED {sempredInAlt((PredAST)$SEMPRED);} - | ^(ACTION elementOptions) {actionInAlt((ActionAST)$ACTION);} - | ^(SEMPRED elementOptions) {sempredInAlt((PredAST)$SEMPRED);} - | EPSILON - ; - -labeledLexerElement -@init { - enterLabeledLexerElement($start); -} -@after { - exitLabeledLexerElement($start); -} - : ^((ASSIGN|PLUS_ASSIGN) ID (lexerAtom|block)) - ; - -lexerBlock -@init { - enterLexerBlock($start); -} -@after { - exitLexerBlock($start); -} - : ^(BLOCK optionsSpec? lexerAlternative+) - ; - -lexerAtom -@init { - enterLexerAtom($start); -} -@after { - exitLexerAtom($start); -} - : terminal - | ^(NOT blockSet) - | blockSet - | ^(WILDCARD elementOptions) - | WILDCARD - | LEXER_CHAR_SET - | range - ; - -actionElement -@init { - enterActionElement($start); -} -@after { - exitActionElement($start); -} - : ACTION - | ^(ACTION elementOptions) - | SEMPRED - | ^(SEMPRED elementOptions) - ; - -alternative -@init { - enterAlternative((AltAST)$start); - discoverAlt((AltAST)$start); -} -@after { - finishAlt((AltAST)$start); - exitAlternative((AltAST)$start); -} - : ^(ALT elementOptions? element+) - | ^(ALT EPSILON) - ; - -lexerCommand -@init { - enterLexerCommand($start); -} -@after { - exitLexerCommand($start); -} - : ^(LEXER_ACTION_CALL ID lexerCommandExpr) - {lexerCallCommand(currentOuterAltNumber, $ID, $lexerCommandExpr.start);} - | ID - {lexerCommand(currentOuterAltNumber, $ID);} - ; - -lexerCommandExpr -@init { - enterLexerCommandExpr($start); -} -@after { - exitLexerCommandExpr($start); -} - : ID - | INT - ; - -element -@init { - enterElement($start); -} -@after { - exitElement($start); -} - : labeledElement - | atom - | subrule - | ACTION {actionInAlt((ActionAST)$ACTION);} - | SEMPRED {sempredInAlt((PredAST)$SEMPRED);} - | ^(ACTION elementOptions) {actionInAlt((ActionAST)$ACTION);} - | ^(SEMPRED elementOptions) {sempredInAlt((PredAST)$SEMPRED);} - - | ^(NOT blockSet) - | ^(NOT block) - ; - -astOperand -@init { - enterAstOperand($start); -} -@after { - exitAstOperand($start); -} - : atom - | ^(NOT blockSet) - | ^(NOT block) - ; - -labeledElement -@init { - enterLabeledElement($start); -} -@after { - exitLabeledElement($start); -} - : ^((ASSIGN|PLUS_ASSIGN) ID element) {label($start, $ID, $element.start);} - ; - -subrule -@init { - enterSubrule($start); -} -@after { - exitSubrule($start); -} - : ^(blockSuffix block) - | block - ; - -lexerSubrule -@init { - enterLexerSubrule($start); -} -@after { - exitLexerSubrule($start); -} - : ^(blockSuffix lexerBlock) - | lexerBlock - ; - -blockSuffix -@init { - enterBlockSuffix($start); -} -@after { - exitBlockSuffix($start); -} - : ebnfSuffix - ; - -ebnfSuffix -@init { - enterEbnfSuffix($start); -} -@after { - exitEbnfSuffix($start); -} - : OPTIONAL - | CLOSURE - | POSITIVE_CLOSURE - ; - -atom -@init { - enterAtom($start); -} -@after { - exitAtom($start); -} - : ^(DOT ID terminal) - | ^(DOT ID ruleref) - | ^(WILDCARD elementOptions) {wildcardRef($WILDCARD);} - | WILDCARD {wildcardRef($WILDCARD);} - | terminal - | blockSet - | ruleref - ; - -blockSet -@init { - enterBlockSet($start); -} -@after { - exitBlockSet($start); -} - : ^(SET setElement+) - ; - -setElement -@init { - enterSetElement($start); -} -@after { - exitSetElement($start); -} - : STRING_LITERAL {stringRef((TerminalAST)$STRING_LITERAL);} - | TOKEN_REF {tokenRef((TerminalAST)$TOKEN_REF);} - | ^(RANGE a=STRING_LITERAL b=STRING_LITERAL) - { - stringRef((TerminalAST)$a); - stringRef((TerminalAST)$b); - } - | LEXER_CHAR_SET - ; - -block -@init { - enterBlock($start); -} -@after { - exitBlock($start); -} - : ^(BLOCK optionsSpec? ruleAction* ACTION? alternative+) - ; - -ruleref -@init { - enterRuleref($start); -} -@after { - exitRuleref($start); -} - : ^(RULE_REF arg=ARG_ACTION? elementOptions?) - { - ruleRef($RULE_REF, (ActionAST)$ARG_ACTION); - if ( $arg!=null ) actionInAlt((ActionAST)$arg); - } - ; - -range -@init { - enterRange($start); -} -@after { - exitRange($start); -} - : ^(RANGE STRING_LITERAL STRING_LITERAL) - ; - -terminal -@init { - enterTerminal($start); -} -@after { - exitTerminal($start); -} - : ^(STRING_LITERAL elementOptions) - {stringRef((TerminalAST)$STRING_LITERAL);} - | STRING_LITERAL {stringRef((TerminalAST)$STRING_LITERAL);} - | ^(TOKEN_REF elementOptions) {tokenRef((TerminalAST)$TOKEN_REF);} - | TOKEN_REF {tokenRef((TerminalAST)$TOKEN_REF);} - ; - -elementOptions -@init { - enterElementOptions($start); -} -@after { - exitElementOptions($start); -} - : ^(ELEMENT_OPTIONS elementOption[(GrammarASTWithOptions)$start.getParent()]*) - ; - -elementOption[GrammarASTWithOptions t] -@init { - enterElementOption($start); -} -@after { - exitElementOption($start); -} - : ID {elementOption(t, $ID, null);} - | ^(ASSIGN id=ID v=ID) {elementOption(t, $id, $v);} - | ^(ASSIGN ID v=STRING_LITERAL) {elementOption(t, $ID, $v);} - | ^(ASSIGN ID v=ACTION) {elementOption(t, $ID, $v);} - | ^(ASSIGN ID v=INT) {elementOption(t, $ID, $v);} - ; diff --git a/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g b/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g deleted file mode 100644 index 2ee03a64a..000000000 --- a/tool/src/org/antlr/v4/parse/LeftRecursiveRuleWalker.g +++ /dev/null @@ -1,215 +0,0 @@ -/* - * [The "BSD license"] - * Copyright (c) 2012 Terence Parr - * Copyright (c) 2012 Sam Harwell - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** Find left-recursive rules */ -tree grammar LeftRecursiveRuleWalker; - -options { - tokenVocab=ANTLRParser; - ASTLabelType=GrammarAST; -} - -@header { -package org.antlr.v4.parse; - -import org.antlr.v4.misc.*; -import org.antlr.v4.tool.*; -import org.antlr.v4.tool.ast.*; -} - -@members { -private String ruleName; -private int currentOuterAltNumber; // which outer alt of rule? -public int numAlts; // how many alts for this rule total? - -public void setAltAssoc(AltAST altTree, int alt) {} -public void binaryAlt(AltAST altTree, int alt) {} -public void prefixAlt(AltAST altTree, int alt) {} -public void suffixAlt(AltAST altTree, int alt) {} -public void otherAlt(AltAST altTree, int alt) {} -public void setReturnValues(GrammarAST t) {} -} - -@rulecatch { } - -// TODO: can get parser errors for not matching pattern; make them go away -public -rec_rule returns [boolean isLeftRec] -@init -{ - currentOuterAltNumber = 1; -} - : ^( r=RULE id=RULE_REF {ruleName=$id.getText();} - DOC_COMMENT? ruleModifier? -// (ARG_ACTION)? shouldn't allow args, right? - (^(RETURNS a=ARG_ACTION {setReturnValues($a);}))? -// ( ^(THROWS .+) )? don't allow - ( ^(LOCALS ARG_ACTION) )? // TODO: copy these to gen'd code - ( ^(OPTIONS .*) - | ^(AT ID ACTION) // TODO: copy - )* - ruleBlock {$isLeftRec = $ruleBlock.isLeftRec;} - exceptionGroup - ) - ; - -exceptionGroup - : exceptionHandler* finallyClause? - ; - -exceptionHandler - : ^(CATCH ARG_ACTION ACTION) - ; - -finallyClause - : ^(FINALLY ACTION) - ; - -ruleModifier - : PUBLIC - | PRIVATE - | PROTECTED - ; - -ruleBlock returns [boolean isLeftRec] -@init{boolean lr=false; this.numAlts = $start.getChildCount();} - : ^( BLOCK - ( - o=outerAlternative - {if ($o.isLeftRec) $isLeftRec = true;} - {currentOuterAltNumber++;} - )+ - ) - ; - -/** An alt is either prefix, suffix, binary, or ternary operation or "other" */ -outerAlternative returns [boolean isLeftRec] - : (binary)=> binary - {binaryAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;} - | (prefix)=> prefix - {prefixAlt((AltAST)$start, currentOuterAltNumber);} - | (suffix)=> suffix - {suffixAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;} - | nonLeftRecur {otherAlt((AltAST)$start, currentOuterAltNumber);} - ; - -binary - : ^( ALT elementOptions? recurse element+ recurse ACTION? ) - {setAltAssoc((AltAST)$ALT,currentOuterAltNumber);} - ; - -prefix - : ^( ALT elementOptions? - ({!((CommonTree)input.LT(1)).getText().equals(ruleName)}? element)+ - recurse ACTION? - ) - {setAltAssoc((AltAST)$ALT,currentOuterAltNumber);} - ; - -suffix - : ^( ALT elementOptions? recurse element+ ) - {setAltAssoc((AltAST)$ALT,currentOuterAltNumber);} - ; - -nonLeftRecur - : ^(ALT element+) // no assoc for these; ignore if present - ; - -recurse - : ^(ASSIGN ID recurseNoLabel) - | recurseNoLabel - ; - -recurseNoLabel : {((CommonTree)input.LT(1)).getText().equals(ruleName)}? RULE_REF; - -token returns [GrammarAST t=null] - : ^(ASSIGN ID s=token {$t = $s.t;}) - | ^(PLUS_ASSIGN ID s=token {$t = $s.t;}) - | b=STRING_LITERAL {$t = $b;} - | ^(b=STRING_LITERAL elementOptions) {$t = $b;} - | ^(c=TOKEN_REF elementOptions) {$t = $c;} - | c=TOKEN_REF {$t = $c;} - ; - -elementOptions - : ^(ELEMENT_OPTIONS elementOption*) - ; - -elementOption - : ID - | ^(ASSIGN ID ID) - | ^(ASSIGN ID STRING_LITERAL) - | ^(ASSIGN ID ACTION) - | ^(ASSIGN ID INT) - ; - -element - : atom - | ^(NOT element) - | ^(RANGE atom atom) - | ^(ASSIGN ID element) - | ^(PLUS_ASSIGN ID element) - | ^(SET setElement+) - | RULE_REF - | ebnf - | ACTION - | SEMPRED - | EPSILON - ; - -setElement - : STRING_LITERAL - | TOKEN_REF - ; - -ebnf: block - | ^( OPTIONAL block ) - | ^( CLOSURE block ) - | ^( POSITIVE_CLOSURE block ) - ; - -block - : ^(BLOCK ACTION? alternative+) - ; - -alternative - : ^(ALT elementOptions? element+) - ; - -atom - : ^(RULE_REF ARG_ACTION? elementOptions?) - | ^(STRING_LITERAL elementOptions) - | STRING_LITERAL - | ^(TOKEN_REF elementOptions) - | TOKEN_REF - | ^(WILDCARD elementOptions) - | WILDCARD - | ^(DOT ID element) - ;