fix comment

This commit is contained in:
Terence Parr 2013-11-21 08:42:02 -08:00
parent 40789babf5
commit e8f577e162
10 changed files with 5 additions and 3363 deletions

View File

@ -114,6 +114,8 @@
dir="${basedir}/tool/src/@{srcpath}"> dir="${basedir}/tool/src/@{srcpath}">
<arg value="-o"/> <arg value="-o"/>
<arg value="${build.antlr3.dir}/@{srcpath}"/> <arg value="${build.antlr3.dir}/@{srcpath}"/>
<arg value="-lib"/>
<arg value="${build.antlr3.dir}/org/antlr/v4/parse"/>
<args/> <args/>
<arg line="${sources.antlr3.local}"/> <arg line="${sources.antlr3.local}"/>
<classpath> <classpath>
@ -164,12 +166,7 @@
<antlr3 srcpath="org/antlr/v4/parse"/> <antlr3 srcpath="org/antlr/v4/parse"/>
<antlr3 srcpath="org/antlr/v4/codegen"> <antlr3 srcpath="org/antlr/v4/codegen"/>
<args>
<arg value="-lib"/>
<arg value="${build.antlr3.dir}/org/antlr/v4/parse"/>
</args>
</antlr3>
<touch file="${antlr3.touch}" mkdirs="true"/> <touch file="${antlr3.touch}" mkdirs="true"/>
</target> </target>

View File

@ -3,7 +3,7 @@ package org.antlr.v4.runtime.tree.pattern;
/** A chunk is either a token reference, a rule reference, or some plaintext /** A chunk is either a token reference, a rule reference, or some plaintext
* within a tree pattern. Function split() in the pattern matcher returns * within a tree pattern. Function split() in the pattern matcher returns
* a list of chunks in preparation for creating a token stream by tokenize(). * a list of chunks in preparation for creating a token stream by tokenize().
* From there, we get a parse tree from with compile(). * From there, we get a parse tree from the pattern with compile().
*/ */
abstract class Chunk { abstract class Chunk {
} }

View File

@ -267,6 +267,7 @@ public class ParseTreePatternMatcher {
} }
else { else {
System.err.println("invalid tag: "+tagChunk.tag); System.err.println("invalid tag: "+tagChunk.tag);
throw new IllegalArgumentException("invalid tag: "+tagChunk.tag+" in "+pattern);
} }
} }
else { else {

View File

@ -1,782 +0,0 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// File : A3Lexer.g
// Author : Jim Idle (jimi@temporal-wave.com)
// Copyright : Free BSD - See @header clause below
// Version : First implemented as part of ANTLR 3.2 this is the self
// hosting ANTLR 3 Lexer.
//
// Description
// -----------
// This is the definitive lexer grammar for parsing ANTLR V3.x.x grammars. All other
// grammars are derived from this grammar via source code control integration (perforce)
// or by the gdiff tool.
//
// This grammar and its associated grammars A3Parser.g and A3Walker.g exhibit the following
// traits, which are recommended for all production quality grammars:
//
// 1) They are separate grammars, not composite grammars;
// 2) They implement all supporting methods in a superclass (at least this is recommended
// for language targets that support inheritance);
// 3) All errors are pushed as far down the parsing chain as possible, which means
// that the lexer tries to defer error reporting to the parser, and the parser
// tries to defer error reporting to a semantic phase consisting of a single
// walk of the AST. The reason for this is that the error messages produced
// from later phases of the parse will generally have better context and so
// be more useful to the end user. Consider the message: "Syntax error at 'options'"
// vs: "You cannot specify two options{} sections in a single grammar file".
// 4) The lexer is 'programmed' to catch common mistakes such as unterminated literals
// and report them specifically and not just issue confusing lexer mismatch errors.
//
/** Read in an ANTLR grammar and build an AST. Try not to do
* any actions, just build the tree.
*
* The phases are:
*
* A3Lexer.g (this file)
* A3Parser.g
* A3Verify.g (derived from A3Walker.g)
* assign.types.g
* define.g
* buildnfa.g
* antlr.print.g (optional)
* codegen.g
*
* Terence Parr
* University of San Francisco
* 2005
* Jim Idle (this v3 grammar)
* Temporal Wave LLC
* 2009
*/
lexer grammar ANTLRLexer;
// ==============================================================================
// Note that while this grammar does not care about order of constructs
// that don't really matter, such as options before @header etc, it must first
// be parsed by the original v2 parser, before it replaces it. That parser does
// care about order of structures. Hence we are constrained by the v2 parser
// for at least the first bootstrap release that causes this parser to replace
// the v2 version.
// ==============================================================================
// -------
// Options
//
// V3 option directives to tell the tool what we are asking of it for this
// grammar.
//
options {
// Target language is Java, which is the default but being specific
// here as this grammar is also meant as a good example grammar
// for users.
//
language = Java;
// The super class that this lexer should expect to inherit from, and
// which contains any and all support routines for the lexer. This is
// commented out in this baseline (definitive or normative grammar)
// - see the ANTLR tool implementation for hints on how to use the super
// class
//
//superclass = AbstractA3Lexer;
}
tokens { SEMPRED; TOKEN_REF; RULE_REF; LEXER_CHAR_SET; ARG_ACTION; }
// Include the copyright in this source and also the generated source
//
@lexer::header {
/*
[The "BSD licence"]
Copyright (c) 2005-2009 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.parse;
import org.antlr.v4.tool.*;
}
@members {
    /** Token stream the lexed tokens are pushed into; inspected for context. May be null. */
    public CommonTokenStream tokens; // track stream we push to; need for context info

    /** Selects LEXER_CHAR_SET over ARG_ACTION in ARG_OR_CHARSET; updated by the COLON rule. */
    public boolean isLexerRule = false;

    /** Error-reporting hook with an empty body here (presumably overridden elsewhere; verify). */
    public void grammarError(ErrorType etype, org.antlr.runtime.Token token, Object... args) { }

    /** Walk backwards through this.tokens, starting at the current stream
     *  index, until a token that can open a rule or subrule is found
     *  (LPAREN, TOKEN_REF or RULE_REF).
     *
     *  Returns that token, or null when there is no stream or no such
     *  token precedes the current position.
     */
    public Token getRuleOrSubruleStartToken() {
        if ( tokens==null ) {
            return null;
        }
        int current = tokens.index();
        int size = tokens.size();
        // The index can equal size() while lexing, so clamp to the last buffered token.
        int start = Math.min(current, size-1);
        for (int pos = start; pos>=0; pos--) {
            Token candidate = tokens.get(pos);
            int type = candidate.getType();
            boolean opensRuleOrSubrule =
                type == LPAREN || type == TOKEN_REF || type == RULE_REF;
            if ( opensRuleOrSubrule ) {
                return candidate;
            }
        }
        return null;
    }
}
// --------
// Comments
//
// ANTLR comments can be multi or single line and we don't care
// which particularly. However we also accept Javadoc style comments
// of the form: /** ... */ and we do take care to distinguish those
// from ordinary multi-line comments
// Note how we guide the lexical PATH because we want to issue a descriptive
// error message in case of a standalone '/' character, which makes no
// sense in ANTLR source code. We also trap unterminated multi-line comments
//
fragment DOC_COMMENT : ;
// COMMENT matches, after a leading '/', one of three things:
//   1) a second '/' followed by either an ' $ANTLR' source directive (SRC)
//      or the rest of the line;
//   2) a '*' starting a multi-line comment, which is retyped to DOC_COMMENT
//      when it opens with a second '*' that is not immediately followed by '/';
//   3) nothing at all (a stray '/'), for which error reporting is still a TODO.
// Every result that is not a DOC_COMMENT is placed on channel 2.
COMMENT
@init {
// Record the start line and offsets as if we need to report an
// unterminated comment, then we want to show the start of the comment
// we think is broken, not the end, where people will have to try and work
// it out themselves.
//
// NOTE(review): both values are only referenced by the commented-out
// error report in the unterminated-comment alternative below.
int startLine = $line;
int offset = getCharPositionInLine();
}
: // Eat the first character only, then see if we have a comment
// or something silly.
//
'/' // Comment introducer
(
// Single line comment, possibly with embedded src/line directives
// in a similar style to the C pre-processor, allowing generated
// code to refer the programmer back to the original source code
// in case of error.
//
'/'
(
(' $ANTLR')=> ' $ANTLR' SRC
| ~(NLCHARS)*
)
| // Multi-line comment, which may be a documentation comment
// if it starts /** (note that we protect against accidentally
// recognizing a comment /**/ as a documentation comment)
//
'*' (
{ input.LA(2) != '/'}?=> '*' { $type = DOC_COMMENT; }
| { true }?=> // Required to cover all alts with predicates
)
// Should we support embedded multiline comments here?
//
(
// Pick out end of multiline comment and exit the loop
// if we find it.
//
{ !(input.LA(1) == '*' && input.LA(2) == '/') }?
// Anything else other than the non-greedy match of
// the comment close sequence
//
.
)*
(
// Look for the comment terminator, but if it is accidentally
// unterminated, then we will hit EOF, which will trigger the
// epsilon alt and hence we can issue an error message relative
// to the start of the unterminated multi-line comment
//
'*/'
| // Unterminated comment!
//
{
// ErrorManager.msg(Msg.UNTERMINATED_DOC_COMMENT, startLine, offset, $pos, startLine, offset, $pos, (Object)null);
}
)
| // There was nothing that made sense following the opening '/' and so
// we issue an error regarding the malformed comment
//
{
// TODO: Insert error message relative to comment start
//
}
)
{
// Unless we had a documentation comment, then we do not wish to
// pass the comments in to the parser. If you are writing a formatter
// then you will want to preserve the comments off channel, but could
// just skip and save token space if not.
//
if ($type != DOC_COMMENT) {
$channel=2; // Comments are on channel 2
}
}
;
// A bracketed construct is ambiguous at the lexical level: it is a character
// set when we are inside a lexer rule and an argument action otherwise. The
// isLexerRule flag (maintained by the COLON rule) gates which fragment is
// attempted, and the token is retyped to match the fragment that was used.
ARG_OR_CHARSET
options {k=1;}
: {isLexerRule}?=> LEXER_CHAR_SET {$type=LEXER_CHAR_SET;}
| {!isLexerRule}?=> ARG_ACTION
{
$type=ARG_ACTION;
// Set the token text to our gathered string minus outer [ ]
String t = $text;
t = t.substring(1,t.length()-1);
setText(t);
}
;
// A lexer character set: '[' ... ']' on a single line. Inside the brackets a
// backslash escapes the next character (anything but a line break), so an
// escaped ']' does not close the set.
fragment
LEXER_CHAR_SET
: '['
( '\\' ~('\r'|'\n') // escaped character
| ~('\r'|'\n'|'\\'|']') // any other character that is not a line break, backslash or ']'
)*
']'
;
// --------------
// Argument specs
//
// Certain argument lists, such as those specifying call parameters
// to a rule invocation, or input parameters to a rule specification
// are contained within square brackets. In the lexer we consume them
// all at once and sort them out later in the grammar analysis.
//
// Matches a balanced '[' ... ']' argument block as one unit.
fragment
ARG_ACTION
: '['
(
ARG_ACTION // nested [...] block, matched recursively to keep brackets balanced
| ('"')=>ACTION_STRING_LITERAL // double-quoted literal, so brackets inside it are not counted
| ('\'')=>ACTION_CHAR_LITERAL // single-quoted literal, likewise
| ~('['|']') // any other single character
)*
']'
;
// -------
// Actions
//
// Other than making sure to distinguish between { and } embedded
// within what we have assumed to be literals in the action code, the
// job of the lexer is merely to gather the code within the action
// (delimited by {}) and pass it to the parser as a single token.
// We know that this token will be asked for its text somewhere
// in the upcoming parse, so setting the text here to exclude
// the delimiting {} is no additional overhead.
//
// An ACTION is a balanced {...} block (NESTED_ACTION). When it is directly
// followed by '?', the token is retyped to SEMPRED. If that in turn is
// followed by '=>' (optionally preceded by whitespace), the v3 gated-predicate
// arrow is consumed as part of the token and reported through grammarError
// with ErrorType.V3_GATED_SEMPRED.
// NOTE(review): no setText call is made on the emitted token in this rule, so
// the outer braces do not appear to be stripped here - confirm where that happens.
ACTION
: NESTED_ACTION
( '?' {$type = SEMPRED;}
( (WSNLCHARS* '=>') => WSNLCHARS* '=>' // v3 gated sempred
{
// Build a throwaway token covering the text matched so far, purely
// to give the error report a location.
Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1);
t.setLine(state.tokenStartLine);
t.setText(state.text);
t.setCharPositionInLine(state.tokenStartCharPositionInLine);
grammarError(ErrorType.V3_GATED_SEMPRED, t);
}
)?
)?
;
// ----------------
// Action structure
//
// Many language targets use {} as block delimiters and so we
// must recursively match {} delimited blocks to balance the
// braces. Additionally, we must make some assumptions about
// literal string representation in the target language. We assume
// that they are delimited by ' or " and so consume these
// in their own alts so as not to inadvertently match {}.
// This rule calls itself on matching a {
//
// Matches one balanced {...} block, recursing for nested blocks and stepping
// over literals, comments and escape sequences so that braces inside them are
// not counted. A missing closing brace is reported on System.out.
fragment
NESTED_ACTION
@init {
// Record the start line and offsets as if we need to report an
// unterminated block, then we want to show the start of the block
// we think is broken, not the end, where people will have to try and work
// it out themselves.
//
int startLine = getLine();
int offset = getCharPositionInLine();
}
: // Action and other blocks start with opening {
//
'{'
(
// And now we can match one of a number of embedded
// elements within the action until we find a
// } that balances the opening {. If we do not find
// the balanced } then we will hit EOF and can issue
// an error message about the brace that we believe to
// be mismatched. This won't be foolproof but we will
// be able to at least report an error against the
// opening brace that we feel is in error and this will
// guide the user to the correction as best we can.
//
// An embedded {} block
//
NESTED_ACTION
| // What appears to be a literal
//
ACTION_CHAR_LITERAL
| // We have assumed that the target language has C/Java
// type comments.
//
COMMENT
| // What appears to be a literal
//
ACTION_STRING_LITERAL
| // What appears to be an escape sequence
//
ACTION_ESC
| // Some other single character that is not
// handled above
//
~('\\'|'"'|'\''|'/'|'{'|'}')
)*
(
// Correctly balanced closing brace
//
'}'
| // Looks like we have an imbalanced {} block, report
// with respect to the opening brace.
//
{
// TODO: Report imbalanced {}
System.out.println("Block starting at line " + startLine + " offset " + (offset+1) + " contains imbalanced {} or is missing a }");
}
)
;
// Keywords
// --------
// keywords used to specify ANTLR v3 grammars. Keywords may not be used as
// labels for rules or in any other context where they would be ambiguous
// with the keyword vs some other identifier
// OPTIONS and TOKENS must also consume the opening brace that captures
// their option block, as this is the easiest way to parse it separately
// from an ACTION block, despite it using the same {} delimiters.
//
// OPTIONS and TOKENS_SPEC include the opening brace (after optional
// whitespace or line breaks) in the keyword token itself.
OPTIONS : 'options' WSNLCHARS* '{' ;
TOKENS_SPEC : 'tokens' WSNLCHARS* '{' ;
IMPORT : 'import' ;
FRAGMENT : 'fragment' ;
LEXER : 'lexer' ;
PARSER : 'parser' ;
GRAMMAR : 'grammar' ;
// 'tree grammar' (with any whitespace in between) is lexed as a single token;
// the grammarType rule of the parser grammar throws v3TreeGrammarException
// when it matches TREE_GRAMMAR.
TREE_GRAMMAR : 'tree' WSNLCHARS* 'grammar' ;
PROTECTED : 'protected' ;
PUBLIC : 'public' ;
PRIVATE : 'private' ;
RETURNS : 'returns' ;
LOCALS : 'locals' ;
THROWS : 'throws' ;
CATCH : 'catch' ;
FINALLY : 'finally' ;
MODE : 'mode' ;
// -----------
// Punctuation
//
// Character sequences used as separators, delimiters, operators, etc
//
COLON : ':'
{
    // A colon starts the body of a rule or follows subrule options. Look
    // back through this.tokens for the nearest token that can begin a rule
    // or subrule and use it to refresh isLexerRule:
    //   TOKEN_REF -> we are in a lexer rule
    //   RULE_REF  -> we are in a parser rule
    // Any other result (an LPAREN opening a subrule, or nothing found)
    // leaves the current setting untouched.
    Token ruleStart = getRuleOrSubruleStartToken();
    if ( ruleStart!=null ) {
        int startType = ruleStart.getType();
        if ( startType==TOKEN_REF ) {
            isLexerRule = true;
        }
        else if ( startType==RULE_REF ) {
            isLexerRule = false;
        }
    }
}
;
COLONCOLON : '::' ;
COMMA : ',' ;
SEMI : ';' ;
LPAREN : '(' ;
RPAREN : ')' ;
RARROW : '->' ;
LT : '<' ;
GT : '>' ;
ASSIGN : '=' ;
QUESTION : '?' ;
// The v3 syntactic predicate arrow. It is still recognized so that it can be
// reported through grammarError with ErrorType.V3_SYNPRED; the token itself is
// then sent to the HIDDEN channel.
SYNPRED : '=>'
{
// Throwaway token spanning the matched text, used only as the error location.
Token t = new CommonToken(input, state.type, state.channel,
state.tokenStartCharIndex, getCharIndex()-1);
t.setLine(state.tokenStartLine);
t.setText(state.text);
t.setCharPositionInLine(state.tokenStartCharPositionInLine);
grammarError(ErrorType.V3_SYNPRED, t);
$channel=HIDDEN;
}
;
STAR : '*' ;
PLUS : '+' ;
PLUS_ASSIGN : '+=' ;
OR : '|' ;
DOLLAR : '$' ;
DOT : '.' ; // can be WILDCARD or DOT in qid or imported rule ref
RANGE : '..' ;
AT : '@' ;
POUND : '#' ;
NOT : '~' ;
RBRACE : '}' ;
/** Allow unicode rule/token names */
ID : a=NameStartChar NameChar*
{
// Retype the identifier as TOKEN_REF or RULE_REF. The decision is made by
// Grammar.isTokenName, which is given only the text of the first
// character (label a), not the whole identifier.
if ( Grammar.isTokenName($a.text) ) $type = TOKEN_REF;
else $type = RULE_REF;
}
;
fragment
NameChar : NameStartChar
| '0'..'9'
| '_'
| '\u00B7'
| '\u0300'..'\u036F'
| '\u203F'..'\u2040'
;
fragment
NameStartChar
: 'A'..'Z' | 'a'..'z'
| '\u00C0'..'\u00D6'
| '\u00D8'..'\u00F6'
| '\u00F8'..'\u02FF'
| '\u0370'..'\u037D'
| '\u037F'..'\u1FFF'
| '\u200C'..'\u200D'
| '\u2070'..'\u218F'
| '\u2C00'..'\u2FEF'
| '\u3001'..'\uD7FF'
| '\uF900'..'\uFDCF'
| '\uFDF0'..'\uFFFD'
; // ignores | ['\u10000-'\uEFFFF] ;
// ----------------------------
// Literals embedded in actions
//
// Note that we have made the assumption that the language used within
// actions uses the fairly standard " and ' delimiters for literals and
// that within these literals, characters are escaped using the \ character.
// There are some languages which do not conform to this in all cases, such
// as by using /string/ and so on. We will have to deal with such cases if
// they come up in targets.
//
// Within actions, or other structures that are not part of the ANTLR
// syntax, we may encounter literal characters. Within these, we do
// not want to inadvertently match things like '}' and so we eat them
// specifically. While this rule is called CHAR it allows for the fact that
// some languages may use/allow ' as the string delimiter.
//
fragment
ACTION_CHAR_LITERAL
: '\'' (('\\')=>ACTION_ESC | ~'\'' )* '\''
;
// Within actions, or other structures that are not part of the ANTLR
// syntax, we may encounter literal strings. Within these, we do
// not want to inadvertently match things like '}' and so we eat them
// specifically.
//
fragment
ACTION_STRING_LITERAL
: '"' (('\\')=>ACTION_ESC | ~'"')* '"'
;
// Within literal strings and characters that are not part of the ANTLR
// syntax, we must allow for escaped character sequences so that we do not
// inadvertently recognize the end of a string or character when the terminating
// delimiter has been escaped.
//
fragment
ACTION_ESC
: '\\' .
;
// -------
// Integer
//
// Obviously (I hope) match an arbitrarily long sequence of digits.
//
INT : ('0'..'9')+
;
// -----------
// Source spec
//
// A fragment rule for picking up information about an originating
// file from which the grammar we are parsing has been generated. This allows
// ANTLR to report errors against the originating file and not the generated
// file.
//
fragment
SRC : 'src' WSCHARS+ file=ACTION_STRING_LITERAL WSCHARS+ line=INT
{
// TODO: Add target specific code to change the source file name and current line number
//
}
;
// --------------
// Literal string
//
// ANTLR makes no distinction between a single character literal and a
// multi-character string. All literals are single quote delimited and
// may contain unicode escape sequences of the form \uxxxx, where x
// is a valid hexadecimal number (as per Java basically).
// A single-quoted literal confined to one line. The body is any mix of escape
// sequences (ESC_SEQ) and characters other than backslash, single quote and
// line breaks. If the closing quote is missing, the token is still produced
// and the problem is reported via ErrorType.UNTERMINATED_STRING_LITERAL.
STRING_LITERAL
: '\'' ( ( ESC_SEQ | ~('\\'|'\''|'\r'|'\n') ) )*
( '\''
| // Unterminated string literal
{
// Throwaway token spanning the matched text, used only as the error location.
Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1);
t.setLine(state.tokenStartLine);
t.setText(state.text);
t.setCharPositionInLine(state.tokenStartCharPositionInLine);
grammarError(ErrorType.UNTERMINATED_STRING_LITERAL, t);
}
)
;
// A valid hex digit specification
//
fragment
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
// Any kind of escaped character that we can embed within ANTLR
// literal strings.
//
// A backslash followed by a recognized escape character or a unicode escape.
// A backslash followed by anything else falls into the empty last alternative,
// which currently accepts it silently (see the TODO).
fragment
ESC_SEQ
: '\\'
(
// The standard escaped character set such as tab, newline,
// etc.
//
'b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\'
| // A Java style Unicode escape sequence
//
UNICODE_ESC
| // An illegal escape sequence
//
{
// TODO: Issue error message
//
}
)
;
// The part of a unicode escape after the backslash: 'u' followed by up to
// four hex digits. Fewer than four digits are still accepted by the nested
// optional structure; hCount records how many were seen so that an error can
// be raised, which is not implemented yet (see the TODO at the end).
fragment
UNICODE_ESC
@init {
// Flag to tell us whether we have a valid number of
// hex digits in the escape sequence
//
int hCount = 0;
}
: 'u' // Leadin for unicode escape sequence
// We now require 4 hex digits. Note though
// that we accept any number of characters
// and issue an error if we do not get 4. We cannot
// use an infinite count such as + because this
// might consume too many, so we lay out the lexical
// options and issue an error at the invalid paths.
//
(
(
HEX_DIGIT { hCount++; }
(
HEX_DIGIT { hCount++; }
(
HEX_DIGIT { hCount++; }
(
// Four valid hex digits, we are good
//
HEX_DIGIT { hCount++; }
| // Three valid digits
)
| // Two valid digits
)
| // One valid digit
)
)
| // No valid hex digits at all
)
// Now check the digit count and issue an error if we need to
//
{
if (hCount != 4) {
// TODO: Issue error message
}
}
;
// ----------
// Whitespace
//
// Characters and character constructs that are of no import
// to the parser and are used to make the grammar easier to read
// for humans.
//
WS
: (
' '
| '\t'
| '\r'
| '\n'
| '\f'
)+
{$channel=HIDDEN;}
;
// A fragment rule for use in recognizing end of line in
// rules like COMMENT.
//
fragment
NLCHARS
: '\n' | '\r'
;
// A fragment rule for recognizing traditional whitespace
// characters within lexer rules.
//
fragment
WSCHARS
: ' ' | '\t' | '\f'
;
// A fragment rule for recognizing both traditional whitespace and
// end of line markers, when we don't care to distinguish but don't
// want any action code going on.
//
fragment
WSNLCHARS
: ' ' | '\t' | '\f' | '\n' | '\r'
;
// -----------------
// Illegal Character
//
// This is an illegal character trap which is always the last rule in the
// lexer specification. It matches a single character of any value and being
// the last rule in the file will match when no other rule knows what to do
// about the character. It is reported as an error but is not passed on to the
// parser. This means that the parser can still deal with the grammar file anyway
// but we will not try to analyse or code generate from a file with lexical
// errors.
//
// Catch-all for a single character no other rule accepts: report it as a
// SYNTAX_ERROR through grammarError, then skip() so no token is emitted for it.
ERRCHAR
: .
{
// Throwaway token spanning the offending character, used for the error
// location and for building the display text of the message.
Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1);
t.setLine(state.tokenStartLine);
t.setText(state.text);
t.setCharPositionInLine(state.tokenStartCharPositionInLine);
String msg = getTokenErrorDisplay(t) + " came as a complete surprise to me";
grammarError(ErrorType.SYNTAX_ERROR, t, msg);
skip();
}
;

View File

@ -1,924 +0,0 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** The definitive ANTLR v3 grammar to parse ANTLR v4 grammars.
* The grammar builds ASTs that are sniffed by subsequent stages.
*/
parser grammar ANTLRParser;
options {
// Target language is Java, which is the default but being specific
// here as this grammar is also meant as a good example grammar
// for users.
language = Java;
// The output of this grammar is going to be an AST upon which
// we run a semantic checking phase, then the rest of the analysis
// including final code generation.
output = AST;
// The vocabulary (tokens and their int token types) we are using
// for the parser. This is generated by the lexer. The vocab will be extended
// to include the imaginary tokens below.
tokenVocab = ANTLRLexer;
// Type of the AST nodes built by this parser.
ASTLabelType = GrammarAST;
}
// Imaginary Tokens
//
// Imaginary tokens do not exist as far as the lexer is concerned, and it cannot
// generate them. However we sometimes need additional 'tokens' to use as root
// nodes for the AST we are generating. The tokens section is where we
// specify any such tokens
tokens {
RULE;
PREC_RULE; // flip to this if we find that it's left-recursive
RULES;
RULEMODIFIERS;
RULEACTIONS;
BLOCK;
OPTIONAL;
CLOSURE;
POSITIVE_CLOSURE;
RANGE;
SET;
CHAR_RANGE;
EPSILON;
ALT;
ALTLIST;
ID;
ARG;
ARGLIST;
RET;
COMBINED;
INITACTION;
LABEL; // $x used in rewrite rules
TEMPLATE;
WILDCARD;
// A generic node indicating a list of something when we don't
// really need to distinguish what we have a list of as the AST
// will 'know' by context.
//
LIST;
ELEMENT_OPTIONS; // TOKEN<options>
RESULT;
// lexer action stuff
LEXER_ALT_ACTION;
LEXER_ACTION_CALL; // ID(foo)
}
// Include the copyright in this source and also the generated source
//
@header {
/*
[The "BSD licence"]
Copyright (c) 2005-2012 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.parse;
import org.antlr.v4.tool.*;
import org.antlr.v4.tool.ast.*;
import java.util.ArrayDeque;
import java.util.Deque;
}
@members {
// NOTE(review): not referenced in the visible part of this grammar; purpose
// presumably is to hold descriptive phrases for error messages - confirm.
Deque<String> paraphrases = new ArrayDeque<String>();
// Error-reporting hook used by rule actions in this grammar. The body is
// empty here (presumably overridden by a subclass - verify).
public void grammarError(ErrorType etype, org.antlr.runtime.Token token, Object... args) { }
}
// The main entry point for parsing a V3 grammar from top to toe. This is
// the method call from whence to obtain the AST for the parse.
//
grammarSpec
@after {
// If the finished tree has an OPTIONS child, hand it to
// Grammar.setNodeOptions together with the root node.
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( options!=null ) {
Grammar.setNodeOptions($tree, options);
}
}
:
// The grammar itself can have a documentation comment, which is the
// first terminal in the file.
//
DOC_COMMENT?
// Next we should see the type and name of the grammar file that
// we are about to parse.
//
grammarType id SEMI
// There now follows zero or more declaration sections that should
// be given to us before the rules are declared
//
// A number of things can be declared/stated before the grammar rules
// 'proper' are parsed. These include grammar imports (delegate), grammar
// options, imaginary token declarations, global scope declarations,
// and actions such as @header. In this rule we allow any number of
// these constructs in any order so that the grammar author is not
// constrained by some arbitrary order of declarations that nobody
// can remember. In the next phase of the parse, we verify that these
// constructs are valid, not repeated and so on.
sync ( prequelConstruct sync )*
// We should now see at least one ANTLR EBNF style rule
// declaration. If the rules are missing we will let the
// semantic verification phase tell the user about it.
//
rules
modeSpec*
// And we force ANTLR to process everything it finds in the input
// stream by specifying the need to match End Of File before the
// parse is complete.
//
EOF
// Having parsed everything in the file and accumulated the relevant
// subtrees, we can now rewrite everything into the main AST form
// that our tree walkers are expecting.
//
-> ^(grammarType // The grammar type is our root AST node
id // We need to identify the grammar of course
DOC_COMMENT? // We may or may not have a global documentation comment for the file
prequelConstruct* // The set of declarations we accumulated
rules // And of course, we need the set of rules we discovered
modeSpec*
)
;
// Matches the grammar kind keyword(s) and produces the GrammarRootAST node
// that becomes the root of the whole tree (see the rewrite in grammarSpec).
grammarType
@after {
// A v3 tree grammar is rejected outright by throwing.
// Otherwise record the grammar type on the root node: the token type of
// the LEXER or PARSER keyword when one was present, else COMBINED.
if ( $tg!=null ) throw new v3TreeGrammarException(tg);
if ( $t!=null ) ((GrammarRootAST)$tree).grammarType = $t.type;
else ((GrammarRootAST)$tree).grammarType=COMBINED;
}
: ( // A standalone lexer specification
t=LEXER g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "LEXER_GRAMMAR", getTokenStream()]
| // A standalone parser specification
t=PARSER g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "PARSER_GRAMMAR", getTokenStream()]
// A combined lexer and parser specification
| g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "COMBINED_GRAMMAR", getTokenStream()]
// A v3 'tree grammar' header; matched only so that @after can reject it
| tg=TREE_GRAMMAR
)
;
// This is the list of all constructs that can be declared before
// the set of rules that compose the grammar, and is invoked 0..n
// times by the grammarSpec rule.
prequelConstruct
: // A list of options that affect analysis and/or code generation
optionsSpec
| // A list of grammars to which this grammar will delegate certain
// parts of the parsing sequence - a set of imported grammars
delegateGrammars
| // The declaration of any token types we need that are not already
// specified by a preceding grammar, such as when a parser declares
// imaginary tokens with which to construct the AST, or a rewriting
// tree parser adds further imaginary tokens to ones defined in a prior
// {tree} parser.
tokensSpec
| // A declaration of language target implemented constructs. All such
// action sections start with '@' and are given to the language target's
// StringTemplate group. For instance @parser::header and @lexer::header
// are gathered here.
action
;
// A list of options that affect analysis and/or code generation.
// Each option is terminated by SEMI; the result is ^(OPTIONS option*),
// with the SEMI and RBRACE tokens left out of the tree.
// NOTE(review): no opening brace is matched here, so the OPTIONS token
// presumably already includes it - confirm against the lexer.
optionsSpec
: OPTIONS (option SEMI)* RBRACE -> ^(OPTIONS[$OPTIONS, "OPTIONS"] option*)
;
// A single name = value option; the ASSIGN token becomes the root of the
// resulting subtree, with the id and the value as its children.
option
: id ASSIGN^ optionValue
;
// ------------
// Option Value
//
// The actual value of an option: a (possibly dotted) identifier, a string
// literal, an action block or an integer.
//
optionValue
: // If the option value is a single word that conforms to the
// lexical rules of token or rule names, then the user may skip quotes
// and so on. Many option values meet this description
qid
| // A quoted string
STRING_LITERAL
| // An action block; its node is created as an ActionAST
ACTION<ActionAST>
| // An integer
INT
;
// A list of grammars to which this grammar will delegate certain
// parts of the parsing sequence - a set of imported grammars.
// The COMMA separators and the closing SEMI are dropped; the result is
// ^(IMPORT delegateGrammar+).
delegateGrammars
: IMPORT delegateGrammar (COMMA delegateGrammar)* SEMI -> ^(IMPORT delegateGrammar+)
;
// A possibly named grammar file that should be imported to this grammar
// and delegated to for the rules it specifies.
// The named form produces a subtree rooted at ASSIGN with both ids as
// children; the plain form is just the id.
delegateGrammar
: id ASSIGN^ id
| id
;
// The tokens section. Three forms are accepted:
//  1. a comma-separated list of ids, rewritten to ^(TOKENS_SPEC id+);
//  2. an empty section, which contributes nothing to the tree;
//  3. one or more v3tokenSpec entries (each ending in SEMI), which are
//     still built into a tree rooted at TOKENS_SPEC but are reported
//     through grammarError with ErrorType.V3_TOKENS_SYNTAX.
tokensSpec
: TOKENS_SPEC id (COMMA id)* RBRACE -> ^(TOKENS_SPEC id+)
| TOKENS_SPEC RBRACE ->
| TOKENS_SPEC^ v3tokenSpec+ RBRACE!
{grammarError(ErrorType.V3_TOKENS_SYNTAX, $TOKENS_SPEC);}
;
// One entry of a tokens section written in the semicolon-terminated form:
// an id, optionally followed by an assignment of a string literal.
// The assignment is reported via grammarError with
// ErrorType.V3_ASSIGN_IN_TOKENS and then dropped, so both alternatives
// yield just the id.
v3tokenSpec
: id
( ASSIGN lit=STRING_LITERAL
{
grammarError(ErrorType.V3_ASSIGN_IN_TOKENS, $id.start,
$id.text, $lit.getText());
}
-> id // ignore assignment
| -> id
)
SEMI
;
// A declaration of a language target specific section,
// such as @header, @includes and so on. We do not verify these
// sections, they are just passed on to the language target.
// The optional scope name is kept (without the COLONCOLON) and the ACTION
// node is created as an ActionAST.
/** Match stuff like @parser::members {int i;} */
action
: AT (actionScopeName COLONCOLON)? id ACTION -> ^(AT actionScopeName? id ACTION<ActionAST>)
;
/** Sometimes the scope names will collide with keywords; allow them as
* ids for action scopes.
*/
// The LEXER and PARSER keyword tokens are rewritten into ID nodes built
// from the matched token.
actionScopeName
: id
| LEXER -> ID[$LEXER]
| PARSER -> ID[$PARSER]
;
// A lexer mode section: MODE id SEMI followed by zero or more lexer rules.
// The sync rule is invoked before the first rule and after every rule; it
// contributes nothing to the tree. The result is ^(MODE id lexerRule*).
modeSpec
: MODE id SEMI sync (lexerRule sync)* -> ^(MODE id lexerRule*)
;
// The list of rules of the grammar (possibly empty). The sync rule is
// invoked before the first rule and after every rule.
rules
: sync (rule sync)*
// Rewrite with an enclosing node as this is good for counting
// the number of rules and an easy marker for the walker to detect
// that there are no rules.
->^(RULES rule*)
;
// Matches nothing; exists only for its init action, which resynchronizes
// the parser. When the next token is neither EOF nor a member of the
// current error recovery set, a NoViableAltException is reported and
// tokens are consumed until one from the recovery set is reached.
sync
@init {
BitSet recoverySet = computeErrorRecoverySet();
int lookahead = input.LA(1);
boolean canContinue = lookahead==Token.EOF || recoverySet.member(lookahead);
if ( !canContinue ) {
// report first, then skip ahead inside a begin/end resync bracket
reportError(new NoViableAltException("",0,0,input));
beginResync();
consumeUntil(input, recoverySet);
endResync();
}
} :
;
// A rule is either a parser rule or a lexer rule.
rule: parserRule
| lexerRule
;
// The specification of an EBNF rule in ANTLR style, with all the
// rule level parameters, declarations, actions, rewrite specs and so
// on.
//
// Note that here we allow any number of rule declaration sections (such
// as scope, returns, etc) in any order and we let the upcoming semantic
// verification of the AST determine if things are repeated or if a
// particular functional element is not valid in the context of the
// grammar type, such as using returns in lexer rules and so on.
//
// The init action pushes a paraphrase and the after-action pops it again.
// The after-action also looks for an OPTIONS child in the finished tree
// and, when there is one, passes it to Grammar.setNodeOptions.
parserRule
@init { paraphrases.push("matching a rule"); }
@after {
paraphrases.pop();
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( options!=null ) {
Grammar.setNodeOptions($tree, options);
}
}
: // A rule may start with an optional documentation comment
DOC_COMMENT?
// Next comes the rule name. Only RULE_REF is accepted here; rules
// named by a TOKEN_REF are matched by lexerRule instead.
RULE_REF
// Immediately following the rulename, there may be a specification
// of input parameters for the rule. We do not do anything with the
// parameters here except gather them for future phases such as
// semantic verification, type assignment etc. We require that
// the input parameters are the next syntactically significant element
// following the rule id.
ARG_ACTION?
ruleReturns?
throwsSpec?
localsSpec?
// Now, before the rule specification itself, which is introduced
// with a COLON, we may have zero or more configuration sections.
// As usual we just accept anything that is syntactically valid for
// one form of the rule or another and let the semantic verification
// phase throw out anything that is invalid.
// At the rule level, a programmer may specify a number of sections, such
// as scope declarations, rule return elements, @ sections (which may be
// language target specific) and so on. We allow any number of these in any
// order here and as usual rely on the semantic verification phase to reject
// anything invalid using its additional context information. Here we are
// context free and just accept anything that is a syntactically correct
// construct.
//
rulePrequels
COLON
// The rule is, at the top level, just a list of alts, with
// finer grained structure defined within the alts.
ruleBlock
SEMI
exceptionGroup
// COLON and SEMI are dropped; note that DOC_COMMENT is placed after
// the rule name in the tree although it precedes it in the input.
-> ^( RULE<RuleAST> RULE_REF DOC_COMMENT? ARG_ACTION<ActionAST>?
ruleReturns? throwsSpec? localsSpec? rulePrequels? ruleBlock exceptionGroup*
)
;
// Many language targets support exceptions and the rule will
// generally be able to throw the language target equivalent
// of a recognition exception. The grammar programmer can
// specify a list of exceptions to catch or a generic catch all
// and the target language code generation template is
// responsible for generating code that makes sense.
// Both parts are optional, so this rule can match nothing at all.
exceptionGroup
: exceptionHandler* finallyClause?
;
// Specifies a handler for a particular type of exception
// thrown by a rule. Both the ARG_ACTION and the ACTION node are created
// as ActionAST under the CATCH root.
exceptionHandler
: CATCH ARG_ACTION ACTION -> ^(CATCH ARG_ACTION<ActionAST> ACTION<ActionAST>)
;
// A finally clause attached to a rule; the ACTION node is created as an
// ActionAST under the FINALLY root.
finallyClause
: FINALLY ACTION -> ^(FINALLY ACTION<ActionAST>)
;
// Zero or more rulePrequel sections. The sync rule is invoked before the
// first one and after each one; the result is the flat list of prequels
// with no enclosing node. A paraphrase is pushed on entry and popped in
// the after-action.
rulePrequels
@init { paraphrases.push("matching rule preamble"); }
@after { paraphrases.pop(); }
: sync (rulePrequel sync)* -> rulePrequel*
;
// An individual rule level configuration as referenced by the rulePrequels
// rule above: either an options section or an @ action.
//
rulePrequel
: optionsSpec
| ruleAction
;
// A rule can return elements that it constructs as it executes.
// The return values are specified in a 'returns' prequel element,
// which contains COMMA separated declarations, where the declaration
// is target language specific. Here we see the returns declaration
// as a single lexical action element, to be processed later.
// RETURNS becomes the root and the ARG_ACTION (an ActionAST) its child.
//
ruleReturns
: RETURNS^ ARG_ACTION<ActionAST>
;
// --------------
// Exception spec
//
// Some target languages, such as Java and C# support exceptions
// and they are specified as a prequel element for each rule that
// wishes to throw its own exception type. Note that each exception
// name is matched by qid, i.e. an identifier that may be dotted, so the
// header section of the grammar must specify the correct import
// statements (or language equivalent) where short names are used.
// Target languages that do not support exceptions just safely ignore
// them.
// The COMMA separators are dropped: the result is ^(THROWS qid+).
//
throwsSpec
: THROWS qid (COMMA qid)* -> ^(THROWS qid+)
;
// locals [Cat x, float g]
// LOCALS becomes the root and the ARG_ACTION (an ActionAST) its child.
localsSpec : LOCALS^ ARG_ACTION<ActionAST> ;
// @ Sections are generally target language specific things
// such as local variable declarations, code to run before the
// rule starts and so on. For instance most targets support the
// @init {} section where declarations and code can be placed
// to run before the rule is entered. The C target also has
// an @declarations {} section, where local variables are declared
// in order that the generated code is C89 compliant.
//
/** Match stuff like @init {int i;} */
ruleAction
: AT id ACTION -> ^(AT id ACTION<ActionAST>)
;
// A set of alts, rewritten as a BLOCK for generic processing
// in tree walkers. Used by parserRule so that the list of
// alts for a rule appears as a BLOCK containing the alts and
// can be processed by the generic BLOCK rule. Note that we
// use a separate rule so that the BLOCK node has start and stop
// boundaries set correctly by rule post processing of rewrites.
//
// The init action remembers the token just before the block (parserRule
// matches COLON immediately before invoking this rule) and the BLOCK node
// is created from that token.
//
// ResyncToEndOfRuleBlock is thrown by the element-level error handlers
// when they detect an unterminated rule; it is caught here and the
// result becomes an error node spanning what was consumed.
ruleBlock
@init {Token colon = input.LT(-1);}
: ruleAltList -> ^(BLOCK<BlockAST>[colon,"BLOCK"] ruleAltList)
;
catch [ResyncToEndOfRuleBlock e] {
// just resyncing; ignore error
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
}
// The OR-separated alternatives of a parser rule; the OR tokens are
// dropped and the result is the flat list of labeledAlt subtrees.
ruleAltList
: labeledAlt (OR labeledAlt)* -> labeledAlt+
;
// An alternative optionally followed by POUND and an id. Neither POUND
// nor the id is added to the tree; instead the id subtree is stored in
// the altLabel field of the alternative's AltAST.
labeledAlt
: alternative
( POUND! id! {((AltAST)$alternative.tree).altLabel=$id.tree;}
)?
;
// A lexer rule: optional doc comment, optional FRAGMENT modifier, the
// TOKEN_REF name, COLON, the rule body and SEMI.
// In the tree the name comes first, then the doc comment, then
// ^(RULEMODIFIERS FRAGMENT) when FRAGMENT was given, then the block.
// A paraphrase is pushed on entry and popped in the after-action.
lexerRule
@init { paraphrases.push("matching a lexer rule"); }
@after {
paraphrases.pop();
}
: DOC_COMMENT? FRAGMENT?
TOKEN_REF COLON lexerRuleBlock SEMI
-> ^( RULE<RuleAST> TOKEN_REF DOC_COMMENT?
^(RULEMODIFIERS FRAGMENT)? lexerRuleBlock
)
;
// The body of a lexer rule, wrapped in a BLOCK node created from the
// token that precedes it (lexerRule matches COLON just before invoking
// this rule).
//
// ResyncToEndOfRuleBlock, thrown by the lexerElement error handler for an
// unterminated rule, is caught here and the result becomes an error node.
lexerRuleBlock
@init {Token colon = input.LT(-1);}
: lexerAltList -> ^(BLOCK<BlockAST>[colon,"BLOCK"] lexerAltList)
;
catch [ResyncToEndOfRuleBlock e] {
// just resyncing; ignore error
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
}
// The OR-separated alternatives of a lexer rule or lexer block; the OR
// tokens are dropped.
lexerAltList
: lexerAlt (OR lexerAlt)* -> lexerAlt+
;
// One lexer alternative. With trailing lexer commands the result is
// ^(LEXER_ALT_ACTION elements commands); without them it is just the ALT
// produced by lexerElements. An alternative with no elements at all
// becomes ^(ALT EPSILON).
lexerAlt
: lexerElements
( lexerCommands -> ^(LEXER_ALT_ACTION<AltAST> lexerElements lexerCommands)
| -> lexerElements
)
| -> ^(ALT<AltAST> EPSILON) // empty alt
;
// One or more lexer elements gathered under a single ALT node (an AltAST).
lexerElements
: lexerElement+ -> ^(ALT<AltAST> lexerElement+)
;
// A single element of a lexer alternative: a labeled element, an atom, a
// parenthesized block or an action/predicate. A labeled element or atom
// followed by an EBNF suffix is wrapped as ^(suffix ^(BLOCK ^(ALT ...)));
// a block followed by a suffix simply becomes ^(suffix block).
//
// The init action pushes a paraphrase (popped in the after-action) and
// marks the input.
//
// The catch clause turns the failed element into an error node and then
// tries to recognize a rule that is missing its terminating ';': when the
// furthest token looked at is something that belongs to the start or the
// trailer of a rule (or EOF), a "missing ';'" error is reported, the input
// is repositioned and ResyncToEndOfRuleBlock is thrown so that recovery
// continues at rule-block level. Any other error is reported and
// recovered from in the normal way.
lexerElement
@init {
paraphrases.push("looking for lexer rule element");
int m = input.mark();
}
@after { paraphrases.pop(); }
: labeledLexerElement
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$labeledLexerElement.start,"BLOCK"] ^(ALT<AltAST> labeledLexerElement) ) )
| -> labeledLexerElement
)
| lexerAtom
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$lexerAtom.start,"BLOCK"] ^(ALT<AltAST> lexerAtom) ) )
| -> lexerAtom
)
| lexerBlock
( ebnfSuffix -> ^(ebnfSuffix lexerBlock)
| -> lexerBlock
)
| actionElement // actions only allowed at end of outer alt actually,
// but preds can be anywhere
;
catch [RecognitionException re] {
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re);
int ttype = input.get(input.range()).getType(); // seems to be next token
// look for anything that really belongs at the start of the rule minus the initial ID
if ( ttype==COLON || ttype==RETURNS || ttype==CATCH || ttype==FINALLY || ttype==AT || ttype==EOF ) {
RecognitionException missingSemi =
new v4ParserException("unterminated rule (missing ';') detected at '"+
input.LT(1).getText()+" "+input.LT(2).getText()+"'", input);
reportError(missingSemi);
if ( ttype==EOF ) {
input.seek(input.index()+1);
}
else if ( ttype==CATCH || ttype==FINALLY ) {
input.seek(input.range()); // ignore what's before rule trailer stuff
}
else if ( ttype==RETURNS || ttype==AT ) { // scan back looking for ID of rule header
int p = input.index();
Token t = input.get(p);
// Never step past the first token: without this bound a missing
// rule header would drive the index negative and input.get would fail.
while ( p>0 && t.getType()!=RULE_REF && t.getType()!=TOKEN_REF ) {
p--;
t = input.get(p);
}
input.seek(p);
}
throw new ResyncToEndOfRuleBlock(); // make sure it goes back to rule block level to recover
}
reportError(re);
recover(input,re);
}
// A label applied to a lexer atom or lexer block with = or +=. The
// assignment token that was matched becomes the root, with the id and
// the labeled construct as children.
labeledLexerElement
: id (ass=ASSIGN|ass=PLUS_ASSIGN)
( lexerAtom -> ^($ass id lexerAtom)
| lexerBlock -> ^($ass id lexerBlock)
)
;
// A parenthesized block inside a lexer rule, optionally starting with an
// options section followed by COLON. The result is a BLOCK node created
// from the LPAREN token; the parentheses and the COLON are dropped.
// The after-action passes an OPTIONS child, when present, to
// Grammar.setNodeOptions.
lexerBlock
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( options!=null ) {
Grammar.setNodeOptions($tree, options);
}
}
: LPAREN
( optionsSpec COLON )?
lexerAltList
RPAREN
-> ^(BLOCK<BlockAST>[$LPAREN,"BLOCK"] optionsSpec? lexerAltList )
;
// channel=HIDDEN, skip, more, mode(INSIDE), push(INSIDE), pop
// The commands follow RARROW and are separated by COMMA; only the
// commands themselves are kept, as a flat list.
lexerCommands
: RARROW lexerCommand (COMMA lexerCommand)* -> lexerCommand+
;
// A single lexer command. With a parenthesized argument it becomes
// ^(LEXER_ACTION_CALL name expr); without one it is just the name.
lexerCommand
: lexerCommandName LPAREN lexerCommandExpr RPAREN -> ^(LEXER_ACTION_CALL lexerCommandName lexerCommandExpr)
| lexerCommandName
;
// The argument of a lexer command: an identifier or an integer.
lexerCommandExpr
: id
| INT
;
// The name of a lexer command. The MODE keyword token is also accepted
// and is rewritten into an ID node built from it.
lexerCommandName
: id
| MODE ->ID[$MODE]
;
// The OR-separated alternatives of a parser block; the OR tokens are
// dropped. Unlike ruleAltList these alternatives cannot carry a label.
altList
: alternative (OR alternative)* -> alternative+
;
// An individual alt with an optional alt option like <assoc=right>
// The elements are gathered under an ALT node (an AltAST), preceded by
// the element options when given; an alternative with no elements
// becomes ^(ALT EPSILON).
// The after-action pops the paraphrase pushed on entry and passes the
// tree of the element options (label o) to Grammar.setNodeOptions.
alternative
@init { paraphrases.push("matching alternative"); }
@after {
paraphrases.pop();
Grammar.setNodeOptions($tree, $o.tree);
}
: o=elementOptions?
e+=element+ -> ^(ALT<AltAST> elementOptions? $e+)
| -> ^(ALT<AltAST> EPSILON) // empty alt
;
// A single element of a parser alternative: a labeled element, an atom,
// an EBNF subrule or an action/predicate. A labeled element or atom
// followed by an EBNF suffix is wrapped as ^(suffix ^(BLOCK ^(ALT ...))).
//
// The init action pushes a paraphrase (popped in the after-action) and
// marks the input.
//
// The catch clause turns the failed element into an error node and then
// tries to recognize a rule that is missing its terminating ';': when the
// furthest token looked at is something that belongs to the start or the
// trailer of a rule, a "missing ';'" error is reported, the input is
// repositioned and ResyncToEndOfRuleBlock is thrown so that recovery
// continues at rule-block level. Any other error is reported and
// recovered from in the normal way.
element
@init {
paraphrases.push("looking for rule element");
int m = input.mark();
}
@after { paraphrases.pop(); }
: labeledElement
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$labeledElement.start,"BLOCK"] ^(ALT<AltAST> labeledElement ) ))
| -> labeledElement
)
| atom
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$atom.start,"BLOCK"] ^(ALT<AltAST> atom) ) )
| -> atom
)
| ebnf
| actionElement
;
catch [RecognitionException re] {
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re);
int ttype = input.get(input.range()).getType();
// look for anything that really belongs at the start of the rule minus the initial ID
if ( ttype==COLON || ttype==RETURNS || ttype==CATCH || ttype==FINALLY || ttype==AT ) {
RecognitionException missingSemi =
new v4ParserException("unterminated rule (missing ';') detected at '"+
input.LT(1).getText()+" "+input.LT(2).getText()+"'", input);
reportError(missingSemi);
if ( ttype==CATCH || ttype==FINALLY ) {
input.seek(input.range()); // ignore what's before rule trailer stuff
}
if ( ttype==RETURNS || ttype==AT ) { // scan back looking for ID of rule header
int p = input.index();
Token t = input.get(p);
// Never step past the first token: this rule can be entered without
// any rule header before it (for example through elementEntry), and
// without this bound the index would go negative and input.get would fail.
while ( p>0 && t.getType()!=RULE_REF && t.getType()!=TOKEN_REF ) {
p--;
t = input.get(p);
}
input.seek(p);
}
throw new ResyncToEndOfRuleBlock(); // make sure it goes back to rule block level to recover
}
reportError(re);
recover(input,re);
}
// An action or a semantic predicate used as an element, each optionally
// followed by element options. ACTION nodes are created as ActionAST and
// SEMPRED nodes as PredAST; the options, when given, become a child.
// The after-action passes an ELEMENT_OPTIONS child, when present, to
// Grammar.setNodeOptions.
actionElement
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
if ( options!=null ) {
Grammar.setNodeOptions($tree, options);
}
}
: ACTION<ActionAST>
| ACTION elementOptions -> ^(ACTION<ActionAST> elementOptions)
| SEMPRED<PredAST>
| SEMPRED elementOptions -> ^(SEMPRED<PredAST> elementOptions)
;
// A label applied to an atom or a block with = or +=. The assignment
// token that was matched becomes the root, with the id and the labeled
// construct as children.
labeledElement
: id (ass=ASSIGN|ass=PLUS_ASSIGN)
( atom -> ^($ass id atom)
| block -> ^($ass id block)
)
;
// A block of grammar structure optionally followed by standard EBNF
// notation, or ANTLR specific notation. I.E. ? + ^ and so on
ebnf
: block
// And now we see if we have any of the optional suffixes and rewrite
// the AST for this rule accordingly
( blockSuffix -> ^(blockSuffix block)
| -> block
)
;
// The standard EBNF suffixes with additional components that make
// sense only to ANTLR, in the context of a grammar block.
// At present only the standard EBNF suffixes are accepted.
blockSuffix
: ebnfSuffix // Standard EBNF
;
// The ?, * and + suffixes, each optionally followed by a further QUESTION
// (label nongreedy). The suffix is rewritten into an OPTIONAL, CLOSURE
// or POSITIVE_CLOSURE node whose constructor receives the first token of
// the suffix and the nongreedy token (absent when not given).
ebnfSuffix
: QUESTION nongreedy=QUESTION? -> OPTIONAL<OptionalBlockAST>[$start, $nongreedy]
| STAR nongreedy=QUESTION? -> CLOSURE<StarBlockAST>[$start, $nongreedy]
| PLUS nongreedy=QUESTION? -> POSITIVE_CLOSURE<PlusBlockAST>[$start, $nongreedy]
;
// The atoms allowed inside a lexer rule: a character range, a terminal,
// a reference to a RULE_REF (created as a RuleRefAST), an inverted set,
// a wildcard or a LEXER_CHAR_SET.
lexerAtom
: range
| terminal
| RULE_REF<RuleRefAST>
| notSet
| wildcard
| LEXER_CHAR_SET
;
// The atoms allowed inside a parser rule. The first alternative
// (qualified delegate.rule references) is commented out and therefore
// not part of the grammar. Recognition errors are not handled here; the
// catch clause rethrows them so that the handler of element deals with
// them.
atom
: // Qualified reference delegate.rule. This must be
// lexically contiguous (no spaces either side of the DOT)
// otherwise it is two references with a wildcard in between
// and not a qualified reference.
/*
{
input.LT(1).getCharPositionInLine()+input.LT(1).getText().length()==
input.LT(2).getCharPositionInLine() &&
input.LT(2).getCharPositionInLine()+1==input.LT(3).getCharPositionInLine()
}?
id DOT ruleref -> ^(DOT id ruleref)
|
*/
range // Range x..y - only valid in lexers
| terminal
| ruleref
| notSet
| wildcard
;
catch [RecognitionException re] { throw re; } // pass upwards to element
// The wildcard, optionally followed by element options. The DOT token is
// rewritten into a WILDCARD node (a TerminalAST) with the options as its
// child. The after-action passes an ELEMENT_OPTIONS child, when present,
// to Grammar.setNodeOptions.
wildcard
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
if ( options!=null ) {
Grammar.setNodeOptions($tree, options);
}
}
: // Wildcard '.' means any character in a lexer, any
// token in parser and any node or subtree in a tree parser
// Because the terminal rule is allowed to be the node
// specification for the start of a tree rule, we must
// later check that wildcard was not used for that.
DOT elementOptions?
-> ^(WILDCARD<TerminalAST>[$DOT] elementOptions?)
;
// --------------------
// Inverted element set
//
// A set of characters (in a lexer) or terminal tokens, if a parser,
// that are then used to create the inverse set of them.
// A single inverted element is wrapped in a SET node of its own so that
// both alternatives yield ^(NOT ^(SET ...)).
notSet
: NOT setElement -> ^(NOT<NotAST>[$NOT] ^(SET<SetAST>[$setElement.start,"SET"] setElement))
| NOT blockSet -> ^(NOT<NotAST>[$NOT] blockSet)
;
// A parenthesized, OR-separated list of set elements, as used after NOT.
// The parentheses and OR tokens are dropped and the elements are gathered
// under a SET node (a SetAST) created from the LPAREN token.
blockSet
: LPAREN setElement (OR setElement)* RPAREN
-> ^(SET<SetAST>[$LPAREN,"SET"] setElement+ )
;
// The things that may appear inside a set: a token reference or a string
// literal (both created as TerminalAST), a range or a LEXER_CHAR_SET.
setElement
: TOKEN_REF<TerminalAST>
| STRING_LITERAL<TerminalAST>
| range
| LEXER_CHAR_SET
;
// -------------
// Grammar Block
//
// Anywhere where an element is valid, the grammar may start a new block
// of alts by surrounding that block with ( ). A new block may also have a set
// of options, which apply only to that block.
// The options (and any @ actions) must be followed by COLON. The result
// is a BLOCK node created from the LPAREN token; the parentheses and the
// COLON are dropped. The after-action passes an OPTIONS child, when
// present, to Grammar.setNodeOptions.
//
block
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( options!=null ) {
Grammar.setNodeOptions($tree, options);
}
}
: LPAREN
( optionsSpec? ra+=ruleAction* COLON )?
altList
RPAREN
-> ^(BLOCK<BlockAST>[$LPAREN,"BLOCK"] optionsSpec? $ra* altList )
;
// ----------------
// Parser rule ref
//
// Reference to a parser rule with optional arguments and optional
// element options. The RULE_REF becomes the root (a RuleRefAST) with the
// arguments (an ActionAST) and the options as children.
// The after-action passes an ELEMENT_OPTIONS child, when present, to
// Grammar.setNodeOptions. Recognition errors are rethrown so that the
// handler of element deals with them.
//
ruleref
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
if ( options!=null ) {
Grammar.setNodeOptions($tree, options);
}
}
: RULE_REF ARG_ACTION? elementOptions? -> ^(RULE_REF<RuleRefAST> ARG_ACTION<ActionAST>? elementOptions?)
;
catch [RecognitionException re] { throw re; } // pass upwards to element
// ---------------
// Character Range
//
// Specifies a range of characters. Valid for lexer rules only, but
// we do not check that here, the tree walkers should do that.
// Note also that the parser also allows through more than just
// character literals so that we can produce a much nicer semantic
// error about any abuse of the .. operator.
// The RANGE token becomes the root (a RangeAST) with the two string
// literals (TerminalAST) as children.
//
range
: STRING_LITERAL<TerminalAST> RANGE<RangeAST>^ STRING_LITERAL<TerminalAST>
;
// A token reference or a string literal, optionally followed by element
// options. The terminal becomes the root (a TerminalAST) with the
// options as its child. The after-action passes an ELEMENT_OPTIONS
// child, when present, to Grammar.setNodeOptions.
terminal
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
if ( options!=null ) {
Grammar.setNodeOptions($tree, options);
}
}
: TOKEN_REF elementOptions? -> ^(TOKEN_REF<TerminalAST> elementOptions?)
| STRING_LITERAL elementOptions? -> ^(STRING_LITERAL<TerminalAST> elementOptions?)
;
// Terminals may be adorned with certain options when
// referenced in the grammar: TOK<,,,>
// The list between LT and GT may be empty. The options are gathered
// under an ELEMENT_OPTIONS node created from the LT token; the COMMA
// separators and the GT are dropped.
elementOptions
: LT (elementOption (COMMA elementOption)*)? GT
-> ^(ELEMENT_OPTIONS[$LT,"ELEMENT_OPTIONS"] elementOption*)
;
// When used with elements we can specify what the tree node type can
// be and also assign settings of various options (which we do not check here)
elementOption
: // This format indicates the default element option
qid
| // name = value; the ASSIGN token becomes the root
id ASSIGN^ optionValue
;
// The name of the grammar, and indeed some other grammar elements may
// come through to the parser looking like a rule reference or a token
// reference, hence this rule is used to pick up whichever it is and rewrite
// it as a generic ID token.
// A paraphrase is pushed on entry and popped in the after-action.
id
@init { paraphrases.push("looking for an identifier"); }
@after { paraphrases.pop(); }
: RULE_REF ->ID[$RULE_REF]
| TOKEN_REF ->ID[$TOKEN_REF]
;
// A qualified identifier: ids separated by DOT. The whole sequence is
// collapsed into a single ID node that is created from the first token
// and carries the complete matched text.
// A paraphrase is pushed on entry and popped in the after-action.
qid
@init { paraphrases.push("looking for a qualified identifier"); }
@after { paraphrases.pop(); }
: id (DOT id)* -> ID[$qid.start, $text]
;
// Entry points that match a single construct followed by EOF, so that a
// fragment of a grammar can be parsed on its own.
alternativeEntry : alternative EOF ; // allow gunit to call alternative and see EOF afterwards
elementEntry : element EOF ;
ruleEntry : rule EOF ;
blockEntry : block EOF ;

View File

@ -1,200 +0,0 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
tree grammar ATNBuilder;
options {
language = Java;
tokenVocab = ANTLRParser;
ASTLabelType = GrammarAST;
// filter = true;
}
// Include the copyright in this source and also the generated source
@header {
/*
[The "BSD license"]
Copyright (c) 2010 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.parse;
import org.antlr.v4.tool.*;
import org.antlr.v4.tool.ast.*;
import org.antlr.v4.automata.ATNFactory;
}
// Members added to the generated tree parser: the factory that every
// rule action calls to build its result, and a constructor that sets it.
@members {
ATNFactory factory;
// Delegates to the generated single-argument constructor and then
// remembers the factory.
public ATNBuilder(TreeNodeStream input, ATNFactory factory) {
this(input);
this.factory = factory;
}
}
// Not meant to be called; it only gives the grammar a rule that nothing
// else references.
dummy : block[null] ; // avoid error about no start rule
// Walks a BLOCK node, skipping an optional OPTIONS subtree, and collects
// the handle of every alternative. The factory is told the number of the
// current outer alternative: 1 before the first one, and the next number
// after each alternative has been walked. The result is
// factory.block(...) over the collected handles.
ruleBlock[GrammarAST ebnfRoot] returns [ATNFactory.Handle p]
@init {
List<ATNFactory.Handle> alts = new ArrayList<ATNFactory.Handle>();
int alt = 1;
factory.setCurrentOuterAlt(alt);
}
: ^(BLOCK
(^(OPTIONS .*))?
( a=alternative
{alts.add($a.p); factory.setCurrentOuterAlt(++alt);}
)+
)
{$p = factory.block((BlockAST)$BLOCK, ebnfRoot, alts);}
;
// Walks a BLOCK node, skipping an optional OPTIONS subtree, collects the
// handle of every alternative and returns factory.block(...) over them.
// Unlike ruleBlock it does not touch the current outer alternative.
block[GrammarAST ebnfRoot] returns [ATNFactory.Handle p]
@init {List<ATNFactory.Handle> alts = new ArrayList<ATNFactory.Handle>();}
: ^(BLOCK (^(OPTIONS .*))? (a=alternative {alts.add($a.p);})+)
{$p = factory.block((BlockAST)$BLOCK, ebnfRoot, alts);}
;
// Walks one alternative:
//  - LEXER_ALT_ACTION: the nested alternative combined with its lexer
//    commands via factory.lexerAltCommands;
//  - an ALT holding only EPSILON: factory.epsilon;
//  - any other ALT: factory.alt over the handles of its elements.
alternative returns [ATNFactory.Handle p]
@init {List<ATNFactory.Handle> els = new ArrayList<ATNFactory.Handle>();}
: ^(LEXER_ALT_ACTION a=alternative lexerCommands)
{$p = factory.lexerAltCommands($a.p,$lexerCommands.p);}
| ^(ALT EPSILON) {$p = factory.epsilon($EPSILON);}
| ^(ALT (e=element {els.add($e.p);})+) {$p = factory.alt(els);}
;
// Concatenates the text of one or more lexer commands, appending a single
// space after each, and turns the resulting string into one action via
// factory.action.
lexerCommands returns [ATNFactory.Handle p]
@init {StringBuilder cmds = new StringBuilder();}
: (c=lexerCommand {cmds.append($c.cmd).append(' ');})+
{
$p = factory.action(cmds.toString());
}
;
// Produces the text for one lexer command: factory.lexerCallCommand for a
// command with an argument, factory.lexerCommand for a bare name.
lexerCommand returns [String cmd]
: ^(LEXER_ACTION_CALL ID lexerCommandExpr)
{$cmd = factory.lexerCallCommand($ID, $lexerCommandExpr.start);}
| ID
{$cmd = factory.lexerCommand($ID);}
;
// The argument of a lexer command: an ID or an INT node. Nothing is
// built here; lexerCommand uses the start node of this rule.
lexerCommandExpr
: ID
| INT
;
// Walks one element of an alternative and returns its handle. Labeled
// elements, atoms and subrules are delegated to their own rules. Actions
// and predicates (with or without a child) go to factory.action and
// factory.sempred, a NOT node yields the inverted block set, and a
// LEXER_CHAR_SET goes to factory.charSetLiteral.
element returns [ATNFactory.Handle p]
: labeledElement {$p = $labeledElement.p;}
| atom {$p = $atom.p;}
| subrule {$p = $subrule.p;}
| ACTION {$p = factory.action((ActionAST)$ACTION);}
| SEMPRED {$p = factory.sempred((PredAST)$SEMPRED);}
| ^(ACTION .) {$p = factory.action((ActionAST)$ACTION);}
| ^(SEMPRED .) {$p = factory.sempred((PredAST)$SEMPRED);}
| ^(NOT b=blockSet[true]) {$p = $b.p;}
| LEXER_CHAR_SET {$p = factory.charSetLiteral($start);}
;
// An atom or an inverted block set.
// NOTE(review): no rule in the visible part of this grammar references
// astOperand - confirm whether it is still needed.
astOperand returns [ATNFactory.Handle p]
: atom {$p = $atom.p;}
| ^(NOT blockSet[true]) {$p = $blockSet.p;}
;
// A labeled element: the handle of the labeled construct is passed to
// factory.label for = and to factory.listLabel for +=. The label ID
// itself is not used here.
labeledElement returns [ATNFactory.Handle p]
: ^(ASSIGN ID element) {$p = factory.label($element.p);}
| ^(PLUS_ASSIGN ID element) {$p = factory.listLabel($element.p);}
;
// A block, either bare or under an OPTIONAL, CLOSURE or POSITIVE_CLOSURE
// node. For the suffixed forms the suffix node (the start node of this
// rule) is passed to block as its ebnfRoot; a bare block gets null.
subrule returns [ATNFactory.Handle p]
: ^(OPTIONAL block[$start]) {$p = $block.p;}
| ^(CLOSURE block[$start]) {$p = $block.p;}
| ^(POSITIVE_CLOSURE block[$start]) {$p = $block.p;}
| block[null] {$p = $block.p;}
;
// Walks a SET node, collecting the start node of each set element, and
// returns factory.set over them; the invert argument is passed through.
blockSet[boolean invert] returns [ATNFactory.Handle p]
@init {List<GrammarAST> alts = new ArrayList<GrammarAST>();}
: ^(SET (setElement {alts.add($setElement.start);})+) {$p = factory.set($start, alts, $invert);}
;
/** Don't combine with atom otherwise it will build spurious ATN nodes */
// Only recognizes the element; there are no actions, so nothing is built.
setElement
: STRING_LITERAL
| TOKEN_REF
| ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
| LEXER_CHAR_SET
;
// Walks an atom and returns its handle. Ranges, terminals, rule
// references and (non-inverted) block sets are delegated to their rules;
// a DOT node with an ID yields the handle of the referenced terminal or
// rule; a WILDCARD node, with or without a child, goes to
// factory.wildcard.
atom returns [ATNFactory.Handle p]
: range {$p = $range.p;}
| ^(DOT ID terminal) {$p = $terminal.p;}
| ^(DOT ID ruleref) {$p = $ruleref.p;}
| ^(WILDCARD .) {$p = factory.wildcard($start);}
| WILDCARD {$p = factory.wildcard($start);}
| blockSet[false] {$p = $blockSet.p;}
| terminal {$p = $terminal.p;}
| ruleref {$p = $ruleref.p;}
;
// A rule reference in any of its shapes (with element options, with only
// optional arguments, or as a bare node). All three call factory.ruleRef
// with the RULE_REF node; the children are not examined.
ruleref returns [ATNFactory.Handle p]
: ^(RULE_REF ARG_ACTION? ^(ELEMENT_OPTIONS .*)) {$p = factory.ruleRef($RULE_REF);}
| ^(RULE_REF ARG_ACTION?) {$p = factory.ruleRef($RULE_REF);}
| RULE_REF {$p = factory.ruleRef($RULE_REF);}
;
// A RANGE node with its two string literal ends, passed to factory.range.
range returns [ATNFactory.Handle p]
: ^(RANGE a=STRING_LITERAL b=STRING_LITERAL) {$p = factory.range($a,$b);}
;
// A string literal or token reference, with or without children. String
// literals go to factory.stringLiteral and token references to
// factory.tokenRef, each with the start node cast to TerminalAST; the
// children are not examined.
terminal returns [ATNFactory.Handle p]
: ^(STRING_LITERAL .) {$p = factory.stringLiteral((TerminalAST)$start);}
| STRING_LITERAL {$p = factory.stringLiteral((TerminalAST)$start);}
| ^(TOKEN_REF ARG_ACTION .) {$p = factory.tokenRef((TerminalAST)$start);}
| ^(TOKEN_REF .) {$p = factory.tokenRef((TerminalAST)$start);}
| TOKEN_REF {$p = factory.tokenRef((TerminalAST)$start);}
;

View File

@ -1,125 +0,0 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
lexer grammar ActionSplitter;
options { filter=true; }
@header {
package org.antlr.v4.parse;
import org.antlr.v4.tool.*;
import org.antlr.v4.tool.ast.*;
}
// Members added to the generated lexer: the listener that receives every
// recognized chunk, a constructor that sets it, a driver that runs the
// lexer to the end of the input, and the helper used by the predicate in
// the TEXT rule.
@members {
ActionSplitterListener delegate;
public ActionSplitter(CharStream input, ActionSplitterListener delegate) {
this(input, new RecognizerSharedState());
this.delegate = delegate;
}
/** Force filtering: pull tokens until EOF and return them (the EOF token
 *  is not included). Matching the tokens is what triggers the delegate
 *  callbacks in the rule actions.
 */
public List<Token> getActionTokens() {
List<Token> chunks = new ArrayList<Token>();
for (Token t = nextToken(); t.getType()!=Token.EOF; t = nextToken()) {
chunks.add(t);
}
return chunks;
}
/** True when c can start an ID as defined by the ID fragment rule, i.e.
 *  an underscore or an ASCII letter. This must agree with that rule: if
 *  it accepted characters which ID cannot match, a dollar sign followed
 *  by such a character would be refused by TEXT, matched by no attribute
 *  rule, and therefore skipped silently by the filtering lexer.
 */
private boolean isIDStartChar(int c) {
return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
}
// Match block comments before anything else and hand them to the
// delegate as plain text, so that nothing inside them is treated as an
// attribute reference.
COMMENT
: '/*' ( options {greedy=false;} : . )* '*/' {delegate.text($text);}
;
// A line comment up to and including its line terminator, handed to the
// delegate as plain text. The terminating newline is required, so a
// comment that runs to the very end of the input is not matched by this
// rule.
LINE_COMMENT
: '//' ~('\n'|'\r')* '\r'? '\n' {delegate.text($text);}
;
// An assignment to a scoped attribute: dollar sign, ID, '::', ID,
// optional whitespace, '=', a value expression and ';'. Reported through
// delegate.setNonLocalAttr with the full text, both names and the
// expression.
SET_NONLOCAL_ATTR
: '$' x=ID '::' y=ID WS? '=' expr=ATTR_VALUE_EXPR ';'
{
delegate.setNonLocalAttr($text, $x, $y, $expr);
}
;
// A reference to a scoped attribute: dollar sign, ID, '::', ID.
// Reported through delegate.nonLocalAttr.
NONLOCAL_ATTR
: '$' x=ID '::' y=ID {delegate.nonLocalAttr($text, $x, $y);}
;
// A qualified attribute reference: dollar sign, ID, '.', ID. The
// predicate rejects the match when the next character is '('
// (presumably so that a method call on an attribute is not mistaken for
// a qualified attribute - confirm). Reported through
// delegate.qualifiedAttr.
QUALIFIED_ATTR
: '$' x=ID '.' y=ID {input.LA(1)!='('}? {delegate.qualifiedAttr($text, $x, $y);}
;
// An assignment to a plain attribute: dollar sign, ID, optional
// whitespace, '=', a value expression and ';'. Reported through
// delegate.setAttr.
SET_ATTR
: '$' x=ID WS? '=' expr=ATTR_VALUE_EXPR ';'
{
delegate.setAttr($text, $x, $expr);
}
;
// A plain attribute reference: dollar sign followed by an ID. Reported
// through delegate.attr.
ATTR
: '$' x=ID {delegate.attr($text, $x);}
;
// Anything else is just random text. The characters are accumulated in a
// buffer which is handed to delegate.text once the rule has matched:
//  - any character other than a backslash or a dollar sign is copied;
//  - a backslash followed by a dollar sign is copied as a dollar sign
//    alone;
//  - a backslash followed by any other character is copied unchanged;
//  - a dollar sign is copied only when the character after it cannot
//    start an ID (the gated predicate).
TEXT
@init {StringBuilder buf = new StringBuilder();}
@after {delegate.text(buf.toString());}
: ( c=~('\\'| '$') {buf.append((char)$c);}
| '\\$' {buf.append('$');}
| '\\' c=~('$') {buf.append('\\').append((char)$c);}
| {!isIDStartChar(input.LA(2))}? => '$' {buf.append('$');}
)+
;
// An identifier: an ASCII letter or underscore followed by any number of
// ASCII letters, digits or underscores. Fragment rule, only used by the
// attribute rules.
fragment
ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
;
/** Don't allow an = as first char to prevent $x == 3; kind of stuff. */
// After the first character, everything up to (but not including) the
// next ';' is taken.
fragment
ATTR_VALUE_EXPR
: ~'=' (~';')*
;
// One or more spaces, tabs or line terminators. Fragment rule, only
// used inside the attribute assignment rules.
fragment
WS : (' '|'\t'|'\n'|'\r')+
;

View File

@ -1,115 +0,0 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
tree grammar BlockSetTransformer;
options {
language = Java;
tokenVocab = ANTLRParser;
ASTLabelType = GrammarAST;
output = AST;
filter = true;
}
@header {
package org.antlr.v4.parse;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.misc.*;
import org.antlr.v4.tool.*;
import org.antlr.v4.tool.ast.*;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
import java.util.ArrayList;
import org.antlr.v4.runtime.misc.IntervalSet;
}
// Members added to the generated tree parser.
@members {
// name of the rule currently being walked; set by topdown
public String currentRuleName;
// ALT node most recently matched by setAlt
public GrammarAST currentAlt;
// grammar handed to GrammarTransformPipeline.setGrammarPtr for new nodes
public Grammar g;
// Delegates to the generated constructor with a fresh shared state and
// then remembers the grammar.
public BlockSetTransformer(TreeNodeStream input, Grammar g) {
this(input, new RecognizerSharedState());
this.g = g;
}
}
// Entry pattern for filter mode. On a RULE node it records the rule name
// (token or parser rule) in currentRuleName; otherwise it tries the
// alternative-tracking and set-rewriting patterns in the order listed.
topdown
: ^(RULE (id=TOKEN_REF|id=RULE_REF) {currentRuleName=$id.text;} .+)
| setAlt
| ebnfBlockSet
| blockSet
;
// Matches an ALT node only when its context is "RULE BLOCK" and remembers
// it in currentAlt.
setAlt
: {inContext("RULE BLOCK")}?
ALT {currentAlt = $start;}
;
// (BLOCK (ALT (+ (BLOCK (ALT INT) (ALT ID)))))
// Matches an EBNF suffix applied directly to a block that blockSet can
// collapse, and rewrites it so the suffix wraps a new BLOCK/ALT holding the
// collapsed set. The resulting tree is then passed to setGrammarPtr.
ebnfBlockSet
@after {
GrammarTransformPipeline.setGrammarPtr(g, $tree);
}
: ^(ebnfSuffix blockSet) -> ^(ebnfSuffix ^(BLOCK<BlockAST> ^(ALT<AltAST> blockSet)))
;
// Matches one of the EBNF operators (?, *, +). The @after action replaces
// the rule's result tree with a duplicate of the matched node.
ebnfSuffix
@after {$tree = (GrammarAST)adaptor.dupNode($start);}
: OPTIONAL
| CLOSURE
| POSITIVE_CLOSURE
;
// Collapses a BLOCK with two or more alternatives, each consisting of a
// single set element, into a SET node.
//  - Directly under a RULE: the BLOCK/ALT wrappers are kept around the new
//    SET, and the first ALT must not carry an alt label.
//  - Anywhere else: the whole BLOCK is replaced by the SET.
// inLexer is derived from the current rule name and selects which set
// elements are legal. The rewritten tree is passed to setGrammarPtr.
blockSet
@init {
boolean inLexer = Grammar.isTokenName(currentRuleName);
}
@after {
GrammarTransformPipeline.setGrammarPtr(g, $tree);
}
: {inContext("RULE")}? // top-level: rule block and > 1 alt
^(BLOCK ^(alt=ALT {((AltAST)$alt).altLabel==null}? setElement[inLexer]) ( ^(ALT setElement[inLexer]) )+)
-> ^(BLOCK<BlockAST>[$BLOCK.token] ^(ALT<AltAST>[$BLOCK.token,"ALT"] ^(SET[$BLOCK.token, "SET"] setElement+)))
| {!inContext("RULE")}? // if not rule block and > 1 alt
^(BLOCK ^(ALT setElement[inLexer]) ( ^(ALT setElement[inLexer]) )+)
-> ^(SET[$BLOCK.token, "SET"] setElement+)
;
// A single element that may be folded into a SET:
//  - a string literal (in a lexer rule it must additionally yield a char
//    value other than -1 from getCharValueFromGrammarCharLiteral),
//  - a token reference (outside lexer rules only),
//  - a range of two such character literals (lexer rules only).
// The resulting tree is passed to setGrammarPtr.
setElement[boolean inLexer]
@after {
GrammarTransformPipeline.setGrammarPtr(g, $tree);
}
: ( a=STRING_LITERAL {!inLexer || CharSupport.getCharValueFromGrammarCharLiteral($a.getText())!=-1}?
| {!inLexer}?=> TOKEN_REF
| {inLexer}?=> ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
{CharSupport.getCharValueFromGrammarCharLiteral($a.getText())!=-1 &&
CharSupport.getCharValueFromGrammarCharLiteral($b.getText())!=-1}?
)
;

View File

@ -1,995 +0,0 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** The definitive ANTLR v3 tree grammar to walk/visit ANTLR v4 grammars.
* Parses trees created by ANTLRParser.g.
*
* Rather than have multiple tree grammars, one for each visit, I'm
* creating this generic visitor that knows about context. All of the
* boilerplate pattern recognition is done here. Then, subclasses can
* override the methods they care about. This prevents a lot of the same
* context tracking stuff like "set current alternative for current
* rule node" that is repeated in lots of tree filters.
*/
tree grammar GrammarTreeVisitor;
options {
language = Java;
// Reuse the token types defined by ANTLRParser.
tokenVocab = ANTLRParser;
// Tree nodes (and labels on them) are GrammarAST instances.
ASTLabelType = GrammarAST;
}
// Include the copyright in this source and also the generated source
@header {
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.parse;
import org.antlr.v4.tool.*;
import org.antlr.v4.tool.ast.*;
import java.lang.reflect.Method;
}
@members {
// Name of the grammar; assigned by grammarSpec from the ID under GRAMMAR.
public String grammarName;
// RULE node of the rule being visited; set by rule/lexerRule and reset to
// null once that rule has been finished.
public GrammarAST currentRuleAST;
// Name of the current lexer mode; updated by the mode rule.
public String currentModeName = LexerGrammar.DEFAULT_MODE_NAME;
// Name of the rule being visited; set and cleared together with currentRuleAST.
public String currentRuleName;
//public GrammarAST currentRuleBlock;
// Root node of the outer alternative about to be visited; set by
// ruleBlock/lexerRuleBlock before each outer alternative.
public GrammarAST currentOuterAltRoot;
// Reset to 0 on entry to rule/lexerRule and incremented before each outer alt.
public int currentOuterAltNumber = 1; // 1..n
// NOTE(review): not referenced anywhere in this grammar; presumably kept
// for subclasses or legacy reasons -- confirm before removing.
public int rewriteEBNFLevel = 0;
// Creates a visitor with no input stream; visit() installs one per walk.
public GrammarTreeVisitor() { this(null); }
// Error sink used by visit(); this base implementation returns null, in
// which case visit() reports to System.err.
public ErrorManager getErrorManager() { return null; }
// Convenience entry point: walks t starting at the grammarSpec rule.
public void visitGrammar(GrammarAST t) { visit(t, "grammarSpec"); }
// Walks tree t by reflectively invoking the public no-argument rule method
// named ruleName on this visitor (for example "grammarSpec").
// Any failure (no such method, or an exception raised while the rule runs)
// is reported to the ErrorManager from getErrorManager(), or to System.err
// when that is null; nothing is rethrown.
public void visit(GrammarAST t, String ruleName) {
    CommonTreeNodeStream nodes = new CommonTreeNodeStream(new GrammarASTAdaptor(), t);
    setTreeNodeStream(nodes);
    try {
        Method m = getClass().getMethod(ruleName);
        m.invoke(this);
    }
    catch (Exception e) {
        // Method.invoke wraps whatever the rule method throws in an
        // InvocationTargetException; report the underlying cause so the
        // real failure is not hidden behind the reflection wrapper.
        Throwable cause = e;
        if ( e instanceof java.lang.reflect.InvocationTargetException && e.getCause()!=null ) {
            cause = e.getCause();
        }
        ErrorManager errMgr = getErrorManager();
        if ( errMgr==null ) {
            System.err.println("can't find rule "+ruleName+
                               " or tree structure error: "+t.toStringTree()
                               );
            cause.printStackTrace(System.err);
        }
        else errMgr.toolError(ErrorType.INTERNAL_ERROR, cause);
    }
}
// ---- Semantic callbacks -------------------------------------------------
// All methods below are no-ops in this base visitor. They are called from
// the embedded actions of the grammar rules as the corresponding construct
// is recognized, so a subclass can override just the ones it needs.

// Grammar-level constructs.
public void discoverGrammar(GrammarRootAST root, GrammarAST ID) { }
public void finishPrequels(GrammarAST firstPrequel) { }
public void finishGrammar(GrammarRootAST root, GrammarAST ID) { }
public void grammarOption(GrammarAST ID, GrammarAST valueAST) { }
public void ruleOption(GrammarAST ID, GrammarAST valueAST) { }
public void blockOption(GrammarAST ID, GrammarAST valueAST) { }
public void defineToken(GrammarAST ID) { }
public void globalNamedAction(GrammarAST scope, GrammarAST ID, ActionAST action) { }
public void importGrammar(GrammarAST label, GrammarAST ID) { }
public void modeDef(GrammarAST m, GrammarAST ID) { }
// Rules.
public void discoverRules(GrammarAST rules) { }
public void finishRules(GrammarAST rule) { }
public void discoverRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers,
ActionAST arg, ActionAST returns, GrammarAST thrws,
GrammarAST options, ActionAST locals,
List<GrammarAST> actions,
GrammarAST block) { }
public void finishRule(RuleAST rule, GrammarAST ID, GrammarAST block) { }
public void discoverLexerRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers,
GrammarAST block) { }
public void finishLexerRule(RuleAST rule, GrammarAST ID, GrammarAST block) { }
public void ruleCatch(GrammarAST arg, ActionAST action) { }
public void finallyAction(ActionAST action) { }
// Alternatives.
public void discoverOuterAlt(AltAST alt) { }
public void finishOuterAlt(AltAST alt) { }
public void discoverAlt(AltAST alt) { }
public void finishAlt(AltAST alt) { }
// Elements inside alternatives.
public void ruleRef(GrammarAST ref, ActionAST arg) { }
public void tokenRef(TerminalAST ref) { }
public void elementOption(GrammarASTWithOptions t, GrammarAST ID, GrammarAST valueAST) { }
public void stringRef(TerminalAST ref) { }
public void wildcardRef(GrammarAST ref) { }
public void actionInAlt(ActionAST action) { }
public void sempredInAlt(PredAST pred) { }
public void label(GrammarAST op, GrammarAST ID, GrammarAST element) { }
// Lexer commands; outerAltNumber is the value of currentOuterAltNumber at the call.
public void lexerCallCommand(int outerAltNumber, GrammarAST ID, GrammarAST arg) { }
public void lexerCommand(int outerAltNumber, GrammarAST ID) { }
// ---- Generic enter/exit hooks ---------------------------------------------
// One no-op pair per grammar rule. Each enterX/exitX pair is intended for the
// rule of the same name: rules call the enter hook from their @init action
// and the exit hook from their @after action, passing the rule's start node.
protected void enterGrammarSpec(GrammarAST tree) { }
protected void exitGrammarSpec(GrammarAST tree) { }
protected void enterPrequelConstructs(GrammarAST tree) { }
protected void exitPrequelConstructs(GrammarAST tree) { }
protected void enterPrequelConstruct(GrammarAST tree) { }
protected void exitPrequelConstruct(GrammarAST tree) { }
protected void enterOptionsSpec(GrammarAST tree) { }
protected void exitOptionsSpec(GrammarAST tree) { }
protected void enterOption(GrammarAST tree) { }
protected void exitOption(GrammarAST tree) { }
protected void enterOptionValue(GrammarAST tree) { }
protected void exitOptionValue(GrammarAST tree) { }
protected void enterDelegateGrammars(GrammarAST tree) { }
protected void exitDelegateGrammars(GrammarAST tree) { }
protected void enterDelegateGrammar(GrammarAST tree) { }
protected void exitDelegateGrammar(GrammarAST tree) { }
protected void enterTokensSpec(GrammarAST tree) { }
protected void exitTokensSpec(GrammarAST tree) { }
protected void enterTokenSpec(GrammarAST tree) { }
protected void exitTokenSpec(GrammarAST tree) { }
protected void enterAction(GrammarAST tree) { }
protected void exitAction(GrammarAST tree) { }
protected void enterRules(GrammarAST tree) { }
protected void exitRules(GrammarAST tree) { }
protected void enterMode(GrammarAST tree) { }
protected void exitMode(GrammarAST tree) { }
protected void enterLexerRule(GrammarAST tree) { }
protected void exitLexerRule(GrammarAST tree) { }
protected void enterRule(GrammarAST tree) { }
protected void exitRule(GrammarAST tree) { }
protected void enterExceptionGroup(GrammarAST tree) { }
protected void exitExceptionGroup(GrammarAST tree) { }
protected void enterExceptionHandler(GrammarAST tree) { }
protected void exitExceptionHandler(GrammarAST tree) { }
protected void enterFinallyClause(GrammarAST tree) { }
protected void exitFinallyClause(GrammarAST tree) { }
protected void enterLocals(GrammarAST tree) { }
protected void exitLocals(GrammarAST tree) { }
protected void enterRuleReturns(GrammarAST tree) { }
protected void exitRuleReturns(GrammarAST tree) { }
protected void enterThrowsSpec(GrammarAST tree) { }
protected void exitThrowsSpec(GrammarAST tree) { }
protected void enterRuleAction(GrammarAST tree) { }
protected void exitRuleAction(GrammarAST tree) { }
protected void enterRuleModifier(GrammarAST tree) { }
protected void exitRuleModifier(GrammarAST tree) { }
protected void enterLexerRuleBlock(GrammarAST tree) { }
protected void exitLexerRuleBlock(GrammarAST tree) { }
protected void enterRuleBlock(GrammarAST tree) { }
protected void exitRuleBlock(GrammarAST tree) { }
protected void enterLexerOuterAlternative(AltAST tree) { }
protected void exitLexerOuterAlternative(AltAST tree) { }
protected void enterOuterAlternative(AltAST tree) { }
protected void exitOuterAlternative(AltAST tree) { }
protected void enterLexerAlternative(GrammarAST tree) { }
protected void exitLexerAlternative(GrammarAST tree) { }
protected void enterLexerElements(GrammarAST tree) { }
protected void exitLexerElements(GrammarAST tree) { }
protected void enterLexerElement(GrammarAST tree) { }
protected void exitLexerElement(GrammarAST tree) { }
protected void enterLabeledLexerElement(GrammarAST tree) { }
protected void exitLabeledLexerElement(GrammarAST tree) { }
protected void enterLexerBlock(GrammarAST tree) { }
protected void exitLexerBlock(GrammarAST tree) { }
protected void enterLexerAtom(GrammarAST tree) { }
protected void exitLexerAtom(GrammarAST tree) { }
protected void enterActionElement(GrammarAST tree) { }
protected void exitActionElement(GrammarAST tree) { }
protected void enterAlternative(AltAST tree) { }
protected void exitAlternative(AltAST tree) { }
protected void enterLexerCommand(GrammarAST tree) { }
protected void exitLexerCommand(GrammarAST tree) { }
protected void enterLexerCommandExpr(GrammarAST tree) { }
protected void exitLexerCommandExpr(GrammarAST tree) { }
protected void enterElement(GrammarAST tree) { }
protected void exitElement(GrammarAST tree) { }
protected void enterAstOperand(GrammarAST tree) { }
protected void exitAstOperand(GrammarAST tree) { }
protected void enterLabeledElement(GrammarAST tree) { }
protected void exitLabeledElement(GrammarAST tree) { }
protected void enterSubrule(GrammarAST tree) { }
protected void exitSubrule(GrammarAST tree) { }
protected void enterLexerSubrule(GrammarAST tree) { }
protected void exitLexerSubrule(GrammarAST tree) { }
protected void enterBlockSuffix(GrammarAST tree) { }
protected void exitBlockSuffix(GrammarAST tree) { }
protected void enterEbnfSuffix(GrammarAST tree) { }
protected void exitEbnfSuffix(GrammarAST tree) { }
protected void enterAtom(GrammarAST tree) { }
protected void exitAtom(GrammarAST tree) { }
protected void enterBlockSet(GrammarAST tree) { }
protected void exitBlockSet(GrammarAST tree) { }
protected void enterSetElement(GrammarAST tree) { }
protected void exitSetElement(GrammarAST tree) { }
protected void enterBlock(GrammarAST tree) { }
protected void exitBlock(GrammarAST tree) { }
protected void enterRuleref(GrammarAST tree) { }
protected void exitRuleref(GrammarAST tree) { }
protected void enterRange(GrammarAST tree) { }
protected void exitRange(GrammarAST tree) { }
protected void enterTerminal(GrammarAST tree) { }
protected void exitTerminal(GrammarAST tree) { }
protected void enterElementOptions(GrammarAST tree) { }
protected void exitElementOptions(GrammarAST tree) { }
protected void enterElementOption(GrammarAST tree) { }
protected void exitElementOption(GrammarAST tree) { }
// Trace hook: prints the rule being entered and the next lookahead node
// to System.err.
@Override
public void traceIn(String ruleName, int ruleIndex) {
    Object lookahead = input.LT(1);
    System.err.println("enter "+ruleName+": "+lookahead);
}
// Trace hook: prints the rule being left and the next lookahead node
// to System.err.
@Override
public void traceOut(String ruleName, int ruleIndex) {
    Object lookahead = input.LT(1);
    System.err.println("exit "+ruleName+": "+lookahead);
}
}
// Root rule: matches a whole GRAMMAR tree. Records the grammar name, then
// fires discoverGrammar before the prequels, finishPrequels after them, and
// finishGrammar once the rules and any modes have been walked.
grammarSpec
@init {
enterGrammarSpec($start);
}
@after {
exitGrammarSpec($start);
}
: ^( GRAMMAR ID {grammarName=$ID.text;} DOC_COMMENT?
{discoverGrammar((GrammarRootAST)$GRAMMAR, $ID);}
prequelConstructs
{finishPrequels($prequelConstructs.firstOne);}
rules mode*
{finishGrammar((GrammarRootAST)$GRAMMAR, $ID);}
)
;
// Matches one or more prequel constructs, or nothing at all. The return
// value firstOne is the first node of the sequence; it keeps its null
// default when the empty alternative is taken.
prequelConstructs returns [GrammarAST firstOne=null]
@init {
enterPrequelConstructs($start);
}
@after {
exitPrequelConstructs($start);
}
: {$firstOne=$start;} prequelConstruct+
|
;
// Matches a single prequel construct: an options block, an import list,
// a tokens block or a global named action.
// Fires the singular enterPrequelConstruct/exitPrequelConstruct hooks (the
// plural hooks belong to the enclosing prequelConstructs rule).
prequelConstruct
@init {
enterPrequelConstruct($start);
}
@after {
exitPrequelConstruct($start);
}
: optionsSpec
| delegateGrammars
| tokensSpec
| action
;
// Matches an OPTIONS subtree holding zero or more option assignments.
optionsSpec
@init {
enterOptionsSpec($start);
}
@after {
exitOptionsSpec($start);
}
: ^(OPTIONS option*)
;
// Matches one "ID = value" assignment and dispatches it to blockOption,
// ruleOption or grammarOption. The enclosing context is sampled on entry
// (before the subtree is consumed); a BLOCK context wins over a RULE context.
option
@init {
enterOption($start);
boolean rule = inContext("RULE ...");
boolean block = inContext("BLOCK ...");
}
@after {
exitOption($start);
}
: ^(a=ASSIGN ID v=optionValue)
{
if ( block ) blockOption($ID, $v.start); // most specific first
else if ( rule ) ruleOption($ID, $v.start);
else grammarOption($ID, $v.start);
}
;
// Matches an option value: an ID, a string literal or an INT.
// The return value v is the text of the start node's token.
optionValue returns [String v]
@init {
enterOptionValue($start);
$v = $start.token.getText();
}
@after {
exitOptionValue($start);
}
: ID
| STRING_LITERAL
| INT
;
// Matches an IMPORT subtree listing one or more delegate grammars.
delegateGrammars
@init {
enterDelegateGrammars($start);
}
@after {
exitDelegateGrammars($start);
}
: ^(IMPORT delegateGrammar+)
;
// Matches one imported grammar, either "label = id" or a bare id, and
// reports it through importGrammar (label is null for the bare form).
delegateGrammar
@init {
enterDelegateGrammar($start);
}
@after {
exitDelegateGrammar($start);
}
: ^(ASSIGN label=ID id=ID) {importGrammar($label, $id);}
| id=ID {importGrammar(null, $id);}
;
// Matches a TOKENS_SPEC subtree containing one or more token definitions.
tokensSpec
@init {
enterTokensSpec($start);
}
@after {
exitTokensSpec($start);
}
: ^(TOKENS_SPEC tokenSpec+)
;
// Matches a single token name and reports it through defineToken.
tokenSpec
@init {
enterTokenSpec($start);
}
@after {
exitTokenSpec($start);
}
: ID {defineToken($ID);}
;
// Matches a global named action (AT with an optional scope ID, a name and
// the ACTION body) and reports it through globalNamedAction; the scope
// argument comes from the optional sc label.
action
@init {
enterAction($start);
}
@after {
exitAction($start);
}
: ^(AT sc=ID? name=ID ACTION) {globalNamedAction($sc, $name, (ActionAST)$ACTION);}
;
// Matches the RULES subtree: fires discoverRules, walks every parser and
// lexer rule, then fires finishRules.
rules
@init {
enterRules($start);
}
@after {
exitRules($start);
}
: ^(RULES {discoverRules($RULES);} (rule|lexerRule)* {finishRules($RULES);})
;
// Matches a lexer MODE subtree: records the mode name in currentModeName,
// reports it through modeDef, then walks the mode's lexer rules.
mode
@init {
enterMode($start);
}
@after {
exitMode($start);
}
: ^( MODE ID {currentModeName=$ID.text; modeDef($MODE, $ID);} lexerRule* )
;
// Matches one lexer rule. Sets currentRuleName/currentRuleAST, collects an
// optional FRAGMENT modifier, fires discoverLexerRule with the upcoming
// node (the next lookahead) as the block argument, walks the rule block,
// then fires finishLexerRule and clears the current-rule fields.
// Outer-alt numbering is reset to 0 on entry.
lexerRule
@init {
enterLexerRule($start);
List<GrammarAST> mods = new ArrayList<GrammarAST>();
currentOuterAltNumber=0;
}
@after {
exitLexerRule($start);
}
: ^( RULE TOKEN_REF
{currentRuleName=$TOKEN_REF.text; currentRuleAST=$RULE;}
DOC_COMMENT? (^(RULEMODIFIERS m=FRAGMENT {mods.add($m);}))?
{discoverLexerRule((RuleAST)$RULE, $TOKEN_REF, mods, (GrammarAST)input.LT(1));}
lexerRuleBlock
{
finishLexerRule((RuleAST)$RULE, $TOKEN_REF, $lexerRuleBlock.start);
currentRuleName=null; currentRuleAST=null;
}
)
;
// Matches one parser rule. Sets currentRuleName/currentRuleAST, gathers the
// modifiers, argument action, returns/throws/locals specs, options and
// rule-level named actions, then fires discoverRule with all of them plus
// the upcoming node (the next lookahead) as the block argument. After
// walking the block and exception group it fires finishRule and clears the
// current-rule fields. Outer-alt numbering is reset to 0 on entry.
rule
@init {
enterRule($start);
List<GrammarAST> mods = new ArrayList<GrammarAST>();
List<GrammarAST> actions = new ArrayList<GrammarAST>(); // track roots
currentOuterAltNumber=0;
}
@after {
exitRule($start);
}
: ^( RULE RULE_REF {currentRuleName=$RULE_REF.text; currentRuleAST=$RULE;}
DOC_COMMENT? (^(RULEMODIFIERS (m=ruleModifier{mods.add($m.start);})+))?
ARG_ACTION?
ret=ruleReturns?
thr=throwsSpec?
loc=locals?
( opts=optionsSpec
| a=ruleAction {actions.add($a.start);}
)*
{discoverRule((RuleAST)$RULE, $RULE_REF, mods, (ActionAST)$ARG_ACTION,
$ret.start!=null?(ActionAST)$ret.start.getChild(0):null,
$thr.start, $opts.start,
$loc.start!=null?(ActionAST)$loc.start.getChild(0):null,
actions, (GrammarAST)input.LT(1));}
ruleBlock exceptionGroup
{finishRule((RuleAST)$RULE, $RULE_REF, $ruleBlock.start); currentRuleName=null; currentRuleAST=null;}
)
;
// Matches zero or more catch handlers followed by an optional finally clause.
exceptionGroup
@init {
enterExceptionGroup($start);
}
@after {
exitExceptionGroup($start);
}
: exceptionHandler* finallyClause?
;
// Matches one CATCH subtree and reports its argument and body through ruleCatch.
exceptionHandler
@init {
enterExceptionHandler($start);
}
@after {
exitExceptionHandler($start);
}
: ^(CATCH ARG_ACTION ACTION) {ruleCatch($ARG_ACTION, (ActionAST)$ACTION);}
;
// Matches a FINALLY subtree and reports its body through finallyAction.
finallyClause
@init {
enterFinallyClause($start);
}
@after {
exitFinallyClause($start);
}
: ^(FINALLY ACTION) {finallyAction((ActionAST)$ACTION);}
;
// Matches a LOCALS subtree whose single child is the ARG_ACTION.
locals
@init {
enterLocals($start);
}
@after {
exitLocals($start);
}
: ^(LOCALS ARG_ACTION)
;
// Matches a RETURNS subtree whose single child is the ARG_ACTION.
ruleReturns
@init {
enterRuleReturns($start);
}
@after {
exitRuleReturns($start);
}
: ^(RETURNS ARG_ACTION)
;
// Matches a THROWS subtree listing one or more IDs.
throwsSpec
@init {
enterThrowsSpec($start);
}
@after {
exitThrowsSpec($start);
}
: ^(THROWS ID+)
;
// Matches a rule-level named action: AT with a name ID and the ACTION body.
ruleAction
@init {
enterRuleAction($start);
}
@after {
exitRuleAction($start);
}
: ^(AT ID ACTION)
;
// Matches a single rule modifier token.
ruleModifier
@init {
enterRuleModifier($start);
}
@after {
exitRuleModifier($start);
}
: PUBLIC
| PRIVATE
| PROTECTED
| FRAGMENT
;
// Matches the top-level BLOCK of a lexer rule. Before each outer
// alternative it stores the upcoming node in currentOuterAltRoot and
// increments currentOuterAltNumber.
lexerRuleBlock
@init {
enterLexerRuleBlock($start);
}
@after {
exitLexerRuleBlock($start);
}
: ^( BLOCK
( {
currentOuterAltRoot = (GrammarAST)input.LT(1);
currentOuterAltNumber++;
}
lexerOuterAlternative
)+
)
;
// Matches the top-level BLOCK of a parser rule. Before each outer
// alternative it stores the upcoming node in currentOuterAltRoot and
// increments currentOuterAltNumber.
ruleBlock
@init {
enterRuleBlock($start);
}
@after {
exitRuleBlock($start);
}
: ^( BLOCK
( {
currentOuterAltRoot = (GrammarAST)input.LT(1);
currentOuterAltNumber++;
}
outerAlternative
)+
)
;
// Wraps a top-level lexer alternative: fires discoverOuterAlt on entry and
// finishOuterAlt after the alternative has matched. The start node is cast
// to AltAST.
lexerOuterAlternative
@init {
enterLexerOuterAlternative((AltAST)$start);
discoverOuterAlt((AltAST)$start);
}
@after {
finishOuterAlt((AltAST)$start);
exitLexerOuterAlternative((AltAST)$start);
}
: lexerAlternative
;
// Wraps a top-level parser alternative: fires discoverOuterAlt on entry and
// finishOuterAlt after the alternative has matched. The nested alternative
// rule additionally fires discoverAlt/finishAlt for the same node.
outerAlternative
@init {
enterOuterAlternative((AltAST)$start);
discoverOuterAlt((AltAST)$start);
}
@after {
finishOuterAlt((AltAST)$start);
exitOuterAlternative((AltAST)$start);
}
: alternative
;
// Matches a lexer alternative: either plain lexer elements, or a
// LEXER_ALT_ACTION subtree pairing the elements with one or more lexer commands.
lexerAlternative
@init {
enterLexerAlternative($start);
}
@after {
exitLexerAlternative($start);
}
: ^(LEXER_ALT_ACTION lexerElements lexerCommand+)
| lexerElements
;
// Matches an ALT subtree containing one or more lexer elements.
lexerElements
@init {
enterLexerElements($start);
}
@after {
exitLexerElements($start);
}
: ^(ALT lexerElement+)
;
// Matches one element of a lexer alternative. Actions and semantic
// predicates (with or without element options) are reported through
// actionInAlt / sempredInAlt; EPSILON marks an empty alternative.
lexerElement
@init {
enterLexerElement($start);
}
@after {
exitLexerElement($start);
}
: labeledLexerElement
| lexerAtom
| lexerSubrule
| ACTION {actionInAlt((ActionAST)$ACTION);}
| SEMPRED {sempredInAlt((PredAST)$SEMPRED);}
| ^(ACTION elementOptions) {actionInAlt((ActionAST)$ACTION);}
| ^(SEMPRED elementOptions) {sempredInAlt((PredAST)$SEMPRED);}
| EPSILON
;
// Matches a labeled lexer element: "=" or "+=" with a label ID and either
// a lexer atom or a block.
// NOTE(review): the block alternative uses the parser-side block rule, not
// lexerBlock -- confirm this is intended.
labeledLexerElement
@init {
enterLabeledLexerElement($start);
}
@after {
exitLabeledLexerElement($start);
}
: ^((ASSIGN|PLUS_ASSIGN) ID (lexerAtom|block))
;
// Matches a nested BLOCK inside a lexer rule: optional options followed by
// one or more lexer alternatives.
lexerBlock
@init {
enterLexerBlock($start);
}
@after {
exitLexerBlock($start);
}
: ^(BLOCK optionsSpec? lexerAlternative+)
;
// Matches an atomic lexer element: a terminal, a (possibly negated) set,
// a wildcard with or without element options, a character set, or a range.
lexerAtom
@init {
enterLexerAtom($start);
}
@after {
exitLexerAtom($start);
}
: terminal
| ^(NOT blockSet)
| blockSet
| ^(WILDCARD elementOptions)
| WILDCARD
| LEXER_CHAR_SET
| range
;
// Matches an action or semantic predicate, with or without element
// options. Unlike element/lexerElement, no callback is fired here.
actionElement
@init {
enterActionElement($start);
}
@after {
exitActionElement($start);
}
: ACTION
| ^(ACTION elementOptions)
| SEMPRED
| ^(SEMPRED elementOptions)
;
// Matches a parser alternative: an ALT with optional element options and
// one or more elements, or an empty (EPSILON) ALT. Fires discoverAlt on
// entry and finishAlt after the alternative has matched.
alternative
@init {
enterAlternative((AltAST)$start);
discoverAlt((AltAST)$start);
}
@after {
finishAlt((AltAST)$start);
exitAlternative((AltAST)$start);
}
: ^(ALT elementOptions? element+)
| ^(ALT EPSILON)
;
// Matches one lexer command, either a call form with an argument or a bare
// ID, and reports it together with the current outer alternative number.
lexerCommand
@init {
enterLexerCommand($start);
}
@after {
exitLexerCommand($start);
}
: ^(LEXER_ACTION_CALL ID lexerCommandExpr)
{lexerCallCommand(currentOuterAltNumber, $ID, $lexerCommandExpr.start);}
| ID
{lexerCommand(currentOuterAltNumber, $ID);}
;
// Matches the argument of a lexer command: an ID or an INT.
lexerCommandExpr
@init {
enterLexerCommandExpr($start);
}
@after {
exitLexerCommandExpr($start);
}
: ID
| INT
;
// Matches one element of a parser alternative. Actions and semantic
// predicates (with or without element options) are reported through
// actionInAlt / sempredInAlt; negated sets and negated blocks are accepted too.
element
@init {
enterElement($start);
}
@after {
exitElement($start);
}
: labeledElement
| atom
| subrule
| ACTION {actionInAlt((ActionAST)$ACTION);}
| SEMPRED {sempredInAlt((PredAST)$SEMPRED);}
| ^(ACTION elementOptions) {actionInAlt((ActionAST)$ACTION);}
| ^(SEMPRED elementOptions) {sempredInAlt((PredAST)$SEMPRED);}
| ^(NOT blockSet)
| ^(NOT block)
;
// Matches an atom, a negated set or a negated block.
// NOTE(review): no other rule in this grammar references astOperand;
// it may only be reachable through visit() by name -- confirm.
astOperand
@init {
enterAstOperand($start);
}
@after {
exitAstOperand($start);
}
: atom
| ^(NOT blockSet)
| ^(NOT block)
;
// Matches a labeled element ("=" or "+=" with a label ID and the element)
// and reports the operator node, label and element through label().
labeledElement
@init {
enterLabeledElement($start);
}
@after {
exitLabeledElement($start);
}
: ^((ASSIGN|PLUS_ASSIGN) ID element) {label($start, $ID, $element.start);}
;
// Matches a nested block, optionally wrapped in an EBNF suffix operator.
subrule
@init {
enterSubrule($start);
}
@after {
exitSubrule($start);
}
: ^(blockSuffix block)
| block
;
// Matches a nested lexer block, optionally wrapped in an EBNF suffix operator.
lexerSubrule
@init {
enterLexerSubrule($start);
}
@after {
exitLexerSubrule($start);
}
: ^(blockSuffix lexerBlock)
| lexerBlock
;
// Suffix applied to a block; currently just an EBNF suffix.
blockSuffix
@init {
enterBlockSuffix($start);
}
@after {
exitBlockSuffix($start);
}
: ebnfSuffix
;
// Matches one of the EBNF operator tokens.
ebnfSuffix
@init {
enterEbnfSuffix($start);
}
@after {
exitEbnfSuffix($start);
}
: OPTIONAL
| CLOSURE
| POSITIVE_CLOSURE
;
// Matches an atomic parser element: a DOT-qualified terminal or rule
// reference, a wildcard (reported through wildcardRef), a terminal, a set
// or a rule reference.
atom
@init {
enterAtom($start);
}
@after {
exitAtom($start);
}
: ^(DOT ID terminal)
| ^(DOT ID ruleref)
| ^(WILDCARD elementOptions) {wildcardRef($WILDCARD);}
| WILDCARD {wildcardRef($WILDCARD);}
| terminal
| blockSet
| ruleref
;
// Matches a SET subtree containing one or more set elements.
blockSet
@init {
enterBlockSet($start);
}
@after {
exitBlockSet($start);
}
: ^(SET setElement+)
;
// Matches one member of a set. String literals and token references are
// reported through stringRef / tokenRef; for a range both endpoints are
// reported as string references. A LEXER_CHAR_SET fires no callback.
setElement
@init {
enterSetElement($start);
}
@after {
exitSetElement($start);
}
: STRING_LITERAL {stringRef((TerminalAST)$STRING_LITERAL);}
| TOKEN_REF {tokenRef((TerminalAST)$TOKEN_REF);}
| ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
{
stringRef((TerminalAST)$a);
stringRef((TerminalAST)$b);
}
| LEXER_CHAR_SET
;
// Matches a nested BLOCK in a parser rule: optional options, any number of
// named actions, an optional ACTION, then one or more alternatives.
block
@init {
enterBlock($start);
}
@after {
exitBlock($start);
}
: ^(BLOCK optionsSpec? ruleAction* ACTION? alternative+)
;
// Matches a rule reference with optional argument action and element
// options. Reports the reference through ruleRef and, when an argument
// action is present, also reports that action through actionInAlt.
ruleref
@init {
enterRuleref($start);
}
@after {
exitRuleref($start);
}
: ^(RULE_REF arg=ARG_ACTION? elementOptions?)
{
ruleRef($RULE_REF, (ActionAST)$ARG_ACTION);
if ( $arg!=null ) actionInAlt((ActionAST)$arg);
}
;
// Matches a RANGE subtree with two string-literal endpoints.
// No callback is fired for the endpoints here (setElement does report them).
range
@init {
enterRange($start);
}
@after {
exitRange($start);
}
: ^(RANGE STRING_LITERAL STRING_LITERAL)
;
// Matches a string literal or token reference, with or without element
// options, and reports it through stringRef / tokenRef.
terminal
@init {
enterTerminal($start);
}
@after {
exitTerminal($start);
}
: ^(STRING_LITERAL elementOptions)
{stringRef((TerminalAST)$STRING_LITERAL);}
| STRING_LITERAL {stringRef((TerminalAST)$STRING_LITERAL);}
| ^(TOKEN_REF elementOptions) {tokenRef((TerminalAST)$TOKEN_REF);}
| TOKEN_REF {tokenRef((TerminalAST)$TOKEN_REF);}
;
// Matches an ELEMENT_OPTIONS subtree. Each option is handed the parent of
// the ELEMENT_OPTIONS node (cast to GrammarASTWithOptions), i.e. the node
// the options are attached to.
elementOptions
@init {
enterElementOptions($start);
}
@after {
exitElementOptions($start);
}
: ^(ELEMENT_OPTIONS elementOption[(GrammarASTWithOptions)$start.getParent()]*)
;
// Matches a single element option for node t: a bare ID (reported with a
// null value) or "ID = value" where the value is an ID, string literal,
// ACTION or INT. Every form is reported through elementOption().
elementOption[GrammarASTWithOptions t]
@init {
enterElementOption($start);
}
@after {
exitElementOption($start);
}
: ID {elementOption(t, $ID, null);}
| ^(ASSIGN id=ID v=ID) {elementOption(t, $id, $v);}
| ^(ASSIGN ID v=STRING_LITERAL) {elementOption(t, $ID, $v);}
| ^(ASSIGN ID v=ACTION) {elementOption(t, $ID, $v);}
| ^(ASSIGN ID v=INT) {elementOption(t, $ID, $v);}
;

View File

@ -1,215 +0,0 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** Find left-recursive rules */
tree grammar LeftRecursiveRuleWalker;
options {
// Reuse the token types defined by ANTLRParser.
tokenVocab=ANTLRParser;
// Tree nodes (and labels on them) are GrammarAST instances.
ASTLabelType=GrammarAST;
}
@header {
package org.antlr.v4.parse;
import org.antlr.v4.misc.*;
import org.antlr.v4.tool.*;
import org.antlr.v4.tool.ast.*;
}
@members {
// Name of the rule being examined; set by rec_rule and compared with
// RULE_REF text to recognize self-references.
private String ruleName;
private int currentOuterAltNumber; // which outer alt of rule?
public int numAlts; // how many alts for this rule total?
// No-op callbacks invoked from the rule actions below with the alternative
// tree and its 1-based outer alternative number; presumably overridden by
// subclasses that act on the classification.
public void setAltAssoc(AltAST altTree, int alt) {}
public void binaryAlt(AltAST altTree, int alt) {}
public void prefixAlt(AltAST altTree, int alt) {}
public void suffixAlt(AltAST altTree, int alt) {}
public void otherAlt(AltAST altTree, int alt) {}
// Called by rec_rule with the ARG_ACTION of the rule's RETURNS clause, if any.
public void setReturnValues(GrammarAST t) {}
}
// Empty @rulecatch: the generated rule methods get no catch clause, so
// recognition errors propagate to the caller instead of being handled
// inside each rule.
@rulecatch { }
// TODO: can get parser errors for not matching pattern; make them go away
// Entry rule: matches one parser RULE subtree, remembers its name in
// ruleName, forwards any RETURNS action to setReturnValues, and returns
// whether the rule block reported a left-recursive alternative.
// Outer-alternative numbering starts at 1.
public
rec_rule returns [boolean isLeftRec]
@init
{
currentOuterAltNumber = 1;
}
: ^( r=RULE id=RULE_REF {ruleName=$id.getText();}
DOC_COMMENT? ruleModifier?
// (ARG_ACTION)? shouldn't allow args, right?
(^(RETURNS a=ARG_ACTION {setReturnValues($a);}))?
// ( ^(THROWS .+) )? don't allow
( ^(LOCALS ARG_ACTION) )? // TODO: copy these to gen'd code
( ^(OPTIONS .*)
| ^(AT ID ACTION) // TODO: copy
)*
ruleBlock {$isLeftRec = $ruleBlock.isLeftRec;}
exceptionGroup
)
;
// Matches zero or more catch handlers followed by an optional finally clause.
exceptionGroup
: exceptionHandler* finallyClause?
;
// Matches one CATCH subtree (argument action plus body).
exceptionHandler
: ^(CATCH ARG_ACTION ACTION)
;
// Matches a FINALLY subtree with its ACTION body.
finallyClause
: ^(FINALLY ACTION)
;
// Matches a visibility modifier on the rule.
ruleModifier
: PUBLIC
| PRIVATE
| PROTECTED
;
// Walks every outer alternative of the rule's BLOCK. numAlts is set to the
// BLOCK's child count on entry; isLeftRec becomes true as soon as any outer
// alternative is classified as left-recursive. The outer-alternative
// counter is advanced after each alternative.
ruleBlock returns [boolean isLeftRec]
@init{this.numAlts = $start.getChildCount();}
: ^( BLOCK
(
o=outerAlternative
{if ($o.isLeftRec) $isLeftRec = true;}
{currentOuterAltNumber++;}
)+
)
;
/** An alt is either prefix, suffix, binary, or ternary operation or "other" */
// Classifies one outer alternative by trying the binary, prefix and suffix
// patterns (each guarded by a syntactic predicate) in that order and
// falling back to nonLeftRecur. The matching callback receives the
// alternative and its outer-alt number. Only binary and suffix
// alternatives set isLeftRec.
outerAlternative returns [boolean isLeftRec]
: (binary)=> binary
{binaryAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;}
| (prefix)=> prefix
{prefixAlt((AltAST)$start, currentOuterAltNumber);}
| (suffix)=> suffix
{suffixAlt((AltAST)$start, currentOuterAltNumber); $isLeftRec=true;}
| nonLeftRecur {otherAlt((AltAST)$start, currentOuterAltNumber);}
;
// Binary-operator shape: self-reference, one or more elements, another
// self-reference, then an optional trailing ACTION. Reports the ALT
// through setAltAssoc.
binary
: ^( ALT elementOptions? recurse element+ recurse ACTION? )
{setAltAssoc((AltAST)$ALT,currentOuterAltNumber);}
;
// Prefix-operator shape: one or more elements, none of which starts with a
// node whose text equals the rule name, followed by a self-reference and an
// optional trailing ACTION. Reports the ALT through setAltAssoc.
prefix
: ^( ALT elementOptions?
({!((CommonTree)input.LT(1)).getText().equals(ruleName)}? element)+
recurse ACTION?
)
{setAltAssoc((AltAST)$ALT,currentOuterAltNumber);}
;
// Suffix-operator shape: a self-reference followed by one or more
// elements. Reports the ALT through setAltAssoc.
suffix
: ^( ALT elementOptions? recurse element+ )
{setAltAssoc((AltAST)$ALT,currentOuterAltNumber);}
;
// Fallback shape for alternatives that match none of the operator
// patterns. Element options on the ALT (such as <assoc=...>) are accepted
// and ignored, consistent with the alternative/binary/prefix/suffix rules;
// without the optional elementOptions an ALT carrying options could not be
// matched here at all.
nonLeftRecur
: ^(ALT elementOptions? element+) // no assoc for these; ignore if <assoc=...> present
;
// A self-reference to the current rule, optionally labeled with "=".
recurse
: ^(ASSIGN ID recurseNoLabel)
| recurseNoLabel
;
// A RULE_REF whose text equals ruleName (checked by the gating predicate).
recurseNoLabel : {((CommonTree)input.LT(1)).getText().equals(ruleName)}? RULE_REF;
// Matches a string literal or token reference, possibly wrapped in label
// assignments and/or carrying element options, and returns the underlying
// literal/token node in t.
// NOTE(review): no other rule in this grammar references token -- confirm
// whether it is still needed.
token returns [GrammarAST t=null]
: ^(ASSIGN ID s=token {$t = $s.t;})
| ^(PLUS_ASSIGN ID s=token {$t = $s.t;})
| b=STRING_LITERAL {$t = $b;}
| ^(b=STRING_LITERAL elementOptions) {$t = $b;}
| ^(c=TOKEN_REF elementOptions) {$t = $c;}
| c=TOKEN_REF {$t = $c;}
;
// Matches an ELEMENT_OPTIONS subtree with zero or more options.
elementOptions
: ^(ELEMENT_OPTIONS elementOption*)
;
// Matches one element option: a bare ID, or "ID = value" where the value is
// an ID, string literal, ACTION or INT.
elementOption
: ID
| ^(ASSIGN ID ID)
| ^(ASSIGN ID STRING_LITERAL)
| ^(ASSIGN ID ACTION)
| ^(ASSIGN ID INT)
;
// Matches any single element of an alternative; used only to recognize
// structure, so no callbacks are fired.
element
: atom
| ^(NOT element)
| ^(RANGE atom atom)
| ^(ASSIGN ID element)
| ^(PLUS_ASSIGN ID element)
| ^(SET setElement+)
| RULE_REF
| ebnf
| ACTION
| SEMPRED
| EPSILON
;
// Matches one member of a SET: a string literal or a token reference.
setElement
: STRING_LITERAL
| TOKEN_REF
;
// Matches a block, bare or wrapped in one of the EBNF operators.
ebnf: block
| ^( OPTIONAL block )
| ^( CLOSURE block )
| ^( POSITIVE_CLOSURE block )
;
// Matches a nested BLOCK: an optional ACTION followed by one or more alternatives.
block
: ^(BLOCK ACTION? alternative+)
;
// Matches a nested ALT with optional element options and one or more elements.
alternative
: ^(ALT elementOptions? element+)
;
// Matches an atomic element: a rule reference (with optional argument and
// options), a string literal, token reference or wildcard (each with or
// without element options), or a DOT-qualified element.
atom
: ^(RULE_REF ARG_ACTION? elementOptions?)
| ^(STRING_LITERAL elementOptions)
| STRING_LITERAL
| ^(TOKEN_REF elementOptions)
| TOKEN_REF
| ^(WILDCARD elementOptions)
| WILDCARD
| ^(DOT ID element)
;