forked from jasder/antlr
ugh. fix "[a-z]" for real now
This commit is contained in:
parent
5cec4721ab
commit
c47c07299a
|
@ -368,6 +368,7 @@ public class Tool {
|
|||
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(in);
|
||||
ANTLRLexer lexer = new ANTLRLexer(in);
|
||||
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||
lexer.tokens = tokens;
|
||||
ToolANTLRParser p = new ToolANTLRParser(tokens, this);
|
||||
p.setTreeAdaptor(adaptor);
|
||||
ParserRuleReturnScope r = p.grammarSpec();
|
||||
|
|
|
@ -157,6 +157,7 @@ public class LeftRecursiveRuleTransformer {
|
|||
ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText));
|
||||
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
||||
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||
lexer.tokens = tokens;
|
||||
ToolANTLRParser p = new ToolANTLRParser(tokens, tool);
|
||||
p.setTreeAdaptor(adaptor);
|
||||
try {
|
||||
|
|
|
@ -119,7 +119,27 @@ package org.antlr.v4.parse;
|
|||
|
||||
|
||||
@members {
|
||||
public boolean isLexer = false;
|
||||
public CommonTokenStream tokens; // track stream we push to; need for context info
|
||||
public boolean isLexerRule = false;
|
||||
|
||||
/** Scan backwards from the current index of this.tokens, looking for
|
||||
* the token that starts the enclosing rule or subrule, i.e. the nearest
|
||||
* LPAREN, TOKEN_REF or RULE_REF at or before that index. Return that token, or null if tokens is unset or no such token is found.
|
||||
*/
|
||||
public Token getRuleOrSubruleStartToken() {
|
||||
if ( tokens==null ) return null; // no token stream attached; nothing to scan
|
||||
int i = tokens.index();
|
||||
int n = tokens.size();
|
||||
if ( i>=n ) i = n-1; // seems index == n as we lex; clamp to the last buffered token
|
||||
while ( i>=0 && i<n) { // walk back toward the start of the stream
|
||||
int ttype = tokens.get(i).getType();
|
||||
if ( ttype == LPAREN || ttype == TOKEN_REF || ttype == RULE_REF ) { // LPAREN is treated as a subrule start; TOKEN_REF/RULE_REF as a rule start
|
||||
return tokens.get(i);
|
||||
}
|
||||
i--;
|
||||
}
|
||||
return null; // reached the front of the stream without finding a start token
|
||||
}
|
||||
}
|
||||
|
||||
// --------
|
||||
|
@ -222,8 +242,8 @@ COMMENT
|
|||
|
||||
ARG_OR_CHARSET
|
||||
options {k=1;}
|
||||
: {isLexer}?=> LEXER_CHAR_SET {$type=LEXER_CHAR_SET;}
|
||||
| {!isLexer}?=> ARG_ACTION {$type=ARG_ACTION;}
|
||||
: {isLexerRule}?=> LEXER_CHAR_SET {$type=LEXER_CHAR_SET;}
|
||||
| {!isLexerRule}?=> ARG_ACTION {$type=ARG_ACTION;}
|
||||
;
|
||||
|
||||
fragment
|
||||
|
@ -407,7 +427,7 @@ TOKENS_SPEC : 'tokens' WSNLCHARS* '{' ;
|
|||
|
||||
IMPORT : 'import' ;
|
||||
FRAGMENT : 'fragment' ;
|
||||
LEXER : 'lexer' {isLexer=true;} ;
|
||||
LEXER : 'lexer' ;
|
||||
PARSER : 'parser' ;
|
||||
GRAMMAR : 'grammar' ;
|
||||
PROTECTED : 'protected' ;
|
||||
|
@ -425,7 +445,22 @@ MODE : 'mode' ;
|
|||
//
|
||||
// Character sequences used as separators, delimiters, operators, etc
|
||||
//
|
||||
COLON : ':' ;
|
||||
COLON : ':'
|
||||
{
|
||||
// scan backwards, looking for a RULE_REF or TOKEN_REF.
|
||||
// which would indicate the start of a rule definition.
|
||||
// If we see a LPAREN, then it's the start of the subrule.
|
||||
// this.tokens is the token string we are pushing into, so
|
||||
// just loop backwards looking for a rule definition. Then
|
||||
// we set isLexerRule.
|
||||
Token t = getRuleOrSubruleStartToken();
|
||||
if ( t!=null ) {
|
||||
if ( t.getType()==RULE_REF ) isLexerRule = false;
|
||||
else if ( t.getType()==TOKEN_REF ) isLexerRule = true;
|
||||
// else must be subrule; don't alter context
|
||||
}
|
||||
}
|
||||
;
|
||||
COLONCOLON : '::' ;
|
||||
COMMA : ',' ;
|
||||
SEMI : ';' ;
|
||||
|
@ -449,35 +484,13 @@ POUND : '#' ;
|
|||
NOT : '~' ;
|
||||
RBRACE : '}' ;
|
||||
|
||||
/*
|
||||
// ---------------
|
||||
// Token reference
|
||||
//
|
||||
// The names of all tokens must start with an upper case letter and so
|
||||
// the lexer can distinguish them directly.
|
||||
//
|
||||
TOKEN_REF
|
||||
: ('A'..'Z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '_')*
|
||||
;
|
||||
|
||||
// --------------
|
||||
// Rule reference
|
||||
//
|
||||
// The names of all rules must start with a lower case letter
|
||||
// so the lexer can distinguish them directly. The parser takes
|
||||
// care of the case such as id=rulename
|
||||
//
|
||||
RULE_REF
|
||||
: ('a'..'z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '_')*
|
||||
;
|
||||
*/
|
||||
|
||||
/** Allow unicode rule/token names */
|
||||
ID : a=NameStartChar NameChar*
|
||||
{
|
||||
if ( Character.isUpperCase($a.text.charAt(0)) ) $type = TOKEN_REF;
|
||||
else $type = RULE_REF;
|
||||
};
|
||||
}
|
||||
;
|
||||
|
||||
fragment
|
||||
NameChar : NameStartChar
|
||||
|
|
|
@ -57,7 +57,6 @@ options {
|
|||
// nodes for the AST we are generating. The tokens section is where we
|
||||
// specify any such tokens
|
||||
tokens {
|
||||
LEXER;
|
||||
RULE;
|
||||
PREC_RULE; // flip to this if we find that it's left-recursive
|
||||
RULES;
|
||||
|
@ -89,7 +88,7 @@ tokens {
|
|||
LIST;
|
||||
ELEMENT_OPTIONS; // TOKEN<options>
|
||||
RESULT;
|
||||
|
||||
|
||||
// lexer action stuff
|
||||
LEXER_ALT_ACTION;
|
||||
LEXER_ACTION_CALL; // ID(foo)
|
||||
|
@ -340,7 +339,7 @@ sync
|
|||
rule: parserRule
|
||||
| lexerRule
|
||||
;
|
||||
|
||||
|
||||
// The specification of an EBNF rule in ANTLR style, with all the
|
||||
// rule level parameters, declarations, actions, rewrite specs and so
|
||||
// on.
|
||||
|
@ -561,7 +560,7 @@ lexerRuleBlock
|
|||
// just resyncing; ignore error
|
||||
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
|
||||
}
|
||||
|
||||
|
||||
lexerAltList
|
||||
: lexerAlt (OR lexerAlt)* -> lexerAlt+
|
||||
;
|
||||
|
@ -586,11 +585,11 @@ lexerElement
|
|||
: labeledLexerElement
|
||||
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$labeledLexerElement.start,"BLOCK"] ^(ALT<AltAST> labeledLexerElement) ) )
|
||||
| -> labeledLexerElement
|
||||
)
|
||||
)
|
||||
| lexerAtom
|
||||
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$lexerAtom.start,"BLOCK"] ^(ALT<AltAST> lexerAtom) ) )
|
||||
| -> lexerAtom
|
||||
)
|
||||
)
|
||||
| lexerBlock
|
||||
( ebnfSuffix -> ^(ebnfSuffix lexerBlock)
|
||||
| -> lexerBlock
|
||||
|
@ -624,14 +623,14 @@ lexerElement
|
|||
reportError(re);
|
||||
recover(input,re);
|
||||
}
|
||||
|
||||
|
||||
labeledLexerElement
|
||||
: id (ass=ASSIGN|ass=PLUS_ASSIGN)
|
||||
( lexerAtom -> ^($ass id lexerAtom)
|
||||
| block -> ^($ass id block)
|
||||
)
|
||||
;
|
||||
|
||||
|
||||
lexerBlock
|
||||
: LPAREN lexerAltList RPAREN
|
||||
-> ^(BLOCK<BlockAST>[$LPAREN,"BLOCK"] lexerAltList )
|
||||
|
@ -648,7 +647,7 @@ lexerCommand
|
|||
;
|
||||
|
||||
lexerCommandExpr
|
||||
: id
|
||||
: id
|
||||
| INT
|
||||
;
|
||||
|
||||
|
@ -755,12 +754,12 @@ ebnfSuffix
|
|||
| STAR -> CLOSURE<StarBlockAST>[$start]
|
||||
| PLUS -> POSITIVE_CLOSURE<PlusBlockAST>[$start]
|
||||
;
|
||||
|
||||
|
||||
lexerAtom
|
||||
: range
|
||||
: range
|
||||
| terminal
|
||||
| RULE_REF<RuleRefAST>
|
||||
| notSet
|
||||
| notSet
|
||||
| wildcard
|
||||
| LEXER_CHAR_SET
|
||||
;
|
||||
|
|
Loading…
Reference in New Issue