ugh. fix "[a-z]" for real now

This commit is contained in:
Terence Parr 2012-02-02 10:41:27 -08:00
parent 5cec4721ab
commit c47c07299a
4 changed files with 55 additions and 41 deletions

View File

@ -368,6 +368,7 @@ public class Tool {
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(in);
ANTLRLexer lexer = new ANTLRLexer(in);
CommonTokenStream tokens = new CommonTokenStream(lexer);
lexer.tokens = tokens;
ToolANTLRParser p = new ToolANTLRParser(tokens, this);
p.setTreeAdaptor(adaptor);
ParserRuleReturnScope r = p.grammarSpec();

View File

@ -157,6 +157,7 @@ public class LeftRecursiveRuleTransformer {
ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText));
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
CommonTokenStream tokens = new CommonTokenStream(lexer);
lexer.tokens = tokens;
ToolANTLRParser p = new ToolANTLRParser(tokens, tool);
p.setTreeAdaptor(adaptor);
try {

View File

@ -119,7 +119,27 @@ package org.antlr.v4.parse;
@members {
public boolean isLexer = false;
public CommonTokenStream tokens; // track stream we push to; need for context info
public boolean isLexerRule = false;
/** Walk backwards through the tokens already emitted into this.tokens,
 *  searching for the token that opened the current rule or subrule:
 *  a RULE_REF or TOKEN_REF (rule definition start) or an LPAREN
 *  (subrule start). Returns that token, or null when the stream is
 *  unavailable or no such token precedes the current position.
 */
public Token getRuleOrSubruleStartToken() {
	if ( tokens==null ) return null;
	int size = tokens.size();
	int pos = tokens.index();
	// while lexing, index() can equal size(); clamp to the last real token
	if ( pos>=size ) pos = size-1;
	for ( ; pos>=0 && pos<size; pos-- ) {
		Token candidate = tokens.get(pos);
		int ttype = candidate.getType();
		if ( ttype==LPAREN || ttype==TOKEN_REF || ttype==RULE_REF ) {
			return candidate;
		}
	}
	return null;
}
}
// --------
@ -222,8 +242,8 @@ COMMENT
ARG_OR_CHARSET
options {k=1;}
: {isLexer}?=> LEXER_CHAR_SET {$type=LEXER_CHAR_SET;}
| {!isLexer}?=> ARG_ACTION {$type=ARG_ACTION;}
: {isLexerRule}?=> LEXER_CHAR_SET {$type=LEXER_CHAR_SET;}
| {!isLexerRule}?=> ARG_ACTION {$type=ARG_ACTION;}
;
fragment
@ -407,7 +427,7 @@ TOKENS_SPEC : 'tokens' WSNLCHARS* '{' ;
IMPORT : 'import' ;
FRAGMENT : 'fragment' ;
LEXER : 'lexer' {isLexer=true;} ;
LEXER : 'lexer' ;
PARSER : 'parser' ;
GRAMMAR : 'grammar' ;
PROTECTED : 'protected' ;
@ -425,7 +445,22 @@ MODE : 'mode' ;
//
// Character sequences used as separators, delimiters, operators, etc
//
COLON : ':' ;
COLON : ':'
{
// scan backwards, looking for a RULE_REF or TOKEN_REF.
// which would indicate the start of a rule definition.
// If we see a LPAREN, then it's the start of the subrule.
// this.tokens is the token string we are pushing into, so
// just loop backwards looking for a rule definition. Then
// we set isLexerRule.
Token t = getRuleOrSubruleStartToken();
if ( t!=null ) {
if ( t.getType()==RULE_REF ) isLexerRule = false;
else if ( t.getType()==TOKEN_REF ) isLexerRule = true;
// else must be subrule; don't alter context
}
}
;
COLONCOLON : '::' ;
COMMA : ',' ;
SEMI : ';' ;
@ -449,35 +484,13 @@ POUND : '#' ;
NOT : '~' ;
RBRACE : '}' ;
/*
// ---------------
// Token reference
//
// The names of all tokens must start with an upper case letter and so
// the lexer can distinguish them directly.
//
TOKEN_REF
: ('A'..'Z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '_')*
;
// --------------
// Rule reference
//
// The names of all rules must start with a lower case letter
// so the lexer can distinguish them directly. The parser takes
// care of the case such as id=rulename
//
RULE_REF
: ('a'..'z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '_')*
;
*/
/** Allow unicode rule/token names */
ID : a=NameStartChar NameChar*
{
if ( Character.isUpperCase($a.text.charAt(0)) ) $type = TOKEN_REF;
else $type = RULE_REF;
};
}
;
fragment
NameChar : NameStartChar

View File

@ -57,7 +57,6 @@ options {
// nodes for the AST we are generating. The tokens section is where we
// specify any such tokens
tokens {
LEXER;
RULE;
PREC_RULE; // flip to this if we find that it's left-recursive
RULES;
@ -89,7 +88,7 @@ tokens {
LIST;
ELEMENT_OPTIONS; // TOKEN<options>
RESULT;
// lexer action stuff
LEXER_ALT_ACTION;
LEXER_ACTION_CALL; // ID(foo)
@ -340,7 +339,7 @@ sync
rule: parserRule
| lexerRule
;
// The specification of an EBNF rule in ANTLR style, with all the
// rule level parameters, declarations, actions, rewrite specs and so
// on.
@ -561,7 +560,7 @@ lexerRuleBlock
// just resyncing; ignore error
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
}
lexerAltList
: lexerAlt (OR lexerAlt)* -> lexerAlt+
;
@ -586,11 +585,11 @@ lexerElement
: labeledLexerElement
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$labeledLexerElement.start,"BLOCK"] ^(ALT<AltAST> labeledLexerElement) ) )
| -> labeledLexerElement
)
)
| lexerAtom
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$lexerAtom.start,"BLOCK"] ^(ALT<AltAST> lexerAtom) ) )
| -> lexerAtom
)
)
| lexerBlock
( ebnfSuffix -> ^(ebnfSuffix lexerBlock)
| -> lexerBlock
@ -624,14 +623,14 @@ lexerElement
reportError(re);
recover(input,re);
}
labeledLexerElement
: id (ass=ASSIGN|ass=PLUS_ASSIGN)
( lexerAtom -> ^($ass id lexerAtom)
| block -> ^($ass id block)
)
;
lexerBlock
: LPAREN lexerAltList RPAREN
-> ^(BLOCK<BlockAST>[$LPAREN,"BLOCK"] lexerAltList )
@ -648,7 +647,7 @@ lexerCommand
;
lexerCommandExpr
: id
: id
| INT
;
@ -755,12 +754,12 @@ ebnfSuffix
| STAR -> CLOSURE<StarBlockAST>[$start]
| PLUS -> POSITIVE_CLOSURE<PlusBlockAST>[$start]
;
lexerAtom
: range
: range
| terminal
| RULE_REF<RuleRefAST>
| notSet
| notSet
| wildcard
| LEXER_CHAR_SET
;