Merge pull request #4 from parrt/master

ugh. fix "[a-z]" for real now
This commit is contained in:
Terence Parr 2012-02-02 10:42:10 -08:00
commit c09c8b88d7
4 changed files with 55 additions and 41 deletions

View File

@ -368,6 +368,7 @@ public class Tool {
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(in); GrammarASTAdaptor adaptor = new GrammarASTAdaptor(in);
ANTLRLexer lexer = new ANTLRLexer(in); ANTLRLexer lexer = new ANTLRLexer(in);
CommonTokenStream tokens = new CommonTokenStream(lexer); CommonTokenStream tokens = new CommonTokenStream(lexer);
lexer.tokens = tokens;
ToolANTLRParser p = new ToolANTLRParser(tokens, this); ToolANTLRParser p = new ToolANTLRParser(tokens, this);
p.setTreeAdaptor(adaptor); p.setTreeAdaptor(adaptor);
ParserRuleReturnScope r = p.grammarSpec(); ParserRuleReturnScope r = p.grammarSpec();

View File

@ -157,6 +157,7 @@ public class LeftRecursiveRuleTransformer {
ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText)); ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText));
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(); GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
CommonTokenStream tokens = new CommonTokenStream(lexer); CommonTokenStream tokens = new CommonTokenStream(lexer);
lexer.tokens = tokens;
ToolANTLRParser p = new ToolANTLRParser(tokens, tool); ToolANTLRParser p = new ToolANTLRParser(tokens, tool);
p.setTreeAdaptor(adaptor); p.setTreeAdaptor(adaptor);
try { try {

View File

@ -119,7 +119,27 @@ package org.antlr.v4.parse;
@members { @members {
public boolean isLexer = false; public CommonTokenStream tokens; // track stream we push to; need for context info
public boolean isLexerRule = false;
/** Walk backwards through this.tokens, beginning at the stream's current
 *  index, until a token that opens a rule or a subrule is found: a
 *  TOKEN_REF or RULE_REF (start of a rule) or an LPAREN (start of a subrule).
 *  Returns that token, or null when no stream has been attached or no
 *  such token is found at or before the current position.
 */
public Token getRuleOrSubruleStartToken() {
    if ( tokens==null ) {
        return null;
    }
    int current = tokens.index();
    int size = tokens.size();
    // index() has been observed to equal size() while lexing, so clamp
    // the starting position to the last buffered token.
    int start = Math.min(current, size-1);
    for (int pos = start; pos>=0; pos--) {
        Token candidate = tokens.get(pos);
        int kind = candidate.getType();
        boolean opensRuleOrSubrule =
            kind == LPAREN || kind == TOKEN_REF || kind == RULE_REF;
        if ( opensRuleOrSubrule ) {
            return candidate;
        }
    }
    return null;
}
} }
// -------- // --------
@ -222,8 +242,8 @@ COMMENT
ARG_OR_CHARSET ARG_OR_CHARSET
options {k=1;} options {k=1;}
: {isLexer}?=> LEXER_CHAR_SET {$type=LEXER_CHAR_SET;} : {isLexerRule}?=> LEXER_CHAR_SET {$type=LEXER_CHAR_SET;}
| {!isLexer}?=> ARG_ACTION {$type=ARG_ACTION;} | {!isLexerRule}?=> ARG_ACTION {$type=ARG_ACTION;}
; ;
fragment fragment
@ -407,7 +427,7 @@ TOKENS_SPEC : 'tokens' WSNLCHARS* '{' ;
IMPORT : 'import' ; IMPORT : 'import' ;
FRAGMENT : 'fragment' ; FRAGMENT : 'fragment' ;
LEXER : 'lexer' {isLexer=true;} ; LEXER : 'lexer' ;
PARSER : 'parser' ; PARSER : 'parser' ;
GRAMMAR : 'grammar' ; GRAMMAR : 'grammar' ;
PROTECTED : 'protected' ; PROTECTED : 'protected' ;
@ -425,7 +445,22 @@ MODE : 'mode' ;
// //
// Character sequences used as separators, delimiters, operators, etc // Character sequences used as separators, delimiters, operators, etc
// //
COLON : ':' ; COLON : ':'
{
// scan backwards, looking for a RULE_REF or TOKEN_REF.
// which would indicate the start of a rule definition.
// If we see a LPAREN, then it's the start of the subrule.
// this.tokens is the token string we are pushing into, so
// just loop backwards looking for a rule definition. Then
// we set isLexerRule.
Token t = getRuleOrSubruleStartToken();
if ( t!=null ) {
if ( t.getType()==RULE_REF ) isLexerRule = false;
else if ( t.getType()==TOKEN_REF ) isLexerRule = true;
// else must be subrule; don't alter context
}
}
;
COLONCOLON : '::' ; COLONCOLON : '::' ;
COMMA : ',' ; COMMA : ',' ;
SEMI : ';' ; SEMI : ';' ;
@ -449,35 +484,13 @@ POUND : '#' ;
NOT : '~' ; NOT : '~' ;
RBRACE : '}' ; RBRACE : '}' ;
/*
// ---------------
// Token reference
//
// The names of all tokens must start with an upper case letter and so
// the lexer can distinguish them directly.
//
TOKEN_REF
: ('A'..'Z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '_')*
;
// --------------
// Rule reference
//
// The names of all rules must start with a lower case letter
// so the lexer can distinguish them directly. The parser takes
// care of the case such as id=rulename
//
RULE_REF
: ('a'..'z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '_')*
;
*/
/** Allow unicode rule/token names */ /** Allow unicode rule/token names */
ID : a=NameStartChar NameChar* ID : a=NameStartChar NameChar*
{ {
if ( Character.isUpperCase($a.text.charAt(0)) ) $type = TOKEN_REF; if ( Character.isUpperCase($a.text.charAt(0)) ) $type = TOKEN_REF;
else $type = RULE_REF; else $type = RULE_REF;
}; }
;
fragment fragment
NameChar : NameStartChar NameChar : NameStartChar

View File

@ -57,7 +57,6 @@ options {
// nodes for the AST we are generating. The tokens section is where we // nodes for the AST we are generating. The tokens section is where we
// specify any such tokens // specify any such tokens
tokens { tokens {
LEXER;
RULE; RULE;
PREC_RULE; // flip to this if we find that it's left-recursive PREC_RULE; // flip to this if we find that it's left-recursive
RULES; RULES;
@ -89,7 +88,7 @@ tokens {
LIST; LIST;
ELEMENT_OPTIONS; // TOKEN<options> ELEMENT_OPTIONS; // TOKEN<options>
RESULT; RESULT;
// lexer action stuff // lexer action stuff
LEXER_ALT_ACTION; LEXER_ALT_ACTION;
LEXER_ACTION_CALL; // ID(foo) LEXER_ACTION_CALL; // ID(foo)
@ -340,7 +339,7 @@ sync
rule: parserRule rule: parserRule
| lexerRule | lexerRule
; ;
// The specification of an EBNF rule in ANTLR style, with all the // The specification of an EBNF rule in ANTLR style, with all the
// rule level parameters, declarations, actions, rewrite specs and so // rule level parameters, declarations, actions, rewrite specs and so
// on. // on.
@ -561,7 +560,7 @@ lexerRuleBlock
// just resyncing; ignore error // just resyncing; ignore error
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null); retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
} }
lexerAltList lexerAltList
: lexerAlt (OR lexerAlt)* -> lexerAlt+ : lexerAlt (OR lexerAlt)* -> lexerAlt+
; ;
@ -586,11 +585,11 @@ lexerElement
: labeledLexerElement : labeledLexerElement
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$labeledLexerElement.start,"BLOCK"] ^(ALT<AltAST> labeledLexerElement) ) ) ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$labeledLexerElement.start,"BLOCK"] ^(ALT<AltAST> labeledLexerElement) ) )
| -> labeledLexerElement | -> labeledLexerElement
) )
| lexerAtom | lexerAtom
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$lexerAtom.start,"BLOCK"] ^(ALT<AltAST> lexerAtom) ) ) ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$lexerAtom.start,"BLOCK"] ^(ALT<AltAST> lexerAtom) ) )
| -> lexerAtom | -> lexerAtom
) )
| lexerBlock | lexerBlock
( ebnfSuffix -> ^(ebnfSuffix lexerBlock) ( ebnfSuffix -> ^(ebnfSuffix lexerBlock)
| -> lexerBlock | -> lexerBlock
@ -624,14 +623,14 @@ lexerElement
reportError(re); reportError(re);
recover(input,re); recover(input,re);
} }
labeledLexerElement labeledLexerElement
: id (ass=ASSIGN|ass=PLUS_ASSIGN) : id (ass=ASSIGN|ass=PLUS_ASSIGN)
( lexerAtom -> ^($ass id lexerAtom) ( lexerAtom -> ^($ass id lexerAtom)
| block -> ^($ass id block) | block -> ^($ass id block)
) )
; ;
lexerBlock lexerBlock
: LPAREN lexerAltList RPAREN : LPAREN lexerAltList RPAREN
-> ^(BLOCK<BlockAST>[$LPAREN,"BLOCK"] lexerAltList ) -> ^(BLOCK<BlockAST>[$LPAREN,"BLOCK"] lexerAltList )
@ -648,7 +647,7 @@ lexerCommand
; ;
lexerCommandExpr lexerCommandExpr
: id : id
| INT | INT
; ;
@ -755,12 +754,12 @@ ebnfSuffix
| STAR -> CLOSURE<StarBlockAST>[$start] | STAR -> CLOSURE<StarBlockAST>[$start]
| PLUS -> POSITIVE_CLOSURE<PlusBlockAST>[$start] | PLUS -> POSITIVE_CLOSURE<PlusBlockAST>[$start]
; ;
lexerAtom lexerAtom
: range : range
| terminal | terminal
| RULE_REF<RuleRefAST> | RULE_REF<RuleRefAST>
| notSet | notSet
| wildcard | wildcard
| LEXER_CHAR_SET | LEXER_CHAR_SET
; ;