forked from jasder/antlr
add some tests
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9763]
This commit is contained in:
parent
52396d8f5f
commit
3a3ed27e60
|
@ -0,0 +1,151 @@
|
|||
/**
|
||||
derived from http://svn.r-project.org/R/trunk/src/main/gram.y
|
||||
http://cran.r-project.org/doc/manuals/R-lang.html#Parser
|
||||
*/
|
||||
grammar R;
|
||||
|
||||
// ambig upon a(i)<- (delayed a bit since ';' could follow--really ambig on "a(i)")
|
||||
|
||||
/** ambig since stacks are exact as it loops around; no way to distinguish
|
||||
|
||||
I tried tracking input index in stack to differentiate the 2 invocations
|
||||
of expr_or_assign, but that would mean altering the outer context from
|
||||
the decision-making in expr_or_assign. Also, later we need to have
|
||||
context stacks that are not dependent on input position to reuse them.
|
||||
|
||||
The fact that the recursive version correctly matches the input while the
|
||||
looping version does not is a problem. We base the notion of ambiguous
|
||||
on the same state, different alternatives, same stack. But, if the
|
||||
rule invocation stack does not uniquely indicate context, we are not accurately
|
||||
detecting ambiguities. We are detecting ambiguities overzealously.
|
||||
|
||||
We need a way for the context stack or configuration to distinguish between
|
||||
iterations of the loop that dive into the same rule such as expr_or_assign*.
|
||||
Perhaps the answer is to track iteration number in the configuration:
|
||||
|
||||
(s, alt, ctx, iter#)
|
||||
|
||||
When we reached the state following '<-', say p, in expr then we need
|
||||
|
||||
(p, 1, [expr expr_or_assign prog], 1)
|
||||
(p, 2, [expr expr_or_assign prog], 2)
|
||||
|
||||
But, that number would be useful... we might pass through 3 or 4 loops.
|
||||
The iteration index really has to be a part of the stack context.
|
||||
Perhaps we add an additional stack element as if we were doing the
|
||||
recursive version
|
||||
|
||||
prog : expr_or_assign prog | ;
|
||||
|
||||
(p, 1, [expr expr_or_assign prog])
|
||||
(p, 2, [expr expr_or_assign prog expr_or_assign prog])
|
||||
|
||||
The "expr expr_or_assign prog" represents the second call back down
|
||||
into expr_or_assign like the loop would except that the stack looks different.
|
||||
|
||||
Or, we could mark stack references with the loop iteration index.
|
||||
|
||||
(p, 1, [expr expr_or_assign prog])
|
||||
(p, 2, [expr expr_or_assign.2 prog])
|
||||
|
||||
This seems reusable as opposed to the input index. It might be complicated
|
||||
to track this. In the general case, we would need a mapping from rule
|
||||
invocation of rule r to a count, and within a specific rule context. That
|
||||
might add a HashMap for every RuleContext. ick. Also, what about the context
|
||||
that I create during ATN simulation? I would have to track that as well
|
||||
as the generated code in the parser. Rule invocation states would act
|
||||
like triggers that would bump a count for that target rule in the current ctx.
|
||||
*/
|
||||
prog : expr_or_assign* ;
|
||||
|
||||
/** This one is not ambig since 2nd time into expr_or_assign has different
|
||||
context where expr_or_assign* shows same context.
|
||||
*/
|
||||
//prog : expr_or_assign expr_or_assign ;
|
||||
|
||||
// not ambig, context different
|
||||
//prog : expr_or_assign prog | ;
|
||||
|
||||
expr_or_assign
|
||||
: expr '=' expr_or_assign
|
||||
| expr // match ID a, fall out, reenter, match "(i)<-x" via alt 1
|
||||
;
|
||||
|
||||
expr : expr_primary ('<-' ID)? ;
|
||||
expr_primary
|
||||
: '(' ID ')'
|
||||
| ID '(' ID ')'
|
||||
| ID
|
||||
;
|
||||
|
||||
/*
|
||||
expr : '(' ID ')' // and this
|
||||
| expr '<-'<assoc=right> ID
|
||||
| ID '(' ID ')'
|
||||
| ID
|
||||
;
|
||||
*/
|
||||
|
||||
HEX : '0' ('x'|'X') HEXDIGIT+ [Ll]? ;
|
||||
|
||||
INT : DIGIT+ [Ll]? ;
|
||||
|
||||
fragment
|
||||
HEXDIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
|
||||
|
||||
FLOAT : DIGIT+ '.' DIGIT* EXP? [Ll]?
|
||||
| DIGIT+ EXP? [Ll]?
|
||||
| '.' DIGIT+ EXP? [Ll]?
|
||||
;
|
||||
fragment
|
||||
DIGIT : '0'..'9' ;
|
||||
fragment
|
||||
EXP : ('E' | 'e') ('+' | '-')? DIGIT+ ; // exponent is plain digits; INT would also admit an [Ll] suffix inside the exponent (e.g. "1e5LL")
|
||||
|
||||
COMPLEX : INT 'i'
|
||||
| FLOAT 'i'
|
||||
;
|
||||
|
||||
STRING : '"' ( ESC | ~('\\'|'"') )* '"'
|
||||
| '\'' ( ESC | ~('\\'|'\'') )* '\''
|
||||
;
|
||||
|
||||
fragment
|
||||
ESC
|
||||
: '\\' ([abtnfrv]|'"'|'\'')
|
||||
| UNICODE_ESCAPE
|
||||
| HEX_ESCAPE
|
||||
| OCTAL_ESCAPE
|
||||
;
|
||||
|
||||
fragment
|
||||
UNICODE_ESCAPE
|
||||
: '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
|
||||
| '\\' 'u' '{' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT '}'
|
||||
;
|
||||
|
||||
fragment
|
||||
OCTAL_ESCAPE
|
||||
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
|
||||
| '\\' ('0'..'7') ('0'..'7')
|
||||
| '\\' ('0'..'7')
|
||||
;
|
||||
|
||||
fragment
|
||||
HEX_ESCAPE
|
||||
: '\\' 'x' HEXDIGIT HEXDIGIT? // R hex escapes are \xh or \xhh; without the 'x' marker "\x41" cannot be lexed
|
||||
;
|
||||
|
||||
ID : '.'? (LETTER|'_'|'.') (LETTER|DIGIT|'_'|'.')*
|
||||
| LETTER (LETTER|DIGIT|'_'|'.')*
|
||||
;
|
||||
|
||||
fragment
|
||||
LETTER : 'a'..'z'|'A'..'Z'|'\u0080'..'\u00FF' ;
|
||||
|
||||
USER_OP : '%' (~'%')* '%' ; // stop at the first closing '%'; a greedy '.*' would run to the last '%' in the input
|
||||
|
||||
COMMENT : '#' (~'\n')* '\n' {skip();} ; // comment ends at the first newline; a greedy '.*' would swallow every following line
|
||||
|
||||
/** Doesn't handle '\n' correctly. it's context-sensitive */
|
||||
WS : (' '|'\t'|'\n'|'\r')+ {skip();} ;
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
[The "BSD license"]
|
||||
Copyright (c) 2011 Terence Parr
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
import org.antlr.v4.runtime.ANTLRFileStream;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.DiagnosticErrorStrategy;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
|
||||
public class TestR {
|
||||
public static void main(String[] args) throws Exception {
|
||||
RLexer t = new RLexer(new ANTLRFileStream(args[0]));
|
||||
CommonTokenStream tokens = new CommonTokenStream(t);
|
||||
// tokens.fill();
|
||||
// for (Object tok : tokens.getTokens()) {
|
||||
// System.out.println(tok);
|
||||
// }
|
||||
RParser p = new RParser(tokens);
|
||||
p.setBuildParseTree(true);
|
||||
p.setErrorHandler(new DiagnosticErrorStrategy<Token>());
|
||||
p.prog();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue