add some tests

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9763]
This commit is contained in:
parrt 2011-12-28 19:49:37 -08:00
parent 52396d8f5f
commit 3a3ed27e60
2 changed files with 199 additions and 0 deletions

151
tool/playground/R.g4 Normal file
View File

@ -0,0 +1,151 @@
/**
derived from http://svn.r-project.org/R/trunk/src/main/gram.y
http://cran.r-project.org/doc/manuals/R-lang.html#Parser
*/
grammar R;
// ambig upon a(i)<- (delayed a bit since ';' could follow--really ambig on "a(i)")
/** ambig since stacks are exact as it loops around; no way to distinguish
I tried tracking input index in stack to differentiate the 2 invocations
of expr_or_assign, but that would mean altering our context from
the decision-making in expr_or_assign. Also, later we need to have
context stacks that are not dependent on input position to reuse them.
The fact that the recursive version correctly matches the input while the
looping version does not is a problem. We base the notion of ambiguous
on the same state, different alternatives, same stack. But, if the
rule invocation stack does not uniquely indicate context, we are not accurately
detecting ambiguities. We are detecting ambiguities overzealously.
We need a way for the context stack or configuration to distinguish between
iterations of the loop that dive into the same rule such as expr_or_assign*.
Perhaps the answer is to track iteration number in the configuration:
(s, alt, ctx, iter#)
When we reached the state following '<-', say p, in expr then we need
(p, 1, [expr expr_or_assign prog], 1)
(p, 2, [expr expr_or_assign prog], 2)
But, that number would be useful... we might pass through 3 or 4 loops.
The iteration index really has to be a part of the stack context.
Perhaps we add an additional stack element as if we were doing the
recursive version
prog : expr_or_assign prog | ;
(p, 1, [expr expr_or_assign prog])
(p, 2, [expr expr_or_assign prog expr_or_assign prog])
The "expr expr_or_assign prog" represents the second call back down
into expr_or_assign like the loop would except that the stack looks different.
Or, we could mark stack references with the loop iteration index.
(p, 1, [expr expr_or_assign prog])
(p, 2, [expr expr_or_assign.2 prog])
This seems reusable as opposed to the input index. It might be complicated
to track this. In the general case, we would need a mapping from rule
invocation of rule r to a count, and within a specific rule context. That
might add a HashMap for every RuleContext. ick. Also, what about the context
that I create during ATN simulation? I would have to track that as well
as the generated code in the parser. Rule invocation states would act
like triggers that would bump a count for that target rule in the current ctx.
*/
// Start rule (invoked by the test driver): zero or more expr_or_assign, matched in a loop.
prog : expr_or_assign* ;
/** This one is not ambig since 2nd time into expr_or_assign has different
context where expr_or_assign* shows same context.
*/
//prog : expr_or_assign expr_or_assign ;
// not ambig, context different
//prog : expr_or_assign prog | ;
// Either a right-recursive '=' assignment (expr '=' expr_or_assign) or a bare expr.
// Both alternatives begin with expr, so the choice can only be made after
// looking past a complete expr for a following '='.
expr_or_assign
: expr '=' expr_or_assign
| expr // match ID a, fall out, reenter, match "(i)<-x" via alt 1
;
// An expr_primary, optionally followed by '<-' and a single ID.
expr : expr_primary ('<-' ID)? ;
// The three primary forms: a parenthesized ID, an ID followed by a
// parenthesized ID (call-like shape), or a bare ID.
// Alternatives 2 and 3 share the ID prefix; what follows the ID decides between them.
expr_primary
: '(' ID ')'
| ID '(' ID ')'
| ID
;
/*
expr : '(' ID ')' // and this
| expr '<-'<assoc=right> ID
| ID '(' ID ')'
| ID
;
*/
// Hexadecimal integer literal: '0', then 'x' or 'X', one or more HEXDIGITs,
// and an optional 'L'/'l' suffix.
HEX : '0' ('x'|'X') HEXDIGIT+ [Ll]? ;
// Decimal integer literal: one or more DIGITs with an optional 'L'/'l' suffix.
INT : DIGIT+ [Ll]? ;
// Helper fragment (only usable inside other lexer rules): a single
// hexadecimal digit, 0-9 or a-f in either case.
fragment
HEXDIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
// Floating-point literal in three shapes, each with an optional exponent (EXP)
// and an optional 'L'/'l' suffix:
//   1. digits '.' optional-digits
//   2. digits only
//   3. '.' digits
// NOTE(review): when EXP is absent, alternative 2 matches exactly the same text
// as INT, which is declared earlier in this grammar — confirm which token type
// is intended to win for plain digit strings.
FLOAT : DIGIT+ '.' DIGIT* EXP? [Ll]?
| DIGIT+ EXP? [Ll]?
| '.' DIGIT+ EXP? [Ll]?
;
// Helper fragment: a single decimal digit.
fragment
DIGIT : '0'..'9' ;
// Helper fragment: exponent part of a FLOAT — 'E' or 'e', an optional sign, then INT.
// NOTE(review): INT itself allows a trailing 'L'/'l', so the exponent can absorb
// that suffix before FLOAT's own [Ll]? is reached — confirm this is intended.
fragment
EXP : ('E' | 'e') ('+' | '-')? INT ;
// Complex literal: an INT or a FLOAT immediately followed by the letter 'i'.
COMPLEX : INT 'i'
| FLOAT 'i'
;
// String literal delimited by double quotes (alt 1) or single quotes (alt 2).
// The body is any mix of ESC sequences and characters other than a backslash
// or the delimiting quote, so a backslash can only appear as the start of an ESC.
STRING : '"' ( ESC | ~('\\'|'"') )* '"'
| '\'' ( ESC | ~('\\'|'\'') )* '\''
;
// Helper fragment: one escape sequence inside a STRING. Either a backslash
// followed by a single escape character (a b t n f r v, a double quote, a
// single quote, or another backslash), or one of the unicode / hex / octal
// escape forms.
// The backslash is part of the single-character set because STRING's body
// excludes raw backslashes; without it an escaped backslash ("\\" in the
// input text) could not be matched by any alternative.
fragment
ESC
: '\\' ([abtnfrv]|'"'|'\''|'\\')
| UNICODE_ESCAPE
| HEX_ESCAPE
| OCTAL_ESCAPE
;
// Helper fragment: backslash, 'u', then exactly four HEXDIGITs, written either
// bare (alt 1) or wrapped in '{' '}' (alt 2).
fragment
UNICODE_ESCAPE
: '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
| '\\' 'u' '{' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT '}'
;
// Helper fragment: backslash followed by one to three octal digits.
// The three-digit form restricts its first digit to 0..3, so the largest
// three-digit escape is \377.
fragment
OCTAL_ESCAPE
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
// Helper fragment: hex escape inside a STRING — backslash, the letter 'x',
// then one or two HEXDIGITs (R's "\xnn" form).
// The 'x' marker is required: without it the rule cannot match "\x41" at all
// ('x' is not a hex digit) and instead accepts sequences such as "\c1".
fragment
HEX_ESCAPE
: '\\' 'x' HEXDIGIT HEXDIGIT?
;
// Identifier.
//   Alt 1: an optional leading '.', then a LETTER, '_' or '.', then any run of
//          LETTER / DIGIT / '_' / '.'.
//   Alt 2: a LETTER followed by the same kind of run.
// A digit can never directly start an ID, and in alt 1 it cannot directly
// follow the optional leading '.' either.
// NOTE(review): alt 2 looks fully covered by alt 1 (leading '.' omitted,
// LETTER chosen) — confirm whether it is needed.
ID : '.'? (LETTER|'_'|'.') (LETTER|DIGIT|'_'|'.')*
| LETTER (LETTER|DIGIT|'_'|'.')*
;
// Helper fragment: an ASCII letter in either case, or any character in the
// range U+0080..U+00FF.
fragment
LETTER : 'a'..'z'|'A'..'Z'|'\u0080'..'\u00FF' ;
// User-defined operator: '%', any run of characters other than '%', then the
// closing '%'. The body is spelled out as an explicit negated set rather than
// '.*' so the token always ends at the FIRST following '%', regardless of
// whether the tool treats '.*' as greedy.
USER_OP : '%' ~('%')* '%' ;
// Line comment: '#' up to, but not including, the end of the line; skipped.
// The terminating '\n' is deliberately not consumed here — WS skips it — so a
// comment on the last line of a file with no trailing newline still lexes.
// The body is an explicit negated set rather than '.*' so the comment can
// never run past its own line.
COMMENT : '#' ~('\n')* {skip();} ;
/** Whitespace: one or more spaces, tabs, newlines or carriage returns,
 *  discarded via skip() so the parser never sees them.
 *  Doesn't handle '\n' correctly. it's context-sensitive
 */
WS : (' '|'\t'|'\n'|'\r')+ {skip();} ;

View File

@ -0,0 +1,48 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
import org.antlr.v4.runtime.ANTLRFileStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.DiagnosticErrorStrategy;
import org.antlr.v4.runtime.Token;
/**
 * Command-line driver that lexes and parses one input file using
 * {@code RLexer} and {@code RParser} (presumably the classes generated from
 * the R grammar — confirm against the build).
 */
public class TestR {
    /**
     * Parses the file named by the first command-line argument, starting at
     * the {@code prog} rule.
     *
     * @param args {@code args[0]} is the path of the file to parse; if no
     *             argument is given, a usage message is printed to stderr and
     *             the JVM exits with status 1
     * @throws Exception whatever the stream, lexer or parser propagates
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length == 0) {
            System.err.println("usage: java TestR <input-file>");
            System.exit(1);
        }

        RLexer lexer = new RLexer(new ANTLRFileStream(args[0]));
        CommonTokenStream tokens = new CommonTokenStream(lexer);

        // Debugging aid: uncomment to dump every token before parsing.
//      tokens.fill();
//      for (Object tok : tokens.getTokens()) {
//          System.out.println(tok);
//      }

        RParser parser = new RParser(tokens);
        parser.setBuildParseTree(true);
        // Install the diagnostic strategy as the parser's error handler.
        parser.setErrorHandler(new DiagnosticErrorStrategy<Token>());
        parser.prog();
    }
}