add some tests

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9763]
2011-12-28 19:49:37 -08:00 · 2011-12-28 19:49:37 -08:00 · 3a3ed27e60
parent 52396d8f5f
commit 3a3ed27e60
2 changed files with 199 additions and 0 deletions
--- a/tool/playground/R.g4
+++ b/tool/playground/R.g4
@ -0,0 +1,151 @@
+/**
+derived from http://svn.r-project.org/R/trunk/src/main/gram.y
+http://cran.r-project.org/doc/manuals/R-lang.html#Parser
+*/
+grammar R;
+
+// ambig upon a(i)<-  (delayed a bit since ';' could follow--really ambig on "a(i)")
+
+/** ambig since stacks are exact as it loops around; no way to distinguish
+
+    I tried tracking input index in stack to differentiate the 2 invocations
+    of expr_or_assign, but that would mean altering our context from
+    the decision-making in expr_or_assign.  Also, later we need to have
+    context stacks that are not dependent on input position to reuse them.
+
+    The fact that the recursive version correctly matches the input while the
+    looping version does not is a problem. We base the notion of ambiguous
+    on the same state, different alternatives, same stack. But, if the
+    rule invocation stack does not uniquely indicate context, we are not accurately
+    detecting ambiguities. We are detecting ambiguities overzealously.
+
+    We need a way for the context stack or configuration to distinguish between
+    iterations of the loop that dive into the same rule such as expr_or_assign*.
+    Perhaps the answer is to track iteration number in the configuration:
+
+	(s, alt, ctx, iter#)
+
+    When we reached the state following '<-', say p, in expr then we need
+
+	(p, 1, [expr expr_or_assign prog], 1)
+	(p, 2, [expr expr_or_assign prog], 2)
+
+    But, that single number would not be useful... we might pass through 3 or 4 loops.
+    The iteration index really has to be a part of the stack context.
+    Perhaps we add an additional stack element as if we were doing the
+    recursive version
+
+	prog : expr_or_assign prog | ;
+
+	(p, 1, [expr expr_or_assign prog])
+	(p, 2, [expr expr_or_assign prog expr_or_assign prog])
+
+    The "expr expr_or_assign prog" represents the second call back down
+    into expr_or_assign like the loop would except that the stack looks different.
+    
+    Or, we could mark stack references with the loop iteration index.
+
+	(p, 1, [expr expr_or_assign prog])
+	(p, 2, [expr expr_or_assign.2 prog])
+
+    This seems reusable as opposed to the input index. It might be complicated
+    to track this. In the general case, we would need a mapping from rule
+    invocation of rule r to a count, and within a specific rule context. That
+    might add a HashMap for every RuleContext. ick. Also, what about the context
+    that I create during ATN simulation? I would have to track that as well
+    as the generated code in the parser. Rule invocation states would act
+    like triggers that would bump a count for that target rule in the current ctx.
+*/
+prog	:	expr_or_assign* ;
+
+/** This one is not ambig since 2nd time into expr_or_assign has different
+    context, whereas expr_or_assign* shows the same context.
+ */
+//prog	:	expr_or_assign expr_or_assign ;
+
+// not ambig, context different
+//prog	:	expr_or_assign prog | ;
+
+expr_or_assign
+	:	expr '=' expr_or_assign
+	|	expr	// match ID a, fall out, reenter, match "(i)<-x" via alt 1
+        ;
+
+expr : expr_primary ('<-' ID)? ;
+expr_primary
+    : '(' ID ')'
+    | ID '(' ID ')'
+    | ID
+    ;
+
+/*
+expr	:	'(' ID ')'  // and this
+	|	expr '<-'<assoc=right> ID
+	|	ID '(' ID ')'
+ 	|	ID
+	;
+*/
+
+HEX	:	'0' ('x'|'X') HEXDIGIT+ [Ll]? ;
+
+INT	:	DIGIT+ [Ll]? ;
+
+fragment
+HEXDIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
+
+FLOAT	:	DIGIT+ '.' DIGIT* EXP? [Ll]?
+	|	DIGIT+ EXP? [Ll]?
+	|	'.' DIGIT+ EXP? [Ll]?
+	;
+fragment
+DIGIT	:   '0'..'9' ;
+fragment
+EXP	:   ('E' | 'e') ('+' | '-')? INT ;
+
+COMPLEX	:   INT 'i'
+	|   FLOAT 'i'
+	;
+
+STRING	:	'"' ( ESC | ~('\\'|'"') )* '"'
+	|	'\'' ( ESC | ~('\\'|'\'') )* '\''
+	;
+
+fragment
+ESC
+    :   '\\' ([abtnfrv]|'"'|'\'')
+    |   UNICODE_ESCAPE
+    |	HEX_ESCAPE
+    |   OCTAL_ESCAPE
+    ;
+
+fragment
+UNICODE_ESCAPE
+    :   '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
+    |   '\\' 'u' '{' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT '}'
+    ;
+
+fragment
+OCTAL_ESCAPE
+    :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
+    |   '\\' ('0'..'7') ('0'..'7')
+    |   '\\' ('0'..'7')
+    ;
+
+fragment
+HEX_ESCAPE
+    :   '\\' HEXDIGIT HEXDIGIT?
+    ;
+
+ID      :   '.'? (LETTER|'_'|'.') (LETTER|DIGIT|'_'|'.')*
+	|   LETTER (LETTER|DIGIT|'_'|'.')*
+	;
+
+fragment
+LETTER      :   'a'..'z'|'A'..'Z'|'\u0080'..'\u00FF' ;
+
+USER_OP	    :	'%' .* '%' ;
+
+COMMENT :   '#' .* '\n' {skip();} ;
+
+/** Doesn't handle '\n' correctly. it's context-sensitive */
+WS          :   (' '|'\t'|'\n'|'\r')+ {skip();} ;
--- a/tool/playground/TestR.java
+++ b/tool/playground/TestR.java
@ -0,0 +1,48 @@
+/*
+ [The "BSD license"]
+  Copyright (c) 2011 Terence Parr
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+  2. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+  3. The name of the author may not be used to endorse or promote products
+     derived from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import org.antlr.v4.runtime.ANTLRFileStream;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.DiagnosticErrorStrategy;
+import org.antlr.v4.runtime.Token;
+
+public class TestR { // command-line driver: lexes and parses one input file with RLexer/RParser
+	public static void main(String[] args) throws Exception { // args[0] is the path of the file to parse
+		RLexer t = new RLexer(new ANTLRFileStream(args[0])); // NOTE(review): args[0] is read without checking that an argument was supplied
+		CommonTokenStream tokens = new CommonTokenStream(t);
+//		tokens.fill();
+//		for (Object tok : tokens.getTokens()) {
+//			System.out.println(tok);
+//		}
+		RParser p = new RParser(tokens);
+		p.setBuildParseTree(true); // request a parse tree, although the value returned by prog() below is not kept
+		p.setErrorHandler(new DiagnosticErrorStrategy<Token>()); // presumably installed to surface ambiguity diagnostics for the grammar -- TODO confirm
+		p.prog(); // start parsing at rule prog
+	}
+}