diff --git a/tool/playground/R.g4 b/tool/playground/R.g4 new file mode 100644 index 000000000..29c32b717 --- /dev/null +++ b/tool/playground/R.g4 @@ -0,0 +1,151 @@ +/** +derived from http://svn.r-project.org/R/trunk/src/main/gram.y +http://cran.r-project.org/doc/manuals/R-lang.html#Parser +*/ +grammar R; + +// ambig upon a(i)<- (delayed a bit since ';' could follow--really ambig on "a(i)") + +/** ambig since the stacks are exactly the same as it loops around; no way to distinguish + + I tried tracking input index in stack to differentiate the 2 invocations + of expr_or_assign, but that would mean altering our context from + the decision-making in expr_or_assign. Also, later we need to have + context stacks that are not dependent on input position to reuse them. + + The fact that the recursive version correctly matches the input while the + looping version does not is a problem. We base the notion of ambiguous + on the same state, different alternatives, same stack. But, if the + rule invocation stack does not uniquely indicate context, we are not accurately + detecting ambiguities. We are detecting ambiguities overzealously. + + We need a way for the context stack or configuration to distinguish between + iterations of the loop that dive into the same rule such as expr_or_assign*. + Perhaps the answer is to track iteration number in the configuration: + + (s, alt, ctx, iter#) + + When we reach the state following '<-', say p, in expr then we need + + (p, 1, [expr expr_or_assign prog], 1) + (p, 2, [expr expr_or_assign prog], 2) + + But that number alone would not be useful... we might pass through 3 or 4 loops. + The iteration index really has to be a part of the stack context. 
+ Perhaps we add an additional stack element as if we were doing the + recursive version + + prog : expr_or_assign prog | ; + + (p, 1, [expr expr_or_assign prog]) + (p, 2, [expr expr_or_assign prog expr_or_assign prog]) + + The "expr expr_or_assign prog" represents the second call back down + into expr_or_assign like the loop would except that the stack looks different. + + Or, we could mark stack references with the loop iteration index. + + (p, 1, [expr expr_or_assign prog]) + (p, 2, [expr expr_or_assign.2 prog]) + + This seems reusable as opposed to the input index. It might be complicated + to track this. In the general case, we would need a mapping from rule + invocation of rule r to a count, and within a specific rule context. That + might add a HashMap for every RuleContext. Ick. Also, what about the context + that I create during ATN simulation? I would have to track that as well + as the generated code in the parser. Rule invocation states would act + like triggers that would bump a count for that target rule in the current ctx. +*/ +prog : expr_or_assign* ; + +/** This one is not ambig since 2nd time into expr_or_assign has different + context whereas expr_or_assign* shows the same context. + */ +//prog : expr_or_assign expr_or_assign ; + +// not ambig, context different +//prog : expr_or_assign prog | ; + +expr_or_assign + : expr '=' expr_or_assign + | expr // match ID a, fall out, reenter, match "(i)<-x" via alt 1 + ; + +expr : expr_primary ('<-' ID)? ; +expr_primary + : '(' ID ')' + | ID '(' ID ')' + | ID + ; + +/* +expr : '(' ID ')' // and this + | expr '<-' ID + | ID '(' ID ')' + | ID + ; +*/ + +HEX : '0' ('x'|'X') HEXDIGIT+ [Ll]? ; + +INT : DIGIT+ [Ll]? ; + +fragment +HEXDIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ; + +FLOAT : DIGIT+ '.' DIGIT* EXP? [Ll]? + | DIGIT+ EXP? [Ll]? + | '.' DIGIT+ EXP? [Ll]? + ; +fragment +DIGIT : '0'..'9' ; +fragment +EXP : ('E' | 'e') ('+' | '-')? 
+ INT ;
+
+COMPLEX : INT 'i'
+        | FLOAT 'i'
+        ;
+
+STRING : '"' ( ESC | ~('\\'|'"') )* '"'
+       | '\'' ( ESC | ~('\\'|'\'') )* '\''
+       ;
+
+/** Backslash escape sequences accepted inside STRING: the single-character
+ *  escapes, then the Unicode, hex and octal forms. The escaped backslash is
+ *  part of the single-character set; without it a literal such as "a\\b"
+ *  cannot be matched, because STRING's other branch excludes a bare backslash.
+ */
+fragment
+ESC
+    : '\\' ([abtnfrv]|'"'|'\''|'\\')
+    | UNICODE_ESCAPE
+    | HEX_ESCAPE
+    | OCTAL_ESCAPE
+    ;
+
+fragment
+UNICODE_ESCAPE
+    : '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
+    | '\\' 'u' '{' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT '}'
+    ;
+
+fragment
+OCTAL_ESCAPE
+    : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
+    | '\\' ('0'..'7') ('0'..'7')
+    | '\\' ('0'..'7')
+    ;
+
+/** Hex escape: backslash, 'x', then one or two hex digits (for example \x41).
+ *  The 'x' is required; without it this rule could never match "\x41" and
+ *  instead accepted a backslash followed directly by any hex digit.
+ */
+fragment
+HEX_ESCAPE
+    : '\\' 'x' HEXDIGIT HEXDIGIT?
+    ;
+
+ID : '.'? (LETTER|'_'|'.') (LETTER|DIGIT|'_'|'.')*
+   | LETTER (LETTER|DIGIT|'_'|'.')*
+   ;
+
+fragment
+LETTER : 'a'..'z'|'A'..'Z'|'\u0080'..'\u00FF' ;
+
+/** NOTE(review): released ANTLR 4 treats a bare .* as greedy. If this grammar
+ *  is built with such a version, .*? is probably what is intended here and in
+ *  COMMENT -- confirm against the tool version in use before changing it.
+ */
+USER_OP : '%' .* '%' ;
+
+COMMENT : '#' .* '\n' {skip();} ;
+
+/** Doesn't handle '\n' correctly. it's context-sensitive */
+WS : (' '|'\t'|'\n'|'\r')+ {skip();} ;
diff --git a/tool/playground/TestR.java b/tool/playground/TestR.java
new file mode 100644
index 000000000..fc4c0b98f
--- /dev/null
+++ b/tool/playground/TestR.java
@@ -0,0 +1,48 @@
+/*
+ [The "BSD license"]
+ Copyright (c) 2011 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+    derived from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +import org.antlr.v4.runtime.ANTLRFileStream; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.DiagnosticErrorStrategy; +import org.antlr.v4.runtime.Token; + +public class TestR { + public static void main(String[] args) throws Exception { + RLexer t = new RLexer(new ANTLRFileStream(args[0])); + CommonTokenStream tokens = new CommonTokenStream(t); +// tokens.fill(); +// for (Object tok : tokens.getTokens()) { +// System.out.println(tok); +// } + RParser p = new RParser(tokens); + p.setBuildParseTree(true); + p.setErrorHandler(new DiagnosticErrorStrategy()); + p.prog(); + } +}