From 8e89af918f35839ca8bfeb420404c68df553ed9e Mon Sep 17 00:00:00 2001 From: parrt Date: Thu, 25 Jun 2015 12:25:42 -0700 Subject: [PATCH] all Python2 runtime tests pass --- antlr4.iml | 1 - .../v4/test/runtime/python2/Python2.test.stg | 399 +++++ .../test/runtime/python2/BasePython2Test.java | 94 + .../runtime/python2/TestCompositeParsers.java | 2 +- runtime/Python2/LICENSE.txt | 26 + runtime/Python2/MANIFEST.in | 1 + runtime/Python2/README.txt | 4 + runtime/Python2/RELEASE-4.5.txt | 13 + runtime/Python2/setup.py | 13 + .../Python2/src/antlr4/BufferedTokenStream.py | 328 ++++ .../Python2/src/antlr4/CommonTokenFactory.py | 84 + .../Python2/src/antlr4/CommonTokenStream.py | 110 ++ runtime/Python2/src/antlr4/FileStream.py | 58 + runtime/Python2/src/antlr4/InputStream.py | 133 ++ runtime/Python2/src/antlr4/IntervalSet.py | 297 ++++ runtime/Python2/src/antlr4/LL1Analyzer.py | 195 +++ runtime/Python2/src/antlr4/Lexer.py | 343 ++++ runtime/Python2/src/antlr4/ListTokenSource.py | 139 ++ runtime/Python2/src/antlr4/Parser.py | 575 +++++++ .../Python2/src/antlr4/ParserInterpreter.py | 187 ++ .../Python2/src/antlr4/ParserRuleContext.py | 188 ++ .../Python2/src/antlr4/PredictionContext.py | 660 +++++++ runtime/Python2/src/antlr4/Recognizer.py | 168 ++ runtime/Python2/src/antlr4/RuleContext.py | 234 +++ runtime/Python2/src/antlr4/StdinStream.py | 48 + runtime/Python2/src/antlr4/Token.py | 184 ++ runtime/Python2/src/antlr4/Utils.py | 64 + runtime/Python2/src/antlr4/__init__.py | 1 + runtime/Python2/src/antlr4/atn/ATN.py | 147 ++ runtime/Python2/src/antlr4/atn/ATNConfig.py | 154 ++ .../Python2/src/antlr4/atn/ATNConfigSet.py | 239 +++ .../antlr4/atn/ATNDeserializationOptions.py | 46 + .../Python2/src/antlr4/atn/ATNDeserializer.py | 542 ++++++ .../Python2/src/antlr4/atn/ATNSimulator.py | 70 + runtime/Python2/src/antlr4/atn/ATNState.py | 283 +++ runtime/Python2/src/antlr4/atn/ATNType.py | 37 + .../src/antlr4/atn/LexerATNSimulator.py | 588 +++++++ runtime/Python2/src/antlr4/atn/LexerAction.py | 316 ++++ .../src/antlr4/atn/LexerActionExecutor.py | 160 ++ .../src/antlr4/atn/ParserATNSimulator.py | 1523 +++++++++++++++++ .../Python2/src/antlr4/atn/PredictionMode.py | 544 ++++++ .../Python2/src/antlr4/atn/SemanticContext.py | 360 ++++ runtime/Python2/src/antlr4/atn/Transition.py | 279 +++ runtime/Python2/src/antlr4/atn/__init__.py | 1 + runtime/Python2/src/antlr4/dfa/DFA.py | 150 ++ .../Python2/src/antlr4/dfa/DFASerializer.py | 100 ++ runtime/Python2/src/antlr4/dfa/DFAState.py | 156 ++ runtime/Python2/src/antlr4/dfa/__init__.py | 1 + .../antlr4/error/DiagnosticErrorListener.py | 133 ++ .../Python2/src/antlr4/error/ErrorListener.py | 97 ++ .../Python2/src/antlr4/error/ErrorStrategy.py | 719 ++++++++ runtime/Python2/src/antlr4/error/Errors.py | 177 ++ runtime/Python2/src/antlr4/error/__init__.py | 1 + runtime/Python2/src/antlr4/tree/Chunk.py | 26 + .../Python2/src/antlr4/tree/ParseTreeMatch.py | 145 ++ .../src/antlr4/tree/ParseTreePattern.py | 94 + .../antlr4/tree/ParseTreePatternMatcher.py | 392 +++++ .../Python2/src/antlr4/tree/RuleTagToken.py | 74 + .../Python2/src/antlr4/tree/TokenTagToken.py | 72 + runtime/Python2/src/antlr4/tree/Tree.py | 191 +++ runtime/Python2/src/antlr4/tree/Trees.py | 134 ++ runtime/Python2/src/antlr4/tree/__init__.py | 0 runtime/Python2/src/antlr4/xpath/XPath.py | 346 ++++ runtime/Python2/src/antlr4/xpath/__init__.py | 1 + .../templates/codegen/Python2/Python2.stg | 805 +++++++++ 65 files changed, 13650 insertions(+), 2 deletions(-) create mode 100644 
runtime-testsuite/resources/org/antlr/v4/test/runtime/python2/Python2.test.stg create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/python2/BasePython2Test.java create mode 100644 runtime/Python2/LICENSE.txt create mode 100644 runtime/Python2/MANIFEST.in create mode 100644 runtime/Python2/README.txt create mode 100644 runtime/Python2/RELEASE-4.5.txt create mode 100644 runtime/Python2/setup.py create mode 100644 runtime/Python2/src/antlr4/BufferedTokenStream.py create mode 100644 runtime/Python2/src/antlr4/CommonTokenFactory.py create mode 100644 runtime/Python2/src/antlr4/CommonTokenStream.py create mode 100644 runtime/Python2/src/antlr4/FileStream.py create mode 100644 runtime/Python2/src/antlr4/InputStream.py create mode 100644 runtime/Python2/src/antlr4/IntervalSet.py create mode 100644 runtime/Python2/src/antlr4/LL1Analyzer.py create mode 100644 runtime/Python2/src/antlr4/Lexer.py create mode 100644 runtime/Python2/src/antlr4/ListTokenSource.py create mode 100644 runtime/Python2/src/antlr4/Parser.py create mode 100644 runtime/Python2/src/antlr4/ParserInterpreter.py create mode 100644 runtime/Python2/src/antlr4/ParserRuleContext.py create mode 100644 runtime/Python2/src/antlr4/PredictionContext.py create mode 100644 runtime/Python2/src/antlr4/Recognizer.py create mode 100644 runtime/Python2/src/antlr4/RuleContext.py create mode 100644 runtime/Python2/src/antlr4/StdinStream.py create mode 100644 runtime/Python2/src/antlr4/Token.py create mode 100644 runtime/Python2/src/antlr4/Utils.py create mode 100644 runtime/Python2/src/antlr4/__init__.py create mode 100644 runtime/Python2/src/antlr4/atn/ATN.py create mode 100644 runtime/Python2/src/antlr4/atn/ATNConfig.py create mode 100755 runtime/Python2/src/antlr4/atn/ATNConfigSet.py create mode 100644 runtime/Python2/src/antlr4/atn/ATNDeserializationOptions.py create mode 100644 runtime/Python2/src/antlr4/atn/ATNDeserializer.py create mode 100644 runtime/Python2/src/antlr4/atn/ATNSimulator.py create mode 100644 runtime/Python2/src/antlr4/atn/ATNState.py create mode 100644 runtime/Python2/src/antlr4/atn/ATNType.py create mode 100644 runtime/Python2/src/antlr4/atn/LexerATNSimulator.py create mode 100644 runtime/Python2/src/antlr4/atn/LexerAction.py create mode 100644 runtime/Python2/src/antlr4/atn/LexerActionExecutor.py create mode 100755 runtime/Python2/src/antlr4/atn/ParserATNSimulator.py create mode 100644 runtime/Python2/src/antlr4/atn/PredictionMode.py create mode 100644 runtime/Python2/src/antlr4/atn/SemanticContext.py create mode 100644 runtime/Python2/src/antlr4/atn/Transition.py create mode 100644 runtime/Python2/src/antlr4/atn/__init__.py create mode 100644 runtime/Python2/src/antlr4/dfa/DFA.py create mode 100644 runtime/Python2/src/antlr4/dfa/DFASerializer.py create mode 100644 runtime/Python2/src/antlr4/dfa/DFAState.py create mode 100644 runtime/Python2/src/antlr4/dfa/__init__.py create mode 100644 runtime/Python2/src/antlr4/error/DiagnosticErrorListener.py create mode 100644 runtime/Python2/src/antlr4/error/ErrorListener.py create mode 100644 runtime/Python2/src/antlr4/error/ErrorStrategy.py create mode 100644 runtime/Python2/src/antlr4/error/Errors.py create mode 100644 runtime/Python2/src/antlr4/error/__init__.py create mode 100644 runtime/Python2/src/antlr4/tree/Chunk.py create mode 100644 runtime/Python2/src/antlr4/tree/ParseTreeMatch.py create mode 100644 runtime/Python2/src/antlr4/tree/ParseTreePattern.py create mode 100644 runtime/Python2/src/antlr4/tree/ParseTreePatternMatcher.py create mode 100644 
runtime/Python2/src/antlr4/tree/RuleTagToken.py create mode 100644 runtime/Python2/src/antlr4/tree/TokenTagToken.py create mode 100644 runtime/Python2/src/antlr4/tree/Tree.py create mode 100644 runtime/Python2/src/antlr4/tree/Trees.py create mode 100644 runtime/Python2/src/antlr4/tree/__init__.py create mode 100644 runtime/Python2/src/antlr4/xpath/XPath.py create mode 100644 runtime/Python2/src/antlr4/xpath/__init__.py create mode 100644 tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg diff --git a/antlr4.iml b/antlr4.iml index dfb0b34a6..4bb68d53b 100644 --- a/antlr4.iml +++ b/antlr4.iml @@ -20,7 +20,6 @@ - diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/python2/Python2.test.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/python2/Python2.test.stg new file mode 100644 index 000000000..401d28190 --- /dev/null +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/python2/Python2.test.stg @@ -0,0 +1,399 @@ +IgnoredTests ::= [ + default: false +] + +TestFile(file) ::= << +/* This file is generated by TestGenerator, any edits will be overwritten by the next generation. */ +package org.antlr.v4.test.runtime.python2; + +import org.junit.Ignore; +import org.junit.Test; +import static org.junit.Assert.*; + + +import org.antlr.v4.test.runtime.java.ErrorQueue; + + +import org.antlr.v4.tool.Grammar; + + +@SuppressWarnings("unused") +public class Test extends BasePython2Test { + + }; separator="\n", wrap, anchor> + +} +>> + +LexerTestMethod(test) ::= << +/* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */ + +public void test() throws Exception { + mkdir(tmpdir); + + =; + writeFile(tmpdir, ".g4", slave_); + }; separator="\n"> + + + + + + String input =; + String found = execLexer(".g4", grammar, "Lexer", input, ); + assertEquals(, found); + + assertEquals(, this.stderrDuringParse); + + assertNull(this.stderrDuringParse); + + }> +} + +>> + +CompositeLexerTestMethod(test) ::= << + +>> + +ParserTestMethod(test) ::= << +/* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. 
*/ + +public void test() throws Exception { + mkdir(tmpdir); + + =; + + rawGenerateAndBuildRecognizer(".g4", slave_, null, ""); + + writeFile(tmpdir, ".g4", slave_); + + }; separator="\n"> + + + + + String input =; + String found = execParser(".g4", grammar, "Parser", "Lexer", "Listener", "Visitor", "", input, ); + + assertEquals(, found); + + assertEquals(, this.stderrDuringParse); + + assertNull(this.stderrDuringParse); + + }> +} + +>> + +CompositeParserTestMethod(test) ::= << + +>> + +AbstractParserTestMethod(test) ::= << +/* this file and method are generated, any edit will be overwritten by the next generation */ +String test(String input) throws Exception { + String grammar = };separator="\\n\" +\n", wrap, anchor>"; + return execParser(".g4", grammar, "Parser", "Lexer", "", input, ); +} + +>> + +ConcreteParserTestMethod(test) ::= << +/* this file and method are generated, any edit will be overwritten by the next generation */ +@Test +public void test() throws Exception { + String found = test(""); + assertEquals("", found); + + assertEquals("", this.stderrDuringParse); + + assertNull(this.stderrDuringParse); + +} + +>> + +testAnnotations(test) ::= <% +@Test + +<\n>@Ignore() +.})))> +<\n>@Ignore(.})))>) + +%> + +buildStringLiteral(text, variable) ::= << +StringBuilder Builder = new StringBuilder(); +Builder.append("");}; separator="\n"> +String = Builder.toString(); +>> + +writeStringLiteral(text) ::= <% + +"" + + + +%> + +writeLines(textLines) ::= <% + + "}; separator="\" +">" + +"" + +%> + +string(text) ::= << +"" +>> + +writeBoolean(o) ::= "truefalse" + +writeln(s) ::= <)>> + +write(s) ::= <,end='')>> + +False() ::= "False" + +True() ::= "True" + +Not(v) ::= "not " + +Assert(s) ::= "" + +Cast(t,v) ::= "" + +Append(a,b) ::= " + str()" + +Concat(a,b) ::= "" + +DeclareLocal(s,v) ::= " = " + +AssertIsList(v) ::= "assert isinstance(v, (list, tuple))" + +AssignLocal(s,v) ::= " = " + +InitIntMember(n,v) ::= <% = %> + +InitBooleanMember(n,v) ::= <% = %> + +GetMember(n) ::= <%self.%> + +SetMember(n,v) ::= <%self. = %> + +AddMember(n,v) ::= <%self. += %> + +PlusMember(v,n) ::= <% + str(self.)%> + +MemberEquals(n,v) ::= <%self. == %> + +ModMemberEquals(n,m,v) ::= <%self. % == %> + +ModMemberNotEquals(n,m,v) ::= <%self. 
% != %> + +DumpDFA() ::= "self.dumpDFA()" + +Pass() ::= "pass" + +StringList() ::= "" + +BuildParseTrees() ::= "self._buildParseTrees = True" + +BailErrorStrategy() ::= <%self._errHandler = BailErrorStrategy()%> + +ToStringTree(s) ::= <%.toStringTree(recog=self)%> + +Column() ::= "self.column" + +Text() ::= "self.text" + +ValEquals(a,b) ::= <%==%> + +TextEquals(a) ::= <%self.text==""%> + +PlusText(a) ::= <%"" + self.text%> + +InputText() ::= "self._input.getText()" + +LTEquals(i, v) ::= <%self._input.LT().text==%> + +LANotEquals(i, v) ::= <%self._input.LA()!=%> + +TokenStartColumnEquals(i) ::= <%self._tokenStartColumn==%> + +ImportListener(X) ::= "" + +GetExpectedTokenNames() ::= "self.getExpectedTokens().toString(self.literalNames, self.symbolicNames)" + +RuleInvocationStack() ::= "str_list(self.getRuleInvocationStack())" + +LL_EXACT_AMBIG_DETECTION() ::= <> + +ParserPropertyMember() ::= << +@members { +def Property(self): + return True + +} +>> + +PositionAdjustingLexer() ::= << + +def resetAcceptPosition(self, index, line, column): + self._input.seek(index) + self.line = line + self.column = column + self._interp.consume(self._input) + +def nextToken(self): + if self._interp.__dict__.get("resetAcceptPosition", None) is None: + self._interp.__dict__["resetAcceptPosition"] = self.resetAcceptPosition + return super(type(self),self).nextToken() + +def emit(self): + if self._type==PositionAdjustingLexer.TOKENS: + self.handleAcceptPositionForKeyword("tokens") + elif self._type==PositionAdjustingLexer.LABEL: + self.handleAcceptPositionForIdentifier() + return super(type(self),self).emit() + +def handleAcceptPositionForIdentifier(self): + tokenText = self.text + identifierLength = 0 + while identifierLength \< len(tokenText) and self.isIdentifierChar(tokenText[identifierLength]): + identifierLength += 1 + + if self._input.index > self._tokenStartCharIndex + identifierLength: + offset = identifierLength - 1 + self._interp.resetAcceptPosition(self._tokenStartCharIndex + offset, + self._tokenStartLine, self._tokenStartColumn + offset) + return True + else: + return False + + +def handleAcceptPositionForKeyword(self, keyword): + if self._input.index > self._tokenStartCharIndex + len(keyword): + offset = len(keyword) - 1 + self._interp.resetAcceptPosition(self._tokenStartCharIndex + offset, + self._tokenStartLine, self._tokenStartColumn + offset) + return True + else: + return False + +@staticmethod +def isIdentifierChar(c): + return c.isalnum() or c == '_' + +>> + +BasicListener(X) ::= << +if __name__ is not None and "." in __name__: + from .Listener import Listener +else: + from Listener import Listener + +class LeafListener(TListener): + def visitTerminal(self, node): + print(node.symbol.text) + +>> + +WalkListener(s) ::= << +walker = ParseTreeWalker() +walker.walk(TParser.LeafListener(), ) +>> + +TokenGetterListener(X) ::= << +if __name__ is not None and "." in __name__: + from .Listener import Listener +else: + from Listener import Listener + +class LeafListener(TListener): + def exitA(self, ctx): + if ctx.getChildCount()==2: + print(ctx.INT(0).symbol.text + ' ' + ctx.INT(1).symbol.text + ' ' + str_list(ctx.INT())) + else: + print(str(ctx.ID().symbol)) + +>> + +RuleGetterListener(X) ::= << +if __name__ is not None and "." 
in __name__: + from .Listener import Listener +else: + from Listener import Listener + +class LeafListener(TListener): + def exitA(self, ctx): + if ctx.getChildCount()==2: + print(ctx.b(0).start.text + ' ' + ctx.b(1).start.text + ' ' + ctx.b()[0].start.text) + else: + print(ctx.b(0).start.text) + +>> + + +LRListener(X) ::= << +if __name__ is not None and "." in __name__: + from .Listener import Listener +else: + from Listener import Listener + +class LeafListener(TListener): + def exitE(self, ctx): + if ctx.getChildCount()==3: + print(ctx.e(0).start.text + ' ' + ctx.e(1).start.text + ' ' + ctx.e()[0].start.text) + else: + print(ctx.INT().symbol.text) + +>> + +LRWithLabelsListener(X) ::= << +if __name__ is not None and "." in __name__: + from .Listener import Listener +else: + from Listener import Listener + +class LeafListener(TListener): + def exitCall(self, ctx): + print(ctx.e().start.text + ' ' + str(ctx.eList())) + def exitInt(self, ctx): + print(ctx.INT().symbol.text) + +>> + +DeclareContextListGettersFunction() ::= << +def foo(): + s = SContext() + a = s.a() + b = s.b() +>> + +Declare_foo() ::= <> + +Invoke_foo() ::= "self.foo()" + +Declare_pred() ::= < import \n" + + "from import \n" + + "from import \n" + + "from import \n" + + "\n" + + "class TreeShapeListener(ParseTreeListener):\n" + + "\n" + + " def visitTerminal(self, node):\n" + + " pass\n" + + "\n" + + " def visitErrorNode(self, node):\n" + + " pass\n" + + "\n" + + " def exitEveryRule(self, ctx):\n" + + " pass\n" + + "\n" + + " def enterEveryRule(self, ctx):\n" + + " for child in ctx.getChildren():\n" + + " parent = child.parentCtx\n" + + " if not isinstance(parent, RuleNode) or parent.getRuleContext() != ctx:\n" + + " raise IllegalStateException(\"Invalid parse tree shape detected.\")\n" + + "\n" + + "def main(argv):\n" + + " input = FileStream(argv[1])\n" + + " lexer = (input)\n" + + " stream = CommonTokenStream(lexer)\n" + + "" + + " parser.buildParseTrees = True\n" + + " tree = parser.\n" + + " ParseTreeWalker.DEFAULT.walk(TreeShapeListener(), tree)\n" + + "\n" + "if __name__ == '__main__':\n" + + " main(sys.argv)\n" + "\n"); + String stSource = " parser = (stream)\n"; + if(debug) + stSource += " parser.addErrorListener(DiagnosticErrorListener())\n"; + if(trace) + stSource += " parser.setTrace(True)\n"; + ST createParserST = new ST(stSource); + outputFileST.add("createParser", createParserST); + outputFileST.add("parserName", parserName); + outputFileST.add("lexerName", lexerName); + outputFileST.add("listenerName", listenerName); + outputFileST.add("visitorName", visitorName); + outputFileST.add("parserStartRuleName", parserStartRuleName); + writeFile(tmpdir, "Test.py", outputFileST.render()); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/TestCompositeParsers.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/TestCompositeParsers.java index 6a17897bd..5225d1738 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/TestCompositeParsers.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/TestCompositeParsers.java @@ -1,7 +1,7 @@ /* This file is generated by TestGenerator, any edits will be overwritten by the next generation. 
*/ package org.antlr.v4.test.runtime.python2; -import org.antlr.v4.test.tool.ErrorQueue; +import org.antlr.v4.test.runtime.java.ErrorQueue; import org.antlr.v4.tool.Grammar; import org.junit.Test; diff --git a/runtime/Python2/LICENSE.txt b/runtime/Python2/LICENSE.txt new file mode 100644 index 000000000..dff211c19 --- /dev/null +++ b/runtime/Python2/LICENSE.txt @@ -0,0 +1,26 @@ +[The "BSD license"] +Copyright (c) 2015 Terence Parr, Sam Harwell, Eric Vergnaud +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/runtime/Python2/MANIFEST.in b/runtime/Python2/MANIFEST.in new file mode 100644 index 000000000..86604e496 --- /dev/null +++ b/runtime/Python2/MANIFEST.in @@ -0,0 +1 @@ +include *.txt \ No newline at end of file diff --git a/runtime/Python2/README.txt b/runtime/Python2/README.txt new file mode 100644 index 000000000..1bf975052 --- /dev/null +++ b/runtime/Python2/README.txt @@ -0,0 +1,4 @@ +This is the Python 2.7 runtime for AntLR. +Visit the AntLR web sites for more information: +http://www.antlr.org +http://theantlrguy.atlassian.net/wiki/display/ANTLR4/Python+Target \ No newline at end of file diff --git a/runtime/Python2/RELEASE-4.5.txt b/runtime/Python2/RELEASE-4.5.txt new file mode 100644 index 000000000..8f976dc77 --- /dev/null +++ b/runtime/Python2/RELEASE-4.5.txt @@ -0,0 +1,13 @@ +What's in this release? + + - fixed minor bugs due to mix of ascii/unicode encoding + - fixed bug where non-ascii input streams would fail + - added support for visitor pattern + - added support for wildcards in grammar + + Breaking change: + + In version 4.4, the parser/lexer had a tokenNames member. 
+ This has been removed in favor of the following members: + - lexicalNames, containing the parsed text + - symbolicNames, corresponding to tokenNames diff --git a/runtime/Python2/setup.py b/runtime/Python2/setup.py new file mode 100644 index 000000000..0eeb03c5f --- /dev/null +++ b/runtime/Python2/setup.py @@ -0,0 +1,13 @@ +from distutils.core import setup + +setup( + name='antlr4-python2-runtime', + version='4.5.2', + packages=['antlr4', 'antlr4.atn', 'antlr4.dfa', 'antlr4.tree', 'antlr4.error', 'antlr4.xpath'], + package_dir={'': 'src'}, + url='http://www.antlr.org', + license='BSD', + author='Eric Vergnaud, Terence Parr, Sam Harwell', + author_email='eric.vergnaud@wanadoo.fr', + description='ANTLR 4.5.2 runtime for Python 2.7.6' +) diff --git a/runtime/Python2/src/antlr4/BufferedTokenStream.py b/runtime/Python2/src/antlr4/BufferedTokenStream.py new file mode 100644 index 000000000..eee619a20 --- /dev/null +++ b/runtime/Python2/src/antlr4/BufferedTokenStream.py @@ -0,0 +1,328 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This implementation of {@link TokenStream} loads tokens from a +# {@link TokenSource} on-demand, and places the tokens in a buffer to provide +# access to any previous token by index. +# +#

+# This token stream ignores the value of {@link Token#getChannel}. If your +# parser requires the token stream to filter tokens to only those on a particular +# channel, such as {@link Token#DEFAULT_CHANNEL} or +# {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as +# {@link CommonTokenStream}.
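For orientation, a minimal usage sketch (illustrative only, not part of this patch; MyLexer stands in for any ANTLR-generated lexer) showing how tokens are buffered on demand and accessed by index:

    from antlr4.InputStream import InputStream
    from antlr4.BufferedTokenStream import BufferedTokenStream
    from MyLexer import MyLexer                    # hypothetical generated lexer

    stream = BufferedTokenStream(MyLexer(InputStream(u"a b c")))
    stream.fill()                                  # fetch every token from the lexer up to EOF
    print len(stream.tokens)                       # the buffer keeps all fetched tokens
    print stream.get(0).text                       # any previously fetched token is reachable by index
    print stream.LT(1).type                        # lookahead here ignores channel information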

+from io import StringIO + +from antlr4.Token import Token +from antlr4.error.Errors import IllegalStateException + + +# this is just to keep meaningful parameter types to Parser +class TokenStream(object): + + pass + + +class BufferedTokenStream(TokenStream): + + def __init__(self, tokenSource): + # The {@link TokenSource} from which tokens for this stream are fetched. + self.tokenSource = tokenSource + + # A collection of all tokens fetched from the token source. The list is + # considered a complete view of the input once {@link #fetchedEOF} is set + # to {@code true}. + self.tokens = [] + + # The index into {@link #tokens} of the current token (next token to + # {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be + # {@link #LT LT(1)}. + # + #

This field is set to -1 when the stream is first constructed or when + # {@link #setTokenSource} is called, indicating that the first token has + # not yet been fetched from the token source. For additional information, + # see the documentation of {@link IntStream} for a description of + # Initializing Methods.

+ self.index = -1 + + # Indicates whether the {@link Token#EOF} token has been fetched from + # {@link #tokenSource} and added to {@link #tokens}. This field improves + # performance for the following cases: + # + #
    + #
  • {@link #consume}: The lookahead check in {@link #consume} to prevent + # consuming the EOF symbol is optimized by checking the values of + # {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.
  • {@link #fetch}: The check to prevent adding multiple EOF symbols into + # {@link #tokens} is trivial with this field.
      + self.fetchedEOF = False + + def mark(self): + return 0 + + def release(self, marker): + # no resources to release + pass + + def reset(self): + self.seek(0) + + def seek(self, index): + self.lazyInit() + self.index = self.adjustSeekIndex(index) + + def get(self, index): + self.lazyInit() + return self.tokens[index] + + def consume(self): + skipEofCheck = False + if self.index >= 0: + if self.fetchedEOF: + # the last token in tokens is EOF. skip check if p indexes any + # fetched token except the last. + skipEofCheck = self.index < len(self.tokens) - 1 + else: + # no EOF token in tokens. skip check if p indexes a fetched token. + skipEofCheck = self.index < len(self.tokens) + else: + # not yet initialized + skipEofCheck = False + + if not skipEofCheck and self.LA(1) == Token.EOF: + raise IllegalStateException("cannot consume EOF") + + if self.sync(self.index + 1): + self.index = self.adjustSeekIndex(self.index + 1) + + # Make sure index {@code i} in tokens has a token. + # + # @return {@code true} if a token is located at index {@code i}, otherwise + # {@code false}. + # @see #get(int i) + #/ + def sync(self, i): + assert i >= 0 + n = i - len(self.tokens) + 1 # how many more elements we need? + if n > 0 : + fetched = self.fetch(n) + return fetched >= n + return True + + # Add {@code n} elements to buffer. + # + # @return The actual number of elements added to the buffer. + #/ + def fetch(self, n): + if self.fetchedEOF: + return 0 + for i in range(0, n): + t = self.tokenSource.nextToken() + t.tokenIndex = len(self.tokens) + self.tokens.append(t) + if t.type==Token.EOF: + self.fetchedEOF = True + return i + 1 + return n + + + # Get all tokens from start..stop inclusively#/ + def getTokens(self, start, stop, types=None): + if start<0 or stop<0: + return None + self.lazyInit() + subset = [] + if stop >= len(self.tokens): + stop = len(self.tokens)-1 + for i in range(start, stop): + t = self.tokens[i] + if t.type==Token.EOF: + break + if types is None or t.type in types: + subset.append(t) + return subset + + def LA(self, i): + return self.LT(i).type + + def LB(self, k): + if (self.index-k) < 0: + return None + return self.tokens[self.index-k] + + def LT(self, k): + self.lazyInit() + if k==0: + return None + if k < 0: + return self.LB(-k) + i = self.index + k - 1 + self.sync(i) + if i >= len(self.tokens): # return EOF token + # EOF must be last token + return self.tokens[len(self.tokens)-1] + return self.tokens[i] + + # Allowed derived classes to modify the behavior of operations which change + # the current stream position by adjusting the target token index of a seek + # operation. The default implementation simply returns {@code i}. If an + # exception is thrown in this method, the current stream index should not be + # changed. + # + #

      For example, {@link CommonTokenStream} overrides this method to ensure that + # the seek target is always an on-channel token.

      + # + # @param i The target token index. + # @return The adjusted target token index. + + def adjustSeekIndex(self, i): + return i + + def lazyInit(self): + if self.index == -1: + self.setup() + + def setup(self): + self.sync(0) + self.index = self.adjustSeekIndex(0) + + # Reset this token stream by setting its token source.#/ + def setTokenSource(self, tokenSource): + self.tokenSource = tokenSource + self.tokens = [] + self.index = -1 + + + + # Given a starting index, return the index of the next token on channel. + # Return i if tokens[i] is on channel. Return -1 if there are no tokens + # on channel between i and EOF. + #/ + def nextTokenOnChannel(self, i, channel): + self.sync(i) + if i>=len(self.tokens): + return -1 + token = self.tokens[i] + while token.channel!=channel: + if token.type==Token.EOF: + return -1 + i += 1 + self.sync(i) + token = self.tokens[i] + return i + + # Given a starting index, return the index of the previous token on channel. + # Return i if tokens[i] is on channel. Return -1 if there are no tokens + # on channel between i and 0. + def previousTokenOnChannel(self, i, channel): + while i>=0 and self.tokens[i].channel!=channel: + i -= 1 + return i + + # Collect all tokens on specified channel to the right of + # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or + # EOF. If channel is -1, find any non default channel token. + def getHiddenTokensToRight(self, tokenIndex, channel=-1): + self.lazyInit() + if tokenIndex<0 or tokenIndex>=len(self.tokens): + raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1)) + from antlr4.Lexer import Lexer + nextOnChannel = self.nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL) + from_ = tokenIndex+1 + # if none onchannel to right, nextOnChannel=-1 so set to = last token + to = (len(self.tokens)-1) if nextOnChannel==-1 else nextOnChannel + return self.filterForChannel(from_, to, channel) + + + # Collect all tokens on specified channel to the left of + # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. + # If channel is -1, find any non default channel token. + def getHiddenTokensToLeft(self, tokenIndex, channel=-1): + self.lazyInit() + if tokenIndex<0 or tokenIndex>=len(self.tokens): + raise Exception(str(tokenIndex) + " not in 0.." 
+ str(len(self.tokens)-1)) + from antlr4.Lexer import Lexer + prevOnChannel = self.previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL) + if prevOnChannel == tokenIndex - 1: + return None + # if none on channel to left, prevOnChannel=-1 then from=0 + from_ = prevOnChannel+1 + to = tokenIndex-1 + return self.filterForChannel(from_, to, channel) + + + def filterForChannel(self, left, right, channel): + hidden = [] + for i in range(left, right+1): + t = self.tokens[i] + if channel==-1: + from antlr4.Lexer import Lexer + if t.channel!= Lexer.DEFAULT_TOKEN_CHANNEL: + hidden.append(t) + elif t.channel==channel: + hidden.append(t) + if len(hidden)==0: + return None + return hidden + + def getSourceName(self): + return self.tokenSource.getSourceName() + + # Get the text of all tokens in this buffer.#/ + def getText(self, interval=None): + self.lazyInit() + self.fill() + if interval is None: + interval = (0, len(self.tokens)-1) + start = interval[0] + if isinstance(start, Token): + start = start.tokenIndex + stop = interval[1] + if isinstance(stop, Token): + stop = stop.tokenIndex + if start is None or stop is None or start<0 or stop<0: + return "" + if stop >= len(self.tokens): + stop = len(self.tokens)-1 + with StringIO() as buf: + for i in range(start, stop+1): + t = self.tokens[i] + if t.type==Token.EOF: + break + buf.write(t.text) + return buf.getvalue() + + + # Get all tokens from lexer until EOF#/ + def fill(self): + self.lazyInit() + while self.fetch(1000)==1000: + pass diff --git a/runtime/Python2/src/antlr4/CommonTokenFactory.py b/runtime/Python2/src/antlr4/CommonTokenFactory.py new file mode 100644 index 000000000..8434479d4 --- /dev/null +++ b/runtime/Python2/src/antlr4/CommonTokenFactory.py @@ -0,0 +1,84 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# This default implementation of {@link TokenFactory} creates +# {@link CommonToken} objects. 
+# +from antlr4.Token import CommonToken + +class TokenFactory(object): + + pass + +class CommonTokenFactory(TokenFactory): + # + # The default {@link CommonTokenFactory} instance. + # + #

      + # This token factory does not explicitly copy token text when constructing + # tokens.

      + # + DEFAULT = None + + def __init__(self, copyText=False): + # Indicates whether {@link CommonToken#setText} should be called after + # constructing tokens to explicitly set the text. This is useful for cases + # where the input stream might not be able to provide arbitrary substrings + # of text from the input after the lexer creates a token (e.g. the + # implementation of {@link CharStream#getText} in + # {@link UnbufferedCharStream} throws an + # {@link UnsupportedOperationException}). Explicitly setting the token text + # allows {@link Token#getText} to be called at any time regardless of the + # input stream implementation. + # + #

      + # The default value is {@code false} to avoid the performance and memory + # overhead of copying text for every token unless explicitly requested.
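As a brief illustration (not part of the patch; MyLexer is a placeholder for a generated lexer), a text-copying factory can be constructed and installed on a lexer by replacing the _factory attribute that Lexer.__init__ (further below) initializes to CommonTokenFactory.DEFAULT:

    from antlr4.InputStream import InputStream
    from antlr4.CommonTokenFactory import CommonTokenFactory

    lexer = MyLexer(InputStream(u"some input"))          # hypothetical generated lexer
    lexer._factory = CommonTokenFactory(copyText=True)   # created tokens now keep their own copy of the text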

      + # + self.copyText = copyText + + def create(self, source, type, text, channel, start, stop, line, column): + t = CommonToken(source, type, channel, start, stop) + t.line = line + t.column = column + if text is not None: + t.text = text + elif self.copyText and source[1] is not None: + t.text = source[1].getText(start,stop) + return t + + def createThin(self, type, text): + t = CommonToken(type=type) + t.text = text + return t + +CommonTokenFactory.DEFAULT = CommonTokenFactory() \ No newline at end of file diff --git a/runtime/Python2/src/antlr4/CommonTokenStream.py b/runtime/Python2/src/antlr4/CommonTokenStream.py new file mode 100644 index 000000000..05ff7f520 --- /dev/null +++ b/runtime/Python2/src/antlr4/CommonTokenStream.py @@ -0,0 +1,110 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ + +# +# This class extends {@link BufferedTokenStream} with functionality to filter +# token streams to tokens on a particular channel (tokens where +# {@link Token#getChannel} returns a particular value). +# +#

      +# This token stream provides access to all tokens by index or when calling +# methods like {@link #getText}. The channel filtering is only used for code +# accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and +# {@link #LB}.

      +# +#

      +# By default, tokens are placed on the default channel +# ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the +# {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to +# call {@link Lexer#setChannel}. +#

      +# +#

      +# Note: lexer rules which use the {@code ->skip} lexer command or call +# {@link Lexer#skip} do not produce tokens at all, so input text matched by +# such a rule will not be available as part of the token stream, regardless of +# channel.
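A short usage sketch (illustrative, not part of the patch; MyLexer is any generated lexer whose whitespace rule uses the ->channel(HIDDEN) command) contrasting index-based access with channel-filtered lookahead:

    from antlr4.InputStream import InputStream
    from antlr4.CommonTokenStream import CommonTokenStream
    from MyLexer import MyLexer                    # hypothetical generated lexer

    stream = CommonTokenStream(MyLexer(InputStream(u"a b")))
    stream.fill()
    print len(stream.tokens)                       # every token, hidden-channel ones included
    print stream.getNumberOfOnChannelTokens()      # only tokens on the default channel
    print stream.LT(1).text                        # LT/LA/LB skip off-channel tokens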

      +#/ + +from antlr4.BufferedTokenStream import BufferedTokenStream +from antlr4.Token import Token + + +class CommonTokenStream(BufferedTokenStream): + + def __init__(self, lexer, channel=Token.DEFAULT_CHANNEL): + super(CommonTokenStream, self).__init__(lexer) + self.channel = channel + + def adjustSeekIndex(self, i): + return self.nextTokenOnChannel(i, self.channel) + + def LB(self, k): + if k==0 or (self.index-k)<0: + return None + i = self.index + n = 1 + # find k good tokens looking backwards + while n <= k: + # skip off-channel tokens + i = self.previousTokenOnChannel(i - 1, self.channel) + n += 1 + if i < 0: + return None + return self.tokens[i] + + def LT(self, k): + self.lazyInit() + if k == 0: + return None + if k < 0: + return self.LB(-k) + i = self.index + n = 1 # we know tokens[pos] is a good one + # find k good tokens + while n < k: + # skip off-channel tokens, but make sure to not look past EOF + if self.sync(i + 1): + i = self.nextTokenOnChannel(i + 1, self.channel) + n += 1 + return self.tokens[i] + + # Count EOF just once.#/ + def getNumberOfOnChannelTokens(self): + n = 0 + self.fill() + for i in range(0, len(self.tokens)): + t = self.tokens[i] + if t.channel==self.channel: + n += 1 + if t.type==Token.EOF: + break + return n diff --git a/runtime/Python2/src/antlr4/FileStream.py b/runtime/Python2/src/antlr4/FileStream.py new file mode 100644 index 000000000..1b8cd18f0 --- /dev/null +++ b/runtime/Python2/src/antlr4/FileStream.py @@ -0,0 +1,58 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# This is an InputStream that is loaded from a file all at once +# when you construct the object. 
+# + +import codecs +import unittest + +from antlr4.InputStream import InputStream + + +class FileStream(InputStream): + + def __init__(self, fileName, encoding='ascii'): + self.fileName = fileName + # read binary to avoid line ending conversion + with open(fileName, 'rb') as file: + bytes = file.read() + data = codecs.decode(bytes, encoding) + super(type(self), self).__init__(data) + + +class TestFileStream(unittest.TestCase): + + def testStream(self): + stream = FileStream("FileStream.py") + self.assertTrue(stream.size>0) diff --git a/runtime/Python2/src/antlr4/InputStream.py b/runtime/Python2/src/antlr4/InputStream.py new file mode 100644 index 000000000..633f2aa05 --- /dev/null +++ b/runtime/Python2/src/antlr4/InputStream.py @@ -0,0 +1,133 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +import unittest + + +# +# Vacuum all input from a string and then treat it like a buffer. +# +from antlr4.Token import Token + + +class InputStream (object): + + def __init__(self, data): + self.name = "" + self.strdata = unicode(data) + self._loadString() + + def _loadString(self): + self._index = 0 + self.data = [ord(c) for c in self.strdata] + self._size = len(self.data) + + @property + def index(self): + return self._index + + @property + def size(self): + return self._size + + # Reset the stream so that it's in the same state it was + # when the object was created *except* the data array is not + # touched. 
+ # + def reset(self): + self._index = 0 + + def consume(self): + if self._index >= self._size: + assert self.LA(1) == Token.EOF + raise Exception("cannot consume EOF") + self._index += 1 + + def LA(self, offset): + if offset==0: + return 0 # undefined + if offset<0: + offset += 1 # e.g., translate LA(-1) to use offset=0 + pos = self._index + offset - 1 + if pos < 0 or pos >= self._size: # invalid + return Token.EOF + return self.data[pos] + + def LT(self, offset): + return self.LA(offset) + + # mark/release do nothing; we have entire buffer + def mark(self): + return -1 + + def release(self, marker): + pass + + # consume() ahead until p==_index; can't just set p=_index as we must + # update line and column. If we seek backwards, just set p + # + def seek(self, _index): + if _index<=self._index: + self._index = _index # just jump; don't update stream state (line, ...) + return + # seek forward + self._index = min(_index, self._size) + + def getText(self, start, stop): + if stop >= self._size: + stop = self._size-1 + if start >= self._size: + return "" + else: + return self.strdata[start:stop+1] + + def __str__(self): + return unicode(self) + + def __unicode__(self): + return self.strdata + + +class TestInputStream(unittest.TestCase): + + def testStream(self): + stream = InputStream("abcde") + self.assertEqual(0, stream.index) + self.assertEqual(5, stream.size) + self.assertEqual(ord("a"), stream.LA(1)) + stream.consume() + self.assertEqual(1, stream.index) + stream.seek(5) + self.assertEqual(Token.EOF, stream.LA(1)) + self.assertEqual("bcd", stream.getText(1, 3)) + stream.reset() + self.assertEqual(0, stream.index) + + \ No newline at end of file diff --git a/runtime/Python2/src/antlr4/IntervalSet.py b/runtime/Python2/src/antlr4/IntervalSet.py new file mode 100644 index 000000000..2b9657ec6 --- /dev/null +++ b/runtime/Python2/src/antlr4/IntervalSet.py @@ -0,0 +1,297 @@ +from io import StringIO +import unittest + +from antlr4.Token import Token + + +class Interval(object): + + def __init__(self, start, stop): + self.start = start + self.stop = stop + self.range = xrange(start, stop) + + def __contains__(self, item): + return item in self.range + + def __len__(self): + return self.stop - self.start + + def __iter__(self): + return iter(self.range) + +class IntervalSet(object): + + def __init__(self): + self.intervals = None + self.readOnly = False + + def __iter__(self): + if self.intervals is not None: + for i in self.intervals: + for c in i: + yield c + + def __getitem__(self, item): + i = 0 + for k in self: + if i==item: + return k + else: + i += 1 + return Token.INVALID_TYPE + + def addOne(self, v): + self.addRange(Interval(v, v+1)) + + def addRange(self, v): + if self.intervals is None: + self.intervals = list() + self.intervals.append(v) + else: + # find insert pos + k = 0 + for i in self.intervals: + # distinct range -> insert + if v.stop adjust + elif v.stop==i.start: + self.intervals[k] = Interval(v.start, i.stop) + return + # overlapping range -> adjust and reduce + elif v.start<=i.stop: + self.intervals[k] = Interval(min(i.start,v.start), max(i.stop,v.stop)) + self.reduce(k) + return + k += 1 + # greater than any existing + self.intervals.append(v) + + def addSet(self, other): + if other.intervals is not None: + for i in other.intervals: + self.addRange(i) + return self + + def reduce(self, k): + # only need to reduce if k is not the last + if k= r.stop: + self.intervals.pop(k+1) + self.reduce(k) + elif l.stop >= r.start: + self.intervals[k] = Interval(l.start, r.stop) + 
self.intervals.pop(k+1) + + def complement(self, start, stop): + result = IntervalSet() + result.addRange(Interval(start,stop+1)) + for i in self.intervals: + result.removeRange(i) + return result + + def __contains__(self, item): + if self.intervals is None: + return False + else: + for i in self.intervals: + if item in i: + return True + return False + + def __len__(self): + xlen = 0 + for i in self.intervals: + xlen += len(i) + return xlen + + def removeRange(self, v): + if v.start==v.stop-1: + self.removeOne(v.start) + elif self.intervals is not None: + k = 0 + for i in self.intervals: + # intervals are ordered + if v.stop<=i.start: + return + # check for including range, split it + elif v.start>i.start and v.stop=i.stop: + self.intervals.pop(k) + k = k - 1 # need another pass + # check for lower boundary + elif v.start1: + buf.write(u"{") + first = True + for i in self.intervals: + for j in i: + if not first: + buf.write(u", ") + buf.write(self.elementName(literalNames, symbolicNames, j)) + first = False + if len(self)>1: + buf.write(u"}") + return buf.getvalue() + + def elementName(self, literalNames, symbolicNames, a): + if a==Token.EOF: + return u"" + elif a==Token.EPSILON: + return u"" + else: + if a" + + +class TestIntervalSet(unittest.TestCase): + + def testEmpty(self): + s = IntervalSet() + self.assertIsNone(s.intervals) + self.assertFalse(30 in s) + + def testOne(self): + s = IntervalSet() + s.addOne(30) + self.assertTrue(30 in s) + self.assertFalse(29 in s) + self.assertFalse(31 in s) + + def testTwo(self): + s = IntervalSet() + s.addOne(30) + s.addOne(40) + self.assertTrue(30 in s) + self.assertTrue(40 in s) + self.assertFalse(35 in s) + + def testRange(self): + s = IntervalSet() + s.addRange(Interval(30,41)) + self.assertTrue(30 in s) + self.assertTrue(40 in s) + self.assertTrue(35 in s) + + def testDistinct1(self): + s = IntervalSet() + s.addRange(Interval(30,32)) + s.addRange(Interval(40,42)) + self.assertEquals(2,len(s.intervals)) + self.assertTrue(30 in s) + self.assertTrue(40 in s) + self.assertFalse(35 in s) + + def testDistinct2(self): + s = IntervalSet() + s.addRange(Interval(40,42)) + s.addRange(Interval(30,32)) + self.assertEquals(2,len(s.intervals)) + self.assertTrue(30 in s) + self.assertTrue(40 in s) + self.assertFalse(35 in s) + + def testContiguous1(self): + s = IntervalSet() + s.addRange(Interval(30,36)) + s.addRange(Interval(36,41)) + self.assertEquals(1,len(s.intervals)) + self.assertTrue(30 in s) + self.assertTrue(40 in s) + self.assertTrue(35 in s) + + def testContiguous2(self): + s = IntervalSet() + s.addRange(Interval(36,41)) + s.addRange(Interval(30,36)) + self.assertEquals(1,len(s.intervals)) + self.assertTrue(30 in s) + self.assertTrue(40 in s) + + def testOverlapping1(self): + s = IntervalSet() + s.addRange(Interval(30,40)) + s.addRange(Interval(35,45)) + self.assertEquals(1,len(s.intervals)) + self.assertTrue(30 in s) + self.assertTrue(44 in s) + + def testOverlapping2(self): + s = IntervalSet() + s.addRange(Interval(35,45)) + s.addRange(Interval(30,40)) + self.assertEquals(1,len(s.intervals)) + self.assertTrue(30 in s) + self.assertTrue(44 in s) + + def testOverlapping3(self): + s = IntervalSet() + s.addRange(Interval(30,32)) + s.addRange(Interval(40,42)) + s.addRange(Interval(50,52)) + s.addRange(Interval(20,61)) + self.assertEquals(1,len(s.intervals)) + self.assertTrue(20 in s) + self.assertTrue(60 in s) + + def testComplement(self): + s = IntervalSet() + s.addRange(Interval(10,21)) + c = s.complement(1,100) + self.assertTrue(1 in c) + 
self.assertTrue(100 in c) + self.assertTrue(10 not in c) + self.assertTrue(20 not in c) + + diff --git a/runtime/Python2/src/antlr4/LL1Analyzer.py b/runtime/Python2/src/antlr4/LL1Analyzer.py new file mode 100644 index 000000000..d01f3f456 --- /dev/null +++ b/runtime/Python2/src/antlr4/LL1Analyzer.py @@ -0,0 +1,195 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ +from antlr4.IntervalSet import IntervalSet, Interval +from antlr4.Token import Token +from antlr4.PredictionContext import PredictionContext, SingletonPredictionContext, PredictionContextFromRuleContext +from antlr4.atn.ATNConfig import ATNConfig +from antlr4.atn.ATNState import RuleStopState +from antlr4.atn.Transition import WildcardTransition, NotSetTransition, AbstractPredicateTransition, RuleTransition + + +class LL1Analyzer (object): + + #* Special value added to the lookahead sets to indicate that we hit + # a predicate during analysis if {@code seeThruPreds==false}. + #/ + HIT_PRED = Token.INVALID_TYPE + + def __init__(self, atn): + self.atn = atn + + #* + # Calculates the SLL(1) expected lookahead set for each outgoing transition + # of an {@link ATNState}. The returned array has one element for each + # outgoing transition in {@code s}. If the closure from transition + # i leads to a semantic predicate before matching a symbol, the + # element at index i of the result will be {@code null}. + # + # @param s the ATN state + # @return the expected symbols for each outgoing transition of {@code s}. 
+ #/ + def getDecisionLookahead(self, s): + if s is None: + return None + + count = len(s.transitions) + look = [] * count + for alt in range(0, count): + look[alt] = set() + lookBusy = set() + seeThruPreds = False # fail to get lookahead upon pred + self._LOOK(s.transition(alt).target, None, PredictionContext.EMPTY, \ + look[alt], lookBusy, set(), seeThruPreds, False) + # Wipe out lookahead for this alternative if we found nothing + # or we had a predicate when we !seeThruPreds + if len(look[alt])==0 or self.HIT_PRED in look[alt]: + look[alt] = None + return look + + #* + # Compute set of tokens that can follow {@code s} in the ATN in the + # specified {@code ctx}. + # + #

      If {@code ctx} is {@code null} and the end of the rule containing + # {@code s} is reached, {@link Token#EPSILON} is added to the result set. + # If {@code ctx} is not {@code null} and the end of the outermost rule is + # reached, {@link Token#EOF} is added to the result set.

      + # + # @param s the ATN state + # @param stopState the ATN state to stop at. This can be a + # {@link BlockEndState} to detect epsilon paths through a closure. + # @param ctx the complete parser context, or {@code null} if the context + # should be ignored + # + # @return The set of tokens that can follow {@code s} in the ATN in the + # specified {@code ctx}. + #/ + def LOOK(self, s, stopState=None, ctx=None): + r = IntervalSet() + seeThruPreds = True # ignore preds; get all lookahead + lookContext = PredictionContextFromRuleContext(s.atn, ctx) if ctx is not None else None + self._LOOK(s, stopState, lookContext, r, set(), set(), seeThruPreds, True) + return r + + #* + # Compute set of tokens that can follow {@code s} in the ATN in the + # specified {@code ctx}. + # + #

+    # If {@code ctx} is {@code null} and {@code stopState} or the end of the
+    # rule containing {@code s} is reached, {@link Token#EPSILON} is added to
+    # the result set. If {@code ctx} is not {@code null} and {@code addEOF} is
+    # {@code true} and {@code stopState} or the end of the outermost rule is
+    # reached, {@link Token#EOF} is added to the result set.
      + # + # @param s the ATN state. + # @param stopState the ATN state to stop at. This can be a + # {@link BlockEndState} to detect epsilon paths through a closure. + # @param ctx The outer context, or {@code null} if the outer context should + # not be used. + # @param look The result lookahead set. + # @param lookBusy A set used for preventing epsilon closures in the ATN + # from causing a stack overflow. Outside code should pass + # {@code new HashSet} for this argument. + # @param calledRuleStack A set used for preventing left recursion in the + # ATN from causing a stack overflow. Outside code should pass + # {@code new BitSet()} for this argument. + # @param seeThruPreds {@code true} to true semantic predicates as + # implicitly {@code true} and "see through them", otherwise {@code false} + # to treat semantic predicates as opaque and add {@link #HIT_PRED} to the + # result if one is encountered. + # @param addEOF Add {@link Token#EOF} to the result if the end of the + # outermost context is reached. This parameter has no effect if {@code ctx} + # is {@code null}. + #/ + def _LOOK(self, s, stopState , ctx, look, lookBusy, \ + calledRuleStack, seeThruPreds, addEOF): + c = ATNConfig(s, 0, ctx) + + if c in lookBusy: + return + lookBusy.add(c) + + if s == stopState: + if ctx is None: + look.addOne(Token.EPSILON) + return + elif ctx.isEmpty() and addEOF: + look.addOne(Token.EOF) + return + + if isinstance(s, RuleStopState ): + if ctx is None: + look.addOne(Token.EPSILON) + return + elif ctx.isEmpty() and addEOF: + look.addOne(Token.EOF) + return + + if ctx != PredictionContext.EMPTY: + # run thru all possible stack tops in ctx + for i in range(0, len(ctx)): + returnState = self.atn.states[ctx.getReturnState(i)] + removed = returnState.ruleIndex in calledRuleStack + try: + calledRuleStack.discard(returnState.ruleIndex) + self._LOOK(returnState, stopState, ctx.getParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + finally: + if removed: + calledRuleStack.add(returnState.ruleIndex) + return + + for t in s.transitions: + if type(t) == RuleTransition: + if t.target.ruleIndex in calledRuleStack: + continue + + newContext = SingletonPredictionContext.create(ctx, t.followState.stateNumber) + + try: + calledRuleStack.add(t.target.ruleIndex) + self._LOOK(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + finally: + calledRuleStack.remove(t.target.ruleIndex) + elif isinstance(t, AbstractPredicateTransition ): + if seeThruPreds: + self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + else: + look.addOne(self.HIT_PRED) + elif t.isEpsilon: + self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + elif type(t) == WildcardTransition: + look.addRange( Interval(Token.MIN_USER_TOKEN_TYPE, self.atn.maxTokenType + 1) ) + else: + set = t.label + if set is not None: + if isinstance(t, NotSetTransition): + set = set.complement(Token.MIN_USER_TOKEN_TYPE, self.atn.maxTokenType) + look.addSet(set) diff --git a/runtime/Python2/src/antlr4/Lexer.py b/runtime/Python2/src/antlr4/Lexer.py new file mode 100644 index 000000000..b566bc33b --- /dev/null +++ b/runtime/Python2/src/antlr4/Lexer.py @@ -0,0 +1,343 @@ +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, self list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, self list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from self software without specific prior written permission. +# +# self SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# self SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ + +# A lexer is recognizer that draws input symbols from a character stream. +# lexer grammars result in a subclass of self object. A Lexer object +# uses simplified match() and error recovery mechanisms in the interest +# of speed. +#/ +from io import StringIO + +from antlr4.CommonTokenFactory import CommonTokenFactory +from antlr4.Recognizer import Recognizer +from antlr4.Token import Token +from antlr4.error.Errors import IllegalStateException, LexerNoViableAltException + + +class TokenSource(object): + + pass + + +class Lexer(Recognizer, TokenSource): + + DEFAULT_MODE = 0 + MORE = -2 + SKIP = -3 + + DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL + HIDDEN = Token.HIDDEN_CHANNEL + MIN_CHAR_VALUE = '\u0000' + MAX_CHAR_VALUE = '\uFFFE' + + def __init__(self, input): + super(Lexer, self).__init__() + self._input = input + self._factory = CommonTokenFactory.DEFAULT + self._tokenFactorySourcePair = (self, input) + + self._interp = None # child classes must populate this + + # The goal of all lexer rules/methods is to create a token object. + # self is an instance variable as multiple rules may collaborate to + # create a single token. nextToken will return self object after + # matching lexer rule(s). If you subclass to allow multiple token + # emissions, then set self to the last token to be matched or + # something nonnull so that the auto token emit mechanism will not + # emit another token. + self._token = None + + # What character index in the stream did the current token start at? + # Needed, for example, to get the text for current token. Set at + # the start of nextToken. + self._tokenStartCharIndex = -1 + + # The line on which the first character of the token resides#/ + self._tokenStartLine = -1 + + # The character position of first character within the line#/ + self._tokenStartColumn = -1 + + # Once we see EOF on char stream, next token will be EOF. + # If you have DONE : EOF ; then you see DONE EOF. 
+ self._hitEOF = False + + # The channel number for the current token#/ + self._channel = Token.DEFAULT_CHANNEL + + # The token type for the current token#/ + self._type = Token.INVALID_TYPE + + self._modeStack = [] + self._mode = self.DEFAULT_MODE + + # You can set the text for the current token to override what is in + # the input char buffer. Use setText() or can set self instance var. + #/ + self._text = None + + + def reset(self): + # wack Lexer state variables + if self._input is not None: + self._input.seek(0) # rewind the input + self._token = None + self._type = Token.INVALID_TYPE + self._channel = Token.DEFAULT_CHANNEL + self._tokenStartCharIndex = -1 + self._tokenStartColumn = -1 + self._tokenStartLine = -1 + self._text = None + + self._hitEOF = False + self._mode = Lexer.DEFAULT_MODE + self._modeStack = [] + + self._interp.reset() + + # Return a token from self source; i.e., match a token on the char + # stream. + def nextToken(self): + if self._input is None: + raise IllegalStateException("nextToken requires a non-null input stream.") + + # Mark start location in char stream so unbuffered streams are + # guaranteed at least have text of current token + tokenStartMarker = self._input.mark() + try: + while True: + if self._hitEOF: + self.emitEOF() + return self._token + self._token = None + self._channel = Token.DEFAULT_CHANNEL + self._tokenStartCharIndex = self._input.index + self._tokenStartColumn = self._interp.column + self._tokenStartLine = self._interp.line + self._text = None + continueOuter = False + while True: + self._type = Token.INVALID_TYPE + ttype = self.SKIP + try: + ttype = self._interp.match(self._input, self._mode) + except LexerNoViableAltException as e: + self.notifyListeners(e) # report error + self.recover(e) + if self._input.LA(1)==Token.EOF: + self._hitEOF = True + if self._type == Token.INVALID_TYPE: + self._type = ttype + if self._type == self.SKIP: + continueOuter = True + break + if self._type!=self.MORE: + break + if continueOuter: + continue + if self._token is None: + self.emit() + return self._token + finally: + # make sure we release marker after match or + # unbuffered char stream will keep buffering + self._input.release(tokenStartMarker) + + # Instruct the lexer to skip creating a token for current lexer rule + # and look for another token. nextToken() knows to keep looking when + # a lexer rule finishes with token set to SKIP_TOKEN. Recall that + # if token==null at end of any token rule, it creates one for you + # and emits it. + #/ + def skip(self): + self._type = self.SKIP + + def more(self): + self._type = self.MORE + + def mode(self, m): + self._mode = m + + def pushMode(self, m): + if self._interp.debug: + print("pushMode " + str(m)) + self._modeStack.append(self._mode) + self.mode(m) + + def popMode(self): + if len(self._modeStack)==0: + raise Exception("Empty Stack") + if self._interp.debug: + print("popMode back to "+ self._modeStack[:-1]) + self.mode( self._modeStack.pop() ) + return self._mode + + # Set the char stream and reset the lexer#/ + @property + def inputStream(self): + return self._input + + @inputStream.setter + def inputStream(self, input): + self._input = None + self._tokenFactorySourcePair = (self, self._input) + self.reset() + self._input = input + self._tokenFactorySourcePair = (self, self._input) + + @property + def sourceName(self): + return self._input.sourceName + + # By default does not support multiple emits per nextToken invocation + # for efficiency reasons. 
Subclass and override self method, nextToken, + # and getToken (to push tokens into a list and pull from that list + # rather than a single variable as self implementation does). + #/ + def emitToken(self, token): + self._token = token + + # The standard method called to automatically emit a token at the + # outermost lexical rule. The token object should point into the + # char buffer start..stop. If there is a text override in 'text', + # use that to set the token's text. Override self method to emit + # custom Token objects or provide a new factory. + #/ + def emit(self): + t = self._factory.create(self._tokenFactorySourcePair, self._type, self._text, self._channel, self._tokenStartCharIndex, + self.getCharIndex()-1, self._tokenStartLine, self._tokenStartColumn) + self.emitToken(t) + return t + + def emitEOF(self): + cpos = self.column + lpos = self.line + eof = self._factory.create(self._tokenFactorySourcePair, Token.EOF, None, Token.DEFAULT_CHANNEL, self._input.index, + self._input.index-1, lpos, cpos) + self.emitToken(eof) + return eof + + @property + def type(self): + return self._type + + @type.setter + def type(self, type): + self._type = type + + @property + def line(self): + return self._interp.line + + @line.setter + def line(self, line): + self._interp.line = line + + @property + def column(self): + return self._interp.column + + @column.setter + def column(self, column): + self._interp.column = column + + # What is the index of the current character of lookahead?#/ + def getCharIndex(self): + return self._input.index + + # Return the text matched so far for the current token or any + # text override. + @property + def text(self): + if self._text is not None: + return self._text + else: + return self._interp.getText(self._input) + + # Set the complete text of self token; it wipes any previous + # changes to the text. + @text.setter + def text(self, txt): + self._text = txt + + # Return a list of all Token objects in input char stream. + # Forces load of all tokens. Does not include EOF token. + #/ + def getAllTokens(self): + tokens = [] + t = self.nextToken() + while t.type!=Token.EOF: + tokens.append(t) + t = self.nextToken() + return tokens + + def notifyListeners(self, e): + start = self._tokenStartCharIndex + stop = self._input.index + text = self._input.getText(start, stop) + msg = "token recognition error at: '" + self.getErrorDisplay(text) + "'" + listener = self.getErrorListenerDispatch() + listener.syntaxError(self, None, self._tokenStartLine, self._tokenStartColumn, msg, e) + + def getErrorDisplay(self, s): + with StringIO() as buf: + for c in s: + buf.write(unicode(self.getErrorDisplayForChar(c))) + return buf.getvalue() + + def getErrorDisplayForChar(self, c): + if ord(c[0])==Token.EOF: + return "" + elif c=='\n': + return "\\n" + elif c=='\t': + return "\\t" + elif c=='\r': + return "\\r" + else: + return str(c) + + def getCharErrorDisplay(self, c): + return "'" + self.getErrorDisplayForChar(c) + "'" + + # Lexers can normally match any char in it's vocabulary after matching + # a token, so do the easy thing and just kill a character and hope + # it all works out. You can instead use the rule invocation stack + # to do sophisticated error recovery if you are in a fragment rule. + #/ + def recover(self, re): + if self._input.LA(1) != Token.EOF: + if isinstance(re, LexerNoViableAltException): + # skip a char and try again + self._interp.consume(self._input) + else: + # TODO: Do we lose character or line position information? 
+ self._input.consume() + diff --git a/runtime/Python2/src/antlr4/ListTokenSource.py b/runtime/Python2/src/antlr4/ListTokenSource.py new file mode 100644 index 000000000..5e5e03a02 --- /dev/null +++ b/runtime/Python2/src/antlr4/ListTokenSource.py @@ -0,0 +1,139 @@ +# +# Provides an implementation of {@link TokenSource} as a wrapper around a list +# of {@link Token} objects. +# +#

+# If the final token in the list is an {@link Token#EOF} token, it will be used
+# as the EOF token for every call to {@link #nextToken} after the end of the
+# list is reached. Otherwise, an EOF token will be created.
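As an illustrative sketch (assumed wiring, not shown in this file), such a source can be fed through the buffered stream machinery that ships with this runtime; MyParser stands in for a generated parser:

    source = ListTokenSource(tokens)      # tokens: a list of Token objects
    stream = CommonTokenStream(source)
    parser = MyParser(stream)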
      +# +from antlr4.CommonTokenFactory import CommonTokenFactory +from antlr4.Lexer import TokenSource +from antlr4.Token import Token + + +class ListTokenSource(TokenSource): + + # Constructs a new {@link ListTokenSource} instance from the specified + # collection of {@link Token} objects and source name. + # + # @param tokens The collection of {@link Token} objects to provide as a + # {@link TokenSource}. + # @param sourceName The name of the {@link TokenSource}. If this value is + # {@code null}, {@link #getSourceName} will attempt to infer the name from + # the next {@link Token} (or the previous token if the end of the input has + # been reached). + # + # @exception NullPointerException if {@code tokens} is {@code null} + # + def __init__(self, tokens, sourceName=None): + if tokens is None: + raise ReferenceError("tokens cannot be null") + self.tokens = tokens + self.sourceName = sourceName + # The index into {@link #tokens} of token to return by the next call to + # {@link #nextToken}. The end of the input is indicated by this value + # being greater than or equal to the number of items in {@link #tokens}. + self.pos = 0 + # This field caches the EOF token for the token source. + self.eofToken = None + # This is the backing field for {@link #getTokenFactory} and + self._factory = CommonTokenFactory.DEFAULT + + + # + # {@inheritDoc} + # + @property + def column(self): + if self.pos < len(self.tokens): + return self.tokens[self.pos].column + elif self.eofToken is not None: + return self.eofToken.column + elif len(self.tokens) > 0: + # have to calculate the result from the line/column of the previous + # token, along with the text of the token. + lastToken = self.tokens[len(self.tokens) - 1] + tokenText = lastToken.getText() + if tokenText is not None: + lastNewLine = tokenText.rfind('\n') + if lastNewLine >= 0: + return len(tokenText) - lastNewLine - 1 + return lastToken.column + lastToken.stopIndex - lastToken.startIndex + 1 + + # only reach this if tokens is empty, meaning EOF occurs at the first + # position in the input + return 0 + + # + # {@inheritDoc} + # + def nextToken(self): + if self.pos >= len(self.tokens): + if self.eofToken is None: + start = -1 + if len(self.tokens) > 0: + previousStop = self.tokens[len(self.tokens) - 1].stopIndex + if previousStop != -1: + start = previousStop + 1 + stop = max(-1, start - 1) + self.eofToken = self._factory.create((self, self.getInputStream()), + Token.EOF, "EOF", Token.DEFAULT_CHANNEL, start, stop, self.line, self.column) + return self.eofToken + t = self.tokens[self.pos] + if self.pos == len(self.tokens) - 1 and t.type == Token.EOF: + eofToken = t + self.pos += 1 + return t + + # + # {@inheritDoc} + # + @property + def line(self): + if self.pos < len(self.tokens): + return self.tokens[self.pos].line + elif self.eofToken is not None: + return self.eofToken.line + elif len(self.tokens) > 0: + # have to calculate the result from the line/column of the previous + # token, along with the text of the token. + lastToken = self.tokens[len(self.tokens) - 1] + line = lastToken.line + tokenText = lastToken.text + if tokenText is not None: + for c in tokenText: + if c == '\n': + line += 1 + + # if no text is available, assume the token did not contain any newline characters. 
+ return line + + # only reach this if tokens is empty, meaning EOF occurs at the first + # position in the input + return 1 + + # + # {@inheritDoc} + # + def getInputStream(self): + if self.pos < len(self.tokens): + return self.tokens[self.pos].getInputStream() + elif self.eofToken is not None: + return self.eofToken.getInputStream() + elif len(self.tokens) > 0: + return self.tokens[len(self.tokens) - 1].getInputStream() + else: + # no input stream information is available + return None + + # + # {@inheritDoc} + # + def getSourceName(self): + if self.sourceName is not None: + return self.sourceName + inputStream = self.getInputStream() + if inputStream is not None: + return inputStream.getSourceName() + else: + return "List" \ No newline at end of file diff --git a/runtime/Python2/src/antlr4/Parser.py b/runtime/Python2/src/antlr4/Parser.py new file mode 100644 index 000000000..f0a6d900d --- /dev/null +++ b/runtime/Python2/src/antlr4/Parser.py @@ -0,0 +1,575 @@ +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, self list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, self list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from self software without specific prior written permission. +# +# self SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# self SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
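Before the implementation, a rough sketch of how a generated parser built on this class is usually driven (MyLexer, MyParser, and the expr start rule are hypothetical generated names; FileStream and CommonTokenStream are part of this runtime):

    input = FileStream("prog.txt")
    lexer = MyLexer(input)
    stream = CommonTokenStream(lexer)
    parser = MyParser(stream)
    tree = parser.expr()                  # invoke the grammar's start rule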
+from __future__ import print_function +from antlr4.error.ErrorStrategy import DefaultErrorStrategy +from antlr4.Recognizer import Recognizer +from antlr4.Token import Token +from antlr4.Lexer import Lexer +from antlr4.atn.ATNDeserializer import ATNDeserializer +from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions +from antlr4.error.Errors import UnsupportedOperationException +from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher +from antlr4.tree.Tree import ParseTreeListener + +class TraceListener(ParseTreeListener): + + def __init__(self, parser): + self._parser = parser + + def enterEveryRule(self, ctx): + print("enter " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text) + + def visitTerminal(self, node): + print("consume " + str(node.symbol) + " rule " + self._parser.ruleNames[self._parser._ctx.getRuleIndex()]) + + def visitErrorNode(self, node): + pass + + def exitEveryRule(self, ctx): + print("exit " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text) + + +# self is all the parsing support code essentially; most of it is error recovery stuff.# +class Parser (Recognizer): + + # self field maps from the serialized ATN string to the deserialized {@link ATN} with + # bypass alternatives. + # + # @see ATNDeserializationOptions#isGenerateRuleBypassTransitions() + # + bypassAltsAtnCache = dict() + + def __init__(self, input): + super(Parser, self).__init__() + # The input stream. + self._input = None + # The error handling strategy for the parser. The default value is a new + # instance of {@link DefaultErrorStrategy}. + self._errHandler = DefaultErrorStrategy() + self._precedenceStack = list() + self._precedenceStack.append(0) + # The {@link ParserRuleContext} object for the currently executing rule. + # self is always non-null during the parsing process. + self._ctx = None + # Specifies whether or not the parser should construct a parse tree during + # the parsing process. The default value is {@code true}. + self.buildParseTrees = True + # When {@link #setTrace}{@code (true)} is called, a reference to the + # {@link TraceListener} is stored here so it can be easily removed in a + # later call to {@link #setTrace}{@code (false)}. The listener itself is + # implemented as a parser listener so self field is not directly used by + # other parser methods. + self._tracer = None + # The list of {@link ParseTreeListener} listeners registered to receive + # events during the parse. + self._parseListeners = None + # The number of syntax errors reported during parsing. self value is + # incremented each time {@link #notifyErrorListeners} is called. + self._syntaxErrors = 0 + self.setInputStream(input) + + # reset the parser's state# + def reset(self): + if self._input is not None: + self._input.seek(0) + self._errHandler.reset(self) + self._ctx = None + self._syntaxErrors = 0 + self.setTrace(False) + self._precedenceStack = list() + self._precedenceStack.append(0) + if self._interp is not None: + self._interp.reset() + + # Match current input symbol against {@code ttype}. If the symbol type + # matches, {@link ANTLRErrorStrategy#reportMatch} and {@link #consume} are + # called to complete the match process. + # + #

+    # If the symbol type does not match,
+    # {@link ANTLRErrorStrategy#recoverInline} is called on the current error
+    # strategy to attempt recovery. If {@link #getBuildParseTree} is
+    # {@code true} and the token index of the symbol returned by
+    # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
+    # the parse tree by calling {@link ParserRuleContext#addErrorNode}.
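Inside a generated rule method this contract is exercised roughly as follows; localctx, stateNumber, and MyParser are placeholders for generated code, sketched only to show the call:

    self.enterRule(localctx, stateNumber, MyParser.RULE_primary)
    try:
        self.enterOuterAlt(localctx, 1)
        tok = self.match(MyParser.ID)     # matched Token, or the recovered symbol
    finally:
        self.exitRule()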
      + # + # @param ttype the token type to match + # @return the matched symbol + # @throws RecognitionException if the current input symbol did not match + # {@code ttype} and the error strategy could not recover from the + # mismatched symbol + + def match(self, ttype): + t = self.getCurrentToken() + if t.type==ttype: + self._errHandler.reportMatch(self) + self.consume() + else: + t = self._errHandler.recoverInline(self) + if self.buildParseTrees and t.tokenIndex==-1: + # we must have conjured up a new token during single token insertion + # if it's not the current symbol + self._ctx.addErrorNode(t) + return t + + # Match current input symbol as a wildcard. If the symbol type matches + # (i.e. has a value greater than 0), {@link ANTLRErrorStrategy#reportMatch} + # and {@link #consume} are called to complete the match process. + # + #

+    # If the symbol type does not match,
+    # {@link ANTLRErrorStrategy#recoverInline} is called on the current error
+    # strategy to attempt recovery. If {@link #getBuildParseTree} is
+    # {@code true} and the token index of the symbol returned by
+    # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
+    # the parse tree by calling {@link ParserRuleContext#addErrorNode}.
      + # + # @return the matched symbol + # @throws RecognitionException if the current input symbol did not match + # a wildcard and the error strategy could not recover from the mismatched + # symbol + + def matchWildcard(self): + t = self.getCurrentToken() + if t.type > 0: + self._errHandler.reportMatch(self) + self.consume() + else: + t = self._errHandler.recoverInline(self) + if self.buildParseTrees and t.tokenIndex == -1: + # we must have conjured up a new token during single token insertion + # if it's not the current symbol + self._ctx.addErrorNode(t) + + return t + + def getParseListeners(self): + return list() if self._parseListeners is None else self._parseListeners + + # Registers {@code listener} to receive events during the parsing process. + # + #

+    # To support output-preserving grammar transformations (including but not
+    # limited to left-recursion removal, automated left-factoring, and
+    # optimized code generation), calls to listener methods during the parse
+    # may differ substantially from calls made by
+    # {@link ParseTreeWalker#DEFAULT} used after the parse is complete. In
+    # particular, rule entry and exit events may occur in a different order
+    # during the parse than after the parser. In addition, calls to certain
+    # rule entry methods may be omitted.
      + # + #

+    # With the following specific exceptions, calls to listener events are
+    # deterministic, i.e. for identical input the calls to listener
+    # methods will be the same.
      + # + #
        + #
+    # - Alterations to the grammar used to generate code may change the
+    #   behavior of the listener calls.
+    # - Alterations to the command line options passed to ANTLR 4 when
+    #   generating the parser may change the behavior of the listener calls.
+    # - Changing the version of the ANTLR Tool used to generate the parser
+    #   may change the behavior of the listener calls.
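As a small illustration of the registration described above (RuleLogger is a made-up listener; ParseTreeListener is the base class this file imports from antlr4.tree.Tree):

    class RuleLogger(ParseTreeListener):
        def enterEveryRule(self, ctx):
            print("entering rule index " + str(ctx.getRuleIndex()))
        def exitEveryRule(self, ctx):
            pass
        def visitTerminal(self, node):
            pass
        def visitErrorNode(self, node):
            pass

    parser.addParseListener(RuleLogger())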
      + # + # @param listener the listener to add + # + # @throws NullPointerException if {@code} listener is {@code null} + # + def addParseListener(self, listener): + if listener is None: + raise ReferenceError("listener") + if self._parseListeners is None: + self._parseListeners = [] + self._parseListeners.append(listener) + + # + # Remove {@code listener} from the list of parse listeners. + # + #

+    # If {@code listener} is {@code null} or has not been added as a parse
+    # listener, self method does nothing.
      + # @param listener the listener to remove + # + def removeParseListener(self, listener): + if self._parseListeners is not None: + self._parseListeners.remove(listener) + if len(self._parseListeners)==0: + self._parseListeners = None + + # Remove all parse listeners. + def removeParseListeners(self): + self._parseListeners = None + + # Notify any parse listeners of an enter rule event. + def triggerEnterRuleEvent(self): + if self._parseListeners is not None: + for listener in self._parseListeners: + listener.enterEveryRule(self._ctx) + self._ctx.enterRule(listener) + + # + # Notify any parse listeners of an exit rule event. + # + # @see #addParseListener + # + def triggerExitRuleEvent(self): + if self._parseListeners is not None: + # reverse order walk of listeners + for listener in reversed(self._parseListeners): + self._ctx.exitRule(listener) + listener.exitEveryRule(self._ctx) + + + def getTokenFactory(self): + return self._input.tokenSource._factory + + # Tell our token source and error strategy about a new way to create tokens.# + def setTokenFactory(self, factory): + self._input.tokenSource._factory = factory + + # The ATN with bypass alternatives is expensive to create so we create it + # lazily. + # + # @throws UnsupportedOperationException if the current parser does not + # implement the {@link #getSerializedATN()} method. + # + def getATNWithBypassAlts(self): + serializedAtn = self.getSerializedATN() + if serializedAtn is None: + raise UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives.") + result = self.bypassAltsAtnCache.get(serializedAtn, None) + if result is None: + deserializationOptions = ATNDeserializationOptions() + deserializationOptions.generateRuleBypassTransitions = True + result = ATNDeserializer(deserializationOptions).deserialize(serializedAtn) + self.bypassAltsAtnCache[serializedAtn] = result + return result + + # The preferred method of getting a tree pattern. For example, here's a + # sample use: + # + #
      +    # ParseTree t = parser.expr();
      +    # ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
      +    # ParseTreeMatch m = p.match(t);
      +    # String id = m.get("ID");
      +    # 
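The sample above uses the Java API; with this runtime the same flow would look roughly like this (MyParser.RULE_expr and the expr rule are hypothetical generated names):

    t = parser.expr()
    p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr)
    m = p.match(t)
    name = m.get("ID")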
      + # + def compileParseTreePattern(self, pattern, patternRuleIndex, lexer = None): + if lexer is None: + if self.getTokenStream() is not None: + tokenSource = self.getTokenStream().getTokenSource() + if isinstance( tokenSource, Lexer ): + lexer = tokenSource + if lexer is None: + raise UnsupportedOperationException("Parser can't discover a lexer to use") + + m = ParseTreePatternMatcher(lexer, self) + return m.compile(pattern, patternRuleIndex) + + + def getInputStream(self): + return self.getTokenStream() + + def setInputStream(self, input): + self.setTokenStream(input) + + def getTokenStream(self): + return self._input + + # Set the token stream and reset the parser.# + def setTokenStream(self, input): + self._input = None + self.reset() + self._input = input + + # Match needs to return the current input symbol, which gets put + # into the label for the associated token ref; e.g., x=ID. + # + def getCurrentToken(self): + return self._input.LT(1) + + def notifyErrorListeners(self, msg, offendingToken = None, e = None): + if offendingToken is None: + offendingToken = self.getCurrentToken() + self._syntaxErrors += 1 + line = offendingToken.line + column = offendingToken.column + listener = self.getErrorListenerDispatch() + listener.syntaxError(self, offendingToken, line, column, msg, e) + + # + # Consume and return the {@linkplain #getCurrentToken current symbol}. + # + #

+    # E.g., given the following input with {@code A} being the current
+    # lookahead symbol, self function moves the cursor to {@code B} and returns
+    # {@code A}.
      + # + #
      +    #  A B
      +    #  ^
      +    # 
      + # + # If the parser is not in error recovery mode, the consumed symbol is added + # to the parse tree using {@link ParserRuleContext#addChild(Token)}, and + # {@link ParseTreeListener#visitTerminal} is called on any parse listeners. + # If the parser is in error recovery mode, the consumed symbol is + # added to the parse tree using + # {@link ParserRuleContext#addErrorNode(Token)}, and + # {@link ParseTreeListener#visitErrorNode} is called on any parse + # listeners. + # + def consume(self): + o = self.getCurrentToken() + if o.type != Token.EOF: + self.getInputStream().consume() + hasListener = self._parseListeners is not None and len(self._parseListeners)>0 + if self.buildParseTrees or hasListener: + if self._errHandler.inErrorRecoveryMode(self): + node = self._ctx.addErrorNode(o) + else: + node = self._ctx.addTokenNode(o) + if hasListener: + for listener in self._parseListeners: + listener.visitTerminal(node) + return o + + def addContextToParseTree(self): + # add current context to parent if we have a parent + if self._ctx.parentCtx is not None: + self._ctx.parentCtx.addChild(self._ctx) + + # Always called by generated parsers upon entry to a rule. Access field + # {@link #_ctx} get the current context. + # + def enterRule(self, localctx , state , ruleIndex ): + self.state = state + self._ctx = localctx + self._ctx.start = self._input.LT(1) + if self.buildParseTrees: + self.addContextToParseTree() + if self._parseListeners is not None: + self.triggerEnterRuleEvent() + + def exitRule(self): + self._ctx.stop = self._input.LT(-1) + # trigger event on _ctx, before it reverts to parent + if self._parseListeners is not None: + self.triggerExitRuleEvent() + self.state = self._ctx.invokingState + self._ctx = self._ctx.parentCtx + + def enterOuterAlt(self, localctx, altNum): + # if we have new localctx, make sure we replace existing ctx + # that is previous child of parse tree + if self.buildParseTrees and self._ctx != localctx: + if self._ctx.parentCtx is not None: + self._ctx.parentCtx.removeLastChild() + self._ctx.parentCtx.addChild(localctx) + self._ctx = localctx + + # Get the precedence level for the top-most precedence rule. + # + # @return The precedence level for the top-most precedence rule, or -1 if + # the parser context is not nested within a precedence rule. + # + def getPrecedence(self): + if len(self._precedenceStack)==0: + return -1 + else: + return self._precedenceStack[-1] + + def enterRecursionRule(self, localctx, state, ruleIndex, precedence): + self.state = state + self._precedenceStack.append(precedence) + self._ctx = localctx + self._ctx.start = self._input.LT(1) + if self._parseListeners is not None: + self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules + + # + # Like {@link #enterRule} but for recursive rules. 
+ # + def pushNewRecursionContext(self, localctx, state, ruleIndex): + previous = self._ctx + previous.parentCtx = localctx + previous.invokingState = state + previous.stop = self._input.LT(-1) + + self._ctx = localctx + self._ctx.start = previous.start + if self.buildParseTrees: + self._ctx.addChild(previous) + + if self._parseListeners is not None: + self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules + + def unrollRecursionContexts(self, parentCtx): + self._precedenceStack.pop() + self._ctx.stop = self._input.LT(-1) + retCtx = self._ctx # save current ctx (return value) + # unroll so _ctx is as it was before call to recursive method + if self._parseListeners is not None: + while self._ctx is not parentCtx: + self.triggerExitRuleEvent() + self._ctx = self._ctx.parentCtx + else: + self._ctx = parentCtx + + # hook into tree + retCtx.parentCtx = parentCtx + + if self.buildParseTrees and parentCtx is not None: + # add return ctx into invoking rule's tree + parentCtx.addChild(retCtx) + + def getInvokingContext(self, ruleIndex): + ctx = self._ctx + while ctx is not None: + if ctx.ruleIndex == ruleIndex: + return ctx + ctx = ctx.parentCtx + return None + + + def precpred(self, localctx , precedence): + return precedence >= self._precedenceStack[-1] + + def inContext(self, context): + # TODO: useful in parser? + return False + + # + # Checks whether or not {@code symbol} can follow the current state in the + # ATN. The behavior of self method is equivalent to the following, but is + # implemented such that the complete context-sensitive follow set does not + # need to be explicitly constructed. + # + #
      +    # return getExpectedTokens().contains(symbol);
      +    # 
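In this runtime the reference check above can be written directly, since getExpectedTokens() returns an IntervalSet that supports membership tests; a rough equivalent:

    return symbol in parser.getExpectedTokens()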
      + # + # @param symbol the symbol type to check + # @return {@code true} if {@code symbol} can follow the current state in + # the ATN, otherwise {@code false}. + # + def isExpectedToken(self, symbol): + atn = self._interp.atn + ctx = self._ctx + s = atn.states[self.state] + following = atn.nextTokens(s) + if symbol in following: + return True + if not Token.EPSILON in following: + return False + + while ctx is not None and ctx.invokingState>=0 and Token.EPSILON in following: + invokingState = atn.states[ctx.invokingState] + rt = invokingState.transitions[0] + following = atn.nextTokens(rt.followState) + if symbol in following: + return True + ctx = ctx.parentCtx + + if Token.EPSILON in following and symbol == Token.EOF: + return True + else: + return False + + # Computes the set of input symbols which could follow the current parser + # state and context, as given by {@link #getState} and {@link #getContext}, + # respectively. + # + # @see ATN#getExpectedTokens(int, RuleContext) + # + def getExpectedTokens(self): + return self._interp.atn.getExpectedTokens(self.state, self._ctx) + + def getExpectedTokensWithinCurrentRule(self): + atn = self._interp.atn + s = atn.states[self.state] + return atn.nextTokens(s) + + # Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found.# + def getRuleIndex(self, ruleName): + ruleIndex = self.getRuleIndexMap().get(ruleName, None) + if ruleIndex is not None: + return ruleIndex + else: + return -1 + + # Return List<String> of the rule names in your parser instance + # leading up to a call to the current rule. You could override if + # you want more details such as the file/line info of where + # in the ATN a rule is invoked. + # + # this is very useful for error messages. + # + def getRuleInvocationStack(self, p=None): + if p is None: + p = self._ctx + stack = list() + while p is not None: + # compute what follows who invoked us + ruleIndex = p.getRuleIndex() + if ruleIndex<0: + stack.append("n/a") + else: + stack.append(self.ruleNames[ruleIndex]) + p = p.parentCtx + return stack + + # For debugging and other purposes.# + def getDFAStrings(self): + return [ unicode(dfa) for dfa in self._interp.decisionToDFA] + + # For debugging and other purposes.# + def dumpDFA(self): + seenOne = False + for i in range(0, len(self._interp.decisionToDFA)): + dfa = self._interp.decisionToDFA[i] + if len(dfa.states)>0: + if seenOne: + print() + print("Decision " + str(dfa.decision) + ":") + print(dfa.toString(self.literalNames, self.symbolicNames), end='') + seenOne = True + + + def getSourceName(self): + return self._input.sourceName + + # During a parse is sometimes useful to listen in on the rule entry and exit + # events as well as token matches. self is for quick and dirty debugging. + # + def setTrace(self, trace): + if not trace: + self.removeParseListener(self._tracer) + self._tracer = None + else: + if self._tracer is not None: + self.removeParseListener(self._tracer) + self._tracer = TraceListener(self) + self.addParseListener(self._tracer) diff --git a/runtime/Python2/src/antlr4/ParserInterpreter.py b/runtime/Python2/src/antlr4/ParserInterpreter.py new file mode 100644 index 000000000..ed059f9d5 --- /dev/null +++ b/runtime/Python2/src/antlr4/ParserInterpreter.py @@ -0,0 +1,187 @@ +# +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# A parser simulator that mimics what ANTLR's generated +# parser code does. A ParserATNSimulator is used to make +# predictions via adaptivePredict but this class moves a pointer through the +# ATN to simulate parsing. ParserATNSimulator just +# makes us efficient rather than having to backtrack, for example. +# +# This properly creates parse trees even for left recursive rules. +# +# We rely on the left recursive rule invocation and special predicate +# transitions to make left recursive rules work. +# +# See TestParserInterpreter for examples. 
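A rough usage sketch, assuming the grammar's ATN has already been deserialized and a token stream prepared (tokenNames, ruleNames, atn, tokens, and startRuleIndex are placeholders):

    interp = ParserInterpreter("Expr.g4", tokenNames, ruleNames, atn, tokens)
    tree = interp.parse(startRuleIndex)   # root ParserRuleContext for that rule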
+# +from antlr4 import PredictionContextCache +from antlr4.dfa.DFA import DFA +from antlr4.Parser import Parser +from antlr4.ParserRuleContext import InterpreterRuleContext +from antlr4.Token import Token +from antlr4.atn.ATNState import StarLoopEntryState, ATNState, LoopEndState +from antlr4.atn.ParserATNSimulator import ParserATNSimulator +from antlr4.atn.Transition import Transition +from antlr4.error.Errors import RecognitionException, UnsupportedOperationException, FailedPredicateException + + +class ParserInterpreter(Parser): + + def __init__(self, grammarFileName, tokenNames, ruleNames, atn, input): + super(ParserInterpreter, self).__init__(input) + self.grammarFileName = grammarFileName + self.atn = atn + self.tokenNames = tokenNames + self.ruleNames = ruleNames + self.decisionToDFA = [ DFA(state) for state in atn.decisionToState ] + self.sharedContextCache = PredictionContextCache() + self._parentContextStack = list() + # identify the ATN states where pushNewRecursionContext must be called + self.pushRecursionContextStates = set() + for state in atn.states: + if not isinstance(state, StarLoopEntryState): + continue + if state.precedenceRuleDecision: + self.pushRecursionContextStates.add(state.stateNumber) + # get atn simulator that knows how to do predictions + self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache) + + # Begin parsing at startRuleIndex# + def parse(self, startRuleIndex): + startRuleStartState = self.atn.ruleToStartState[startRuleIndex] + rootContext = InterpreterRuleContext(None, ATNState.INVALID_STATE_NUMBER, startRuleIndex) + if startRuleStartState.isPrecedenceRule: + self.enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0) + else: + self.enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex) + while True: + p = self.getATNState() + if p.stateType==ATNState.RULE_STOP : + # pop; return from rule + if len(self._ctx)==0: + if startRuleStartState.isPrecedenceRule: + result = self._ctx + parentContext = self._parentContextStack.pop() + self.unrollRecursionContexts(parentContext.a) + return result + else: + self.exitRule() + return rootContext + self.visitRuleStopState(p) + + else: + try: + self.visitState(p) + except RecognitionException as e: + self.state = self.atn.ruleToStopState[p.ruleIndex].stateNumber + self._ctx.exception = e + self._errHandler.reportError(self, e) + self._errHandler.recover(self, e) + + def enterRecursionRule(self, localctx, state, ruleIndex, precedence): + self._parentContextStack.append((self._ctx, localctx.invokingState)) + super(ParserInterpreter, self).enterRecursionRule(localctx, state, ruleIndex, precedence) + + def getATNState(self): + return self.atn.states[self.state] + + def visitState(self, p): + edge = 0 + if len(p.transitions) > 1: + self._errHandler.sync(self) + edge = self._interp.adaptivePredict(self._input, p.decision, self._ctx) + else: + edge = 1 + + transition = p.transitions[edge - 1] + tt = transition.serializationType + if tt==Transition.EPSILON: + + if self.pushRecursionContextStates[p.stateNumber] and not isinstance(transition.target, LoopEndState): + t = self._parentContextStack[-1] + ctx = InterpreterRuleContext(t[0], t[1], self._ctx.ruleIndex) + self.pushNewRecursionContext(ctx, self.atn.ruleToStartState[p.ruleIndex].stateNumber, self._ctx.ruleIndex) + + elif tt==Transition.ATOM: + + self.match(transition.label) + + elif tt in [ Transition.RANGE, Transition.SET, Transition.NOT_SET]: + + if not transition.matches(self._input.LA(1), 
Token.MIN_USER_TOKEN_TYPE, 0xFFFF): + self._errHandler.recoverInline(self) + self.matchWildcard() + + elif tt==Transition.WILDCARD: + + self.matchWildcard() + + elif tt==Transition.RULE: + + ruleStartState = transition.target + ruleIndex = ruleStartState.ruleIndex + ctx = InterpreterRuleContext(self._ctx, p.stateNumber, ruleIndex) + if ruleStartState.isPrecedenceRule: + self.enterRecursionRule(ctx, ruleStartState.stateNumber, ruleIndex, transition.precedence) + else: + self.enterRule(ctx, transition.target.stateNumber, ruleIndex) + + elif tt==Transition.PREDICATE: + + if not self.sempred(self._ctx, transition.ruleIndex, transition.predIndex): + raise FailedPredicateException(self) + + elif tt==Transition.ACTION: + + self.action(self._ctx, transition.ruleIndex, transition.actionIndex) + + elif tt==Transition.PRECEDENCE: + + if not self.precpred(self._ctx, transition.precedence): + msg = "precpred(_ctx, " + str(transition.precedence) + ")" + raise FailedPredicateException(self, msg) + + else: + raise UnsupportedOperationException("Unrecognized ATN transition type.") + + self.state = transition.target.stateNumber + + def visitRuleStopState(self, p): + ruleStartState = self.atn.ruleToStartState[p.ruleIndex] + if ruleStartState.isPrecedenceRule: + parentContext = self._parentContextStack.pop() + self.unrollRecursionContexts(parentContext.a) + self.state = parentContext[1] + else: + self.exitRule() + + ruleTransition = self.atn.states[self.state].transitions[0] + self.state = ruleTransition.followState.stateNumber diff --git a/runtime/Python2/src/antlr4/ParserRuleContext.py b/runtime/Python2/src/antlr4/ParserRuleContext.py new file mode 100644 index 000000000..290a10af1 --- /dev/null +++ b/runtime/Python2/src/antlr4/ParserRuleContext.py @@ -0,0 +1,188 @@ +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#* A rule invocation record for parsing. +# +# Contains all of the information about the current rule not stored in the +# RuleContext. 
It handles parse tree children list, Any ATN state +# tracing, and the default values available for rule indications: +# start, stop, rule index, current alt number, current +# ATN state. +# +# Subclasses made for each rule and grammar track the parameters, +# return values, locals, and labels specific to that rule. These +# are the objects that are returned from rules. +# +# Note text is not an actual field of a rule return value; it is computed +# from start and stop using the input stream's toString() method. I +# could add a ctor to this so that we can pass in and store the input +# stream, but I'm not sure we want to do that. It would seem to be undefined +# to get the .text property anyway if the rule matches tokens from multiple +# input streams. +# +# I do not use getters for fields of objects that are used simply to +# group values such as this aggregate. The getters/setters are there to +# satisfy the superclass interface. + +from antlr4.RuleContext import RuleContext +from antlr4.tree.Tree import TerminalNodeImpl, ErrorNodeImpl, TerminalNode, INVALID_INTERVAL + +class ParserRuleContext(RuleContext): + + def __init__(self, parent = None, invokingStateNumber = None ): + super(ParserRuleContext, self).__init__(parent, invokingStateNumber) + #* If we are debugging or building a parse tree for a visitor, + # we need to track all of the tokens and rule invocations associated + # with this rule's context. This is empty for parsing w/o tree constr. + # operation because we don't the need to track the details about + # how we parse this rule. + #/ + self.children = None + self.start = None + self.stop = None + # The exception that forced this rule to return. If the rule successfully + # completed, this is {@code null}. + self.exception = None + + #* COPY a ctx (I'm deliberately not using copy constructor)#/ + def copyFrom(self, ctx): + # from RuleContext + self.parentCtx = ctx.parentCtx + self.invokingState = ctx.invokingState + self.children = None + self.start = ctx.start + self.stop = ctx.stop + + # Double dispatch methods for listeners + def enterRule(self, listener): + pass + + def exitRule(self, listener): + pass + + #* Does not set parent link; other add methods do that#/ + def addChild(self, child): + if self.children is None: + self.children = [] + self.children.append(child) + return child + + #* Used by enterOuterAlt to toss out a RuleContext previously added as + # we entered a rule. If we have # label, we will need to remove + # generic ruleContext object. 
+ #/ + def removeLastChild(self): + if self.children is not None: + del self.children[len(self.children)-1] + + def addTokenNode(self, token): + node = TerminalNodeImpl(token) + self.addChild(node) + node.parentCtx = self + return node + + def addErrorNode(self, badToken): + node = ErrorNodeImpl(badToken) + self.addChild(node) + node.parentCtx = self + return node + + def getChild(self, i, ttype = None): + if ttype is None: + return self.children[i] if len(self.children)>i else None + else: + for child in self.getChildren(): + if not isinstance(child, ttype): + continue + if i==0: + return child + i -= 1 + return None + + def getChildren(self, predicate = None): + if self.children is not None: + for child in self.children: + if predicate is not None and not predicate(child): + continue + yield child + + def getToken(self, ttype, i): + for child in self.getChildren(): + if not isinstance(child, TerminalNode): + continue + if child.symbol.type != ttype: + continue + if i==0: + return child + i -= 1 + return None + + def getTokens(self, ttype ): + if self.getChildren() is None: + return [] + tokens = [] + for child in self.getChildren(): + if not isinstance(child, TerminalNode): + continue + if child.symbol.type != ttype: + continue + tokens.append(child) + return tokens + + def getTypedRuleContext(self, ctxType, i): + return self.getChild(i, ctxType) + + def getTypedRuleContexts(self, ctxType): + children = self.getChildren() + if children is None: + return [] + contexts = [] + for child in children: + if not isinstance(child, ctxType): + continue + contexts.append(child) + return contexts + + def getChildCount(self): + return len(self.children) if self.children else 0 + + def getSourceInterval(self): + if self.start is None or self.stop is None: + return INVALID_INTERVAL + else: + return (self.start.tokenIndex, self.stop.tokenIndex) + + +RuleContext.EMPTY = ParserRuleContext() + +class InterpreterRuleContext(ParserRuleContext): + + def __init__(self, parent, invokingStateNumber, ruleIndex): + super(InterpreterRuleContext, self).__init__(parent, invokingStateNumber) + self.ruleIndex = ruleIndex diff --git a/runtime/Python2/src/antlr4/PredictionContext.py b/runtime/Python2/src/antlr4/PredictionContext.py new file mode 100644 index 000000000..b7b941f87 --- /dev/null +++ b/runtime/Python2/src/antlr4/PredictionContext.py @@ -0,0 +1,660 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ +from io import StringIO + +from antlr4.RuleContext import RuleContext +from antlr4.atn.ATNState import ATNState + + +class PredictionContext(object): + + # Represents {@code $} in local context prediction, which means wildcard. + # {@code#+x =#}. + #/ + EMPTY = None + + # Represents {@code $} in an array in full context mode, when {@code $} + # doesn't mean wildcard: {@code $ + x = [$,x]}. Here, + # {@code $} = {@link #EMPTY_RETURN_STATE}. + #/ + EMPTY_RETURN_STATE = 0x7FFFFFFF + + globalNodeCount = 1 + id = globalNodeCount + + # Stores the computed hash code of this {@link PredictionContext}. The hash + # code is computed in parts to match the following reference algorithm. + # + #
      +    #  private int referenceHashCode() {
      +    #      int hash = {@link MurmurHash#initialize MurmurHash.initialize}({@link #INITIAL_HASH});
      +    #
      +    #      for (int i = 0; i < {@link #size()}; i++) {
      +    #          hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getParent getParent}(i));
      +    #      }
      +    #
      +    #      for (int i = 0; i < {@link #size()}; i++) {
      +    #          hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getReturnState getReturnState}(i));
      +    #      }
      +    #
      +    #      hash = {@link MurmurHash#finish MurmurHash.finish}(hash, 2# {@link #size()});
      +    #      return hash;
      +    #  }
      +    # 
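The Python port below stands in for the MurmurHash reference by hashing the contexts' string forms; because contexts hash and compare by value, they can be canonicalized through the cache defined further down. A small sketch (the return state 42 is arbitrary):

    cache = PredictionContextCache()
    ctx = SingletonPredictionContext.create(PredictionContext.EMPTY, 42)
    ctx = cache.add(ctx)                  # returns an equal cached instance if one exists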
      + #/ + + def __init__(self, cachedHashCode): + self.cachedHashCode = cachedHashCode + + # This means only the {@link #EMPTY} context is in set. + def isEmpty(self): + return self is self.EMPTY + + def hasEmptyPath(self): + return self.getReturnState(len(self) - 1) == self.EMPTY_RETURN_STATE + + def __hash__(self): + return self.cachedHashCode + + def __str__(self): + return unicode(self) + + +def calculateHashCode(parent, returnState): + return hash( str(parent) + str(returnState)) + +def calculateEmptyHashCode(): + return hash("") + + +# Used to cache {@link PredictionContext} objects. Its used for the shared +# context cash associated with contexts in DFA states. This cache +# can be used for both lexers and parsers. + +class PredictionContextCache(object): + + def __init__(self): + self.cache = dict() + + # Add a context to the cache and return it. If the context already exists, + # return that one instead and do not add a new context to the cache. + # Protect shared cache from unsafe thread access. + # + def add(self, ctx): + if ctx==PredictionContext.EMPTY: + return PredictionContext.EMPTY + existing = self.cache.get(ctx, None) + if existing is not None: + return existing + self.cache[ctx] = ctx + return ctx + + def get(self, ctx): + return self.cache.get(ctx, None) + + def __len__(self): + return len(self.cache) + + +class SingletonPredictionContext(PredictionContext): + + @staticmethod + def create(parent , returnState ): + if returnState == PredictionContext.EMPTY_RETURN_STATE and parent is None: + # someone can pass in the bits of an array ctx that mean $ + return SingletonPredictionContext.EMPTY + else: + return SingletonPredictionContext(parent, returnState) + + def __init__(self, parent, returnState): + assert returnState!=ATNState.INVALID_STATE_NUMBER + hashCode = calculateHashCode(parent, returnState) if parent is not None else calculateEmptyHashCode() + super(SingletonPredictionContext, self).__init__(hashCode) + self.parentCtx = parent + self.returnState = returnState + + def __len__(self): + return 1 + + def getParent(self, index): + assert index == 0 + return self.parentCtx + + def getReturnState(self, index): + assert index == 0 + return self.returnState + + def __eq__(self, other): + if self is other: + return True + elif other is None: + return False + elif not isinstance(other, SingletonPredictionContext): + return False + elif hash(self) != hash(other): + return False # can't be same if hash is different + else: + return self.returnState == other.returnState and self.parentCtx==other.parentCtx + + def __hash__(self): + return self.cachedHashCode + + def __unicode__(self): + up = "" if self.parentCtx is None else unicode(self.parentCtx) + if len(up)==0: + if self.returnState == self.EMPTY_RETURN_STATE: + return u"$" + else: + return unicode(self.returnState) + else: + return unicode(self.returnState) + u" " + up + + +class EmptyPredictionContext(SingletonPredictionContext): + + def __init__(self): + super(EmptyPredictionContext, self).__init__(None, self.EMPTY_RETURN_STATE) + + def isEmpty(self): + return True + + def getParent(self, index): + return None + + def getReturnState(self, index): + return self.returnState + + def __eq__(self, other): + return self is other + + def __unicode__(self): + return "$" + + +PredictionContext.EMPTY = EmptyPredictionContext() + +class ArrayPredictionContext(PredictionContext): + # Parent can be null only if full ctx mode and we make an array + # from {@link #EMPTY} and non-empty. 
We merge {@link #EMPTY} by using null parent and + # returnState == {@link #EMPTY_RETURN_STATE}. + + def __init__(self, parents, returnStates): + super(ArrayPredictionContext, self).__init__(calculateHashCode(parents, returnStates)) + assert parents is not None and len(parents)>0 + assert returnStates is not None and len(returnStates)>0 + self.parents = parents + self.returnStates = returnStates + + def isEmpty(self): + # since EMPTY_RETURN_STATE can only appear in the last position, we + # don't need to verify that size==1 + return self.returnStates[0]==PredictionContext.EMPTY_RETURN_STATE + + def __len__(self): + return len(self.returnStates) + + def getParent(self, index): + return self.parents[index] + + def getReturnState(self, index): + return self.returnStates[index] + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, ArrayPredictionContext): + return False + elif hash(self) != hash(other): + return False # can't be same if hash is different + else: + return self.returnStates==other.returnStates and self.parents==other.parents + + def __unicode__(self): + if self.isEmpty(): + return "[]" + with StringIO() as buf: + buf.write(u"[") + for i in range(0,len(self.returnStates)): + if i>0: + buf.write(u", ") + if self.returnStates[i]==PredictionContext.EMPTY_RETURN_STATE: + buf.write(u"$") + continue + buf.write(self.returnStates[i]) + if self.parents[i] is not None: + buf.write(u' ') + buf.write(unicode(self.parents[i])) + else: + buf.write(u"null") + buf.write(u"]") + return buf.getvalue() + + +# Convert a {@link RuleContext} tree to a {@link PredictionContext} graph. +# Return {@link #EMPTY} if {@code outerContext} is empty or null. +#/ +def PredictionContextFromRuleContext(atn, outerContext=None): + if outerContext is None: + outerContext = RuleContext.EMPTY + + # if we are in RuleContext of start rule, s, then PredictionContext + # is EMPTY. Nobody called us. (if we are empty, return empty) + if outerContext.parentCtx is None or outerContext is RuleContext.EMPTY: + return PredictionContext.EMPTY + + # If we have a parent, convert it to a PredictionContext graph + parent = PredictionContextFromRuleContext(atn, outerContext.parentCtx) + state = atn.states[outerContext.invokingState] + transition = state.transitions[0] + return SingletonPredictionContext.create(parent, transition.followState.stateNumber) + + +def calculateListsHashCode(parents, returnStates ): + + with StringIO() as s: + for parent in parents: + s.write(unicode(parent)) + for returnState in returnStates: + s.write(unicode(returnState)) + return hash(s.getvalue()) + +def merge(a, b, rootIsWildcard, mergeCache): + assert a is not None and b is not None # must be empty context, never null + + # share same graph if both same + if a==b: + return a + + if isinstance(a, SingletonPredictionContext) and isinstance(b, SingletonPredictionContext): + return mergeSingletons(a, b, rootIsWildcard, mergeCache) + + # At least one of a or b is array + # If one is $ and rootIsWildcard, return $ as# wildcard + if rootIsWildcard: + if isinstance( a, EmptyPredictionContext ): + return a + if isinstance( b, EmptyPredictionContext ): + return b + + # convert singleton so both are arrays to normalize + if isinstance( a, SingletonPredictionContext ): + a = ArrayPredictionContext(a) + if isinstance( b, SingletonPredictionContext): + b = ArrayPredictionContext(b) + return mergeArrays(a, b, rootIsWildcard, mergeCache) + + +# +# Merge two {@link SingletonPredictionContext} instances. +# +#

+# Stack tops equal, parents merge is same; return left graph.
+#
+# Same stack top, parents differ; merge parents giving array node, then
+# remainders of those graphs. A new root node is created to point to the
+# merged parents.
+#
+# Different stack tops pointing to same parent. Make array node for the
+# root where both elements in the root point to the same (original)
+# parent.
+#
+# Different stack tops pointing to different parents. Make array node for
+# the root where each element points to the corresponding original
+# parent.
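+#
+# As a rough, non-normative sketch of the first case (using only names defined
+# earlier in this module):
+#
+#   a = SingletonPredictionContext.create(PredictionContext.EMPTY, 7)
+#   b = SingletonPredictionContext.create(PredictionContext.EMPTY, 7)
+#   merged = merge(a, b, rootIsWildcard=True, mergeCache=None)
+#   # equal stack tops with equal parents: the left graph itself is returned,
+#   # so merged == a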

      +# +# @param a the first {@link SingletonPredictionContext} +# @param b the second {@link SingletonPredictionContext} +# @param rootIsWildcard {@code true} if this is a local-context merge, +# otherwise false to indicate a full-context merge +# @param mergeCache +#/ +def mergeSingletons(a, b, rootIsWildcard, mergeCache): + if mergeCache is not None: + previous = mergeCache.get(a,b) + if previous is not None: + return previous + previous = mergeCache.get(b,a) + if previous is not None: + return previous + + rootMerge = mergeRoot(a, b, rootIsWildcard) + if rootMerge is not None: + if mergeCache is not None: + mergeCache.put(a, b, rootMerge) + return rootMerge + + if a.returnState==b.returnState: + parent = merge(a.parentCtx, b.parentCtx, rootIsWildcard, mergeCache) + # if parent is same as existing a or b parent or reduced to a parent, return it + if parent == a.parentCtx: + return a # ax + bx = ax, if a=b + if parent == b.parentCtx: + return b # ax + bx = bx, if a=b + # else: ax + ay = a'[x,y] + # merge parents x and y, giving array node with x,y then remainders + # of those graphs. dup a, a' points at merged array + # new joined parent so create new singleton pointing to it, a' + a_ = SingletonPredictionContext.create(parent, a.returnState) + if mergeCache is not None: + mergeCache.put(a, b, a_) + return a_ + else: # a != b payloads differ + # see if we can collapse parents due to $+x parents if local ctx + singleParent = None + if a is b or (a.parentCtx is not None and a.parentCtx==b.parentCtx): # ax + bx = [a,b]x + singleParent = a.parentCtx + if singleParent is not None: # parents are same + # sort payloads and use same parent + payloads = [ a.returnState, b.returnState ] + if a.returnState > b.returnState: + payloads[0] = b.returnState + payloads[1] = a.returnState + parents = [singleParent, singleParent] + a_ = ArrayPredictionContext(parents, payloads); + if mergeCache is not None: + mergeCache.put(a, b, a_) + return a_ + # parents differ and can't merge them. Just pack together + # into array; can't merge. + # ax + by = [ax,by] + payloads = [ a.returnState, b.returnState ] + parents = [ a.parentCtx, b.parentCtx ] + if a.returnState > b.returnState: # sort by payload + payloads[0] = b.returnState + payloads[1] = a.returnState + parents = [ b.parentCtx, a.parentCtx ] + a_ = ArrayPredictionContext(parents, payloads) + if mergeCache is not None: + mergeCache.put(a, b, a_) + return a_ + + +# +# Handle case where at least one of {@code a} or {@code b} is +# {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used +# to represent {@link #EMPTY}. +# +#

+# Local-Context Merges
+#
+# These local-context merge operations are used when {@code rootIsWildcard}
+# is true.
+#
+# {@link #EMPTY} is superset of any graph; return {@link #EMPTY}.
+#
+# {@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is
+# {@code #EMPTY}; return left graph.
+#
+# Special case of last merge if local context.
+#
+# Full-Context Merges
+#
+# These full-context merge operations are used when {@code rootIsWildcard}
+# is false.
+#
+# Must keep all contexts; {@link #EMPTY} in array is a special value (and
+# null parent).
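+#
+# A small, non-normative sketch of the rules above (names are those defined in
+# this module):
+#
+#   x = SingletonPredictionContext.create(PredictionContext.EMPTY, 5)
+#   mergeRoot(PredictionContext.EMPTY, x, rootIsWildcard=True)
+#   # local-context merge: $ acts as a wildcard, so PredictionContext.EMPTY
+#   # is returned
+#   r = mergeRoot(PredictionContext.EMPTY, x, rootIsWildcard=False)
+#   # full-context merge: both contexts are kept, so r is an
+#   # ArrayPredictionContext holding x's return state and the $ marker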

      +# +# @param a the first {@link SingletonPredictionContext} +# @param b the second {@link SingletonPredictionContext} +# @param rootIsWildcard {@code true} if this is a local-context merge, +# otherwise false to indicate a full-context merge +#/ +def mergeRoot(a, b, rootIsWildcard): + if rootIsWildcard: + if a == PredictionContext.EMPTY: + return PredictionContext.EMPTY ## + b =# + if b == PredictionContext.EMPTY: + return PredictionContext.EMPTY # a +# =# + else: + if a == PredictionContext.EMPTY and b == PredictionContext.EMPTY: + return PredictionContext.EMPTY # $ + $ = $ + elif a == PredictionContext.EMPTY: # $ + x = [$,x] + payloads = [ b.returnState, PredictionContext.EMPTY_RETURN_STATE ] + parents = [ b.parentCtx, None ] + return ArrayPredictionContext(parents, payloads) + elif b == PredictionContext.EMPTY: # x + $ = [$,x] ($ is always first if present) + payloads = [ a.returnState, PredictionContext.EMPTY_RETURN_STATE ] + parents = [ a.parentCtx, None ] + return ArrayPredictionContext(parents, payloads) + return None + + +# +# Merge two {@link ArrayPredictionContext} instances. +# +#

+# Different tops, different parents.
+#
+# Shared top, same parents.
+#
+# Shared top, different parents.
+#
+# Shared top, all shared parents.
+#
+# Equal tops, merge parents and reduce top to
+# {@link SingletonPredictionContext}.
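+#
+# For orientation, a non-normative sketch of how such array nodes arise in the
+# first place (different stack tops merged under a shared parent, using only
+# names defined in this module):
+#
+#   a = SingletonPredictionContext.create(PredictionContext.EMPTY, 3)
+#   b = SingletonPredictionContext.create(PredictionContext.EMPTY, 8)
+#   ab = merge(a, b, rootIsWildcard=True, mergeCache=None)
+#   # ab is an ArrayPredictionContext with return states [3, 8] sharing the
+#   # $ parent; mergeArrays then operates on nodes of this shape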

      +#/ +def mergeArrays(a, b, rootIsWildcard, mergeCache): + if mergeCache is not None: + previous = mergeCache.get(a,b) + if previous is not None: + return previous + previous = mergeCache.get(b,a) + if previous is not None: + return previous + + # merge sorted payloads a + b => M + i = 0; # walks a + j = 0; # walks b + k = 0; # walks target M array + + mergedReturnStates = [] * (len(a.returnState) + len( b.returnStates)) + mergedParents = [] * len(mergedReturnStates) + # walk and merge to yield mergedParents, mergedReturnStates + while i ax + if bothDollars or ax_ax: + mergedParents[k] = a_parent # choose left + mergedReturnStates[k] = payload + else: # ax+ay -> a'[x,y] + mergedParent = merge(a_parent, b_parent, rootIsWildcard, mergeCache) + mergedParents[k] = mergedParent + mergedReturnStates[k] = payload + i += 1 # hop over left one as usual + j += 1 # but also skip one in right side since we merge + elif a.returnStates[i] a, copy b[j] to M + mergedParents[k] = b_parent + mergedReturnStates[k] = b.returnStates[j] + j += 1 + k += 1 + + # copy over any payloads remaining in either array + if i < len(a.returnStates): + for p in range(i, len(a.returnStates)): + mergedParents[k] = a.parents[p] + mergedReturnStates[k] = a.returnStates[p] + k += 1 + else: + for p in range(j, len(b.returnStates)): + mergedParents[k] = b.parents[p] + mergedReturnStates[k] = b.returnStates[p] + k += 1 + + # trim merged if we combined a few that had same stack tops + if k < len(mergedParents): # write index < last position; trim + if k == 1: # for just one merged element, return singleton top + a_ = SingletonPredictionContext.create(mergedParents[0], mergedReturnStates[0]) + if mergeCache is not None: + mergeCache.put(a,b,a_) + return a_ + mergedParents = mergedParents[0:k] + mergedReturnStates = mergedReturnStates[0:k] + + M = ArrayPredictionContext(mergedParents, mergedReturnStates) + + # if we created same array as a or b, return that instead + # TODO: track whether this is possible above during merge sort for speed + if M==a: + if mergeCache is not None: + mergeCache.put(a,b,a) + return a + if M==b: + if mergeCache is not None: + mergeCache.put(a,b,b) + return b + combineCommonParents(mergedParents) + + if mergeCache is not None: + mergeCache.put(a,b,M) + return M + + +# +# Make pass over all M {@code parents}; merge any {@code equals()} +# ones. 
+#/ +def combineCommonParents(parents): + uniqueParents = dict() + + for p in range(0, len(parents)): + parent = parents[p] + if uniqueParents.get(parent, None) is None: + uniqueParents[parent] = parent + + for p in range(0, len(parents)): + parents[p] = uniqueParents[parents[p]] + +def getCachedPredictionContext(context, contextCache, visited): + if context.isEmpty(): + return context + existing = visited.get(context) + if existing is not None: + return existing + existing = contextCache.get(context) + if existing is not None: + visited[context] = existing + return existing + changed = False + parents = [None] * len(context) + for i in range(0, len(parents)): + parent = getCachedPredictionContext(context.getParent(i), contextCache, visited) + if changed or parent is not context.getParent(i): + if not changed: + parents = [None] * len(context) + for j in range(0, len(context)): + parents[j] = context.getParent(j) + changed = True + parents[i] = parent + if not changed: + contextCache.add(context) + visited[context] = context + return context + updated = None + if len(parents) == 0: + updated = PredictionContext.EMPTY + elif len(parents) == 1: + updated = SingletonPredictionContext.create(parents[0], context.getReturnState(0)) + else: + updated = ArrayPredictionContext(parents, context.returnStates) + + contextCache.add(updated) + visited[updated] = updated + visited[context] = updated + + return updated + + +# # extra structures, but cut/paste/morphed works, so leave it. +# # seems to do a breadth-first walk +# public static List getAllNodes(PredictionContext context) { +# Map visited = +# new IdentityHashMap(); +# Deque workList = new ArrayDeque(); +# workList.add(context); +# visited.put(context, context); +# List nodes = new ArrayList(); +# while (!workList.isEmpty()) { +# PredictionContext current = workList.pop(); +# nodes.add(current); +# for (int i = 0; i < current.size(); i++) { +# PredictionContext parent = current.getParent(i); +# if ( parent!=null && visited.put(parent, parent) == null) { +# workList.push(parent); +# } +# } +# } +# return nodes; +# } + +# ter's recursive version of Sam's getAllNodes() +def getAllContextNodes(context, nodes=None, visited=None): + if nodes is None: + nodes = list() + return getAllContextNodes(context, nodes, visited) + elif visited is None: + visited = dict() + return getAllContextNodes(context, nodes, visited) + else: + if context is None or visited.get(context, None) is not None: + return nodes + visited.put(context, context) + nodes.add(context) + for i in range(0, len(context)): + getAllContextNodes(context.getParent(i), nodes, visited); + return nodes + diff --git a/runtime/Python2/src/antlr4/Recognizer.py b/runtime/Python2/src/antlr4/Recognizer.py new file mode 100644 index 000000000..4c05cd5ad --- /dev/null +++ b/runtime/Python2/src/antlr4/Recognizer.py @@ -0,0 +1,168 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. 
The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +from __builtin__ import unicode + +from antlr4.Token import Token +from antlr4.error.ErrorListener import ProxyErrorListener, ConsoleErrorListener + + +class Recognizer(object): + + tokenTypeMapCache = dict() + ruleIndexMapCache = dict() + + def __init__(self): + self._listeners = [ ConsoleErrorListener.INSTANCE ] + self._interp = None + self._stateNumber = -1 + + def extractVersion(self, version): + pos = version.find(".") + major = version[0:pos] + version = version[pos+1:] + pos = version.find(".") + if pos==-1: + pos = version.find("-") + if pos==-1: + pos = len(version) + minor = version[0:pos] + return major, minor + + def checkVersion(self, toolVersion): + runtimeVersion = "4.5.2" + rvmajor, rvminor = self.extractVersion(runtimeVersion) + tvmajor, tvminor = self.extractVersion(toolVersion) + if rvmajor!=tvmajor or rvminor!=tvminor: + print("ANTLR runtime and generated code versions disagree: "+runtimeVersion+"!="+toolVersion) + + def addErrorListener(self, listener): + self._listeners.append(listener) + + def removeErrorListener(self, listener): + self._listeners.remove(listener) + + def removeErrorListeners(self): + self._listeners = [] + + def getTokenTypeMap(self): + tokenNames = self.getTokenNames() + if tokenNames is None: + from antlr4.error.Errors import UnsupportedOperationException + raise UnsupportedOperationException("The current recognizer does not provide a list of token names.") + result = self.tokenTypeMapCache.get(tokenNames, None) + if result is None: + result = zip( tokenNames, range(0, len(tokenNames))) + result["EOF"] = Token.EOF + self.tokenTypeMapCache[tokenNames] = result + return result + + # Get a map from rule names to rule indexes. + # + #

+    # Used for XPath and tree pattern compilation.
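+    #
+    # A hedged usage sketch ("p" stands for any generated parser instance and
+    # "expr" for any rule name; both are illustrative only). Note that the
+    # zip() result built below is a list of pairs in Python 2, so a
+    # dictionary-style lookup needs dict() around it:
+    #
+    #   index = dict(p.getRuleIndexMap()).get("expr")   # rule name -> index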

      + # + def getRuleIndexMap(self): + ruleNames = self.getRuleNames() + if ruleNames is None: + from antlr4.error.Errors import UnsupportedOperationException + raise UnsupportedOperationException("The current recognizer does not provide a list of rule names.") + result = self.ruleIndexMapCache.get(ruleNames, None) + if result is None: + result = zip( ruleNames, range(0, len(ruleNames))) + self.ruleIndexMapCache[ruleNames] = result + return result + + def getTokenType(self, tokenName): + ttype = self.getTokenTypeMap().get(tokenName, None) + if ttype is not None: + return ttype + else: + return Token.INVALID_TYPE + + + # What is the error header, normally line/character position information?# + def getErrorHeader(self, e): + line = e.getOffendingToken().line + column = e.getOffendingToken().column + return u"line " + unicode(line) + u":" + unicode(column) + + + # How should a token be displayed in an error message? The default + # is to display just the text, but during development you might + # want to have a lot of information spit out. Override in that case + # to use t.toString() (which, for CommonToken, dumps everything about + # the token). This is better than forcing you to override a method in + # your token objects because you don't have to go modify your lexer + # so that it creates a new Java type. + # + # @deprecated This method is not called by the ANTLR 4 Runtime. Specific + # implementations of {@link ANTLRErrorStrategy} may provide a similar + # feature when necessary. For example, see + # {@link DefaultErrorStrategy#getTokenErrorDisplay}. + # + def getTokenErrorDisplay(self, t): + if t is None: + return u"" + s = t.text + if s is None: + if t.type==Token.EOF: + s = u"" + else: + s = u"<" + unicode(t.type) + u">" + s = s.replace(u"\n",u"\\n") + s = s.replace(u"\r",u"\\r") + s = s.replace(u"\t",u"\\t") + return u"'" + s + u"'" + + def getErrorListenerDispatch(self): + return ProxyErrorListener(self._listeners) + + # subclass needs to override these if there are sempreds or actions + # that the ATN interp needs to execute + def sempred(self, localctx, ruleIndex, actionIndex): + return True + + def precpred(self, localctx , precedence): + return True + + @property + def state(self): + return self._stateNumber + + # Indicate that the recognizer has changed internal state that is + # consistent with the ATN state passed in. This way we always know + # where we are in the ATN as the parser goes along. The rule + # context objects form a stack that lets us see the stack of + # invoking rules. Combine this and we have complete ATN + # configuration information. + + @state.setter + def state(self, atnState): + self._stateNumber = atnState diff --git a/runtime/Python2/src/antlr4/RuleContext.py b/runtime/Python2/src/antlr4/RuleContext.py new file mode 100644 index 000000000..96eb7396a --- /dev/null +++ b/runtime/Python2/src/antlr4/RuleContext.py @@ -0,0 +1,234 @@ +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ + + +# A rule context is a record of a single rule invocation. It knows +# which context invoked it, if any. If there is no parent context, then +# naturally the invoking state is not valid. The parent link +# provides a chain upwards from the current rule invocation to the root +# of the invocation tree, forming a stack. We actually carry no +# information about the rule associated with this context (except +# when parsing). We keep only the state number of the invoking state from +# the ATN submachine that invoked this. Contrast this with the s +# pointer inside ParserRuleContext that tracks the current state +# being "executed" for the current rule. +# +# The parent contexts are useful for computing lookahead sets and +# getting error information. +# +# These objects are used during parsing and prediction. +# For the special case of parsers, we use the subclass +# ParserRuleContext. +# +# @see ParserRuleContext +#/ +from io import StringIO + +from antlr4.tree.Tree import RuleNode, INVALID_INTERVAL +from antlr4.tree.Trees import Trees + + +class RuleContext(RuleNode): + + EMPTY = None + + def __init__(self, parent=None, invokingState=-1): + super(RuleContext, self).__init__() + # What context invoked this rule? + self.parentCtx = parent + # What state invoked the rule associated with this context? + # The "return address" is the followState of invokingState + # If parent is null, this should be -1. + self.invokingState = invokingState + + + def depth(self): + n = 0 + p = self + while p is not None: + p = p.parentCtx + n += 1 + return n + + # A context is empty if there is no invoking state; meaning nobody call + # current context. + def isEmpty(self): + return self.invokingState == -1 + + # satisfy the ParseTree / SyntaxTree interface + + def getSourceInterval(self): + return INVALID_INTERVAL + + def getRuleContext(self): + return self + + def getPayload(self): + return self + + # Return the combined text of all child nodes. This method only considers + # tokens which have been added to the parse tree. + #

      + # Since tokens on hidden channels (e.g. whitespace or comments) are not + # added to the parse trees, they will not appear in the output of this + # method. + #/ + def getText(self): + if self.getChildCount() == 0: + return u"" + with StringIO() as builder: + for child in self.getChildren(): + builder.write(child.getText()) + return builder.getvalue() + + def getRuleIndex(self): + return -1 + + def getChild(self, i): + return None + + def getChildCount(self): + return 0 + + def getChildren(self): + for c in []: + yield c + + def accept(self, visitor): + return visitor.visitChildren(self) + + # # Call this method to view a parse tree in a dialog box visually.#/ + # public Future inspect(@Nullable Parser parser) { + # List ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null; + # return inspect(ruleNames); + # } + # + # public Future inspect(@Nullable List ruleNames) { + # TreeViewer viewer = new TreeViewer(ruleNames, this); + # return viewer.open(); + # } + # + # # Save this tree in a postscript file#/ + # public void save(@Nullable Parser parser, String fileName) + # throws IOException, PrintException + # { + # List ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null; + # save(ruleNames, fileName); + # } + # + # # Save this tree in a postscript file using a particular font name and size#/ + # public void save(@Nullable Parser parser, String fileName, + # String fontName, int fontSize) + # throws IOException + # { + # List ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null; + # save(ruleNames, fileName, fontName, fontSize); + # } + # + # # Save this tree in a postscript file#/ + # public void save(@Nullable List ruleNames, String fileName) + # throws IOException, PrintException + # { + # Trees.writePS(this, ruleNames, fileName); + # } + # + # # Save this tree in a postscript file using a particular font name and size#/ + # public void save(@Nullable List ruleNames, String fileName, + # String fontName, int fontSize) + # throws IOException + # { + # Trees.writePS(this, ruleNames, fileName, fontName, fontSize); + # } + # + # # Print out a whole tree, not just a node, in LISP format + # # (root child1 .. childN). Print just a node if this is a leaf. + # # We have to know the recognizer so we can get rule names. + # #/ + # @Override + # public String toStringTree(@Nullable Parser recog) { + # return Trees.toStringTree(this, recog); + # } + # + # Print out a whole tree, not just a node, in LISP format + # (root child1 .. childN). Print just a node if this is a leaf. + # + def toStringTree(self, ruleNames=None, recog=None): + return Trees.toStringTree(self, ruleNames=ruleNames, recog=recog) + # } + # + # @Override + # public String toStringTree() { + # return toStringTree((List)null); + # } + # + def __unicode__(self): + return self.toString(None, None) + + # @Override + # public String toString() { + # return toString((List)null, (RuleContext)null); + # } + # + # public final String toString(@Nullable Recognizer recog) { + # return toString(recog, ParserRuleContext.EMPTY); + # } + # + # public final String toString(@Nullable List ruleNames) { + # return toString(ruleNames, null); + # } + # + # // recog null unless ParserRuleContext, in which case we use subclass toString(...) + # public String toString(@Nullable Recognizer recog, @Nullable RuleContext stop) { + # String[] ruleNames = recog != null ? recog.getRuleNames() : null; + # List ruleNamesList = ruleNames != null ? 
Arrays.asList(ruleNames) : null; + # return toString(ruleNamesList, stop); + # } + + def toString(self, ruleNames, stop): + with StringIO() as buf: + p = self + buf.write(u"[") + while p is not None and p is not stop: + if ruleNames is None: + if not p.isEmpty(): + buf.write(unicode(p.invokingState)) + else: + ri = p.getRuleIndex() + ruleName = ruleNames[ri] if ri >= 0 and ri < len(ruleNames) else unicode(ri) + buf.write(ruleName) + + if p.parentCtx is not None and (ruleNames is not None or not p.parentCtx.isEmpty()): + buf.write(u" ") + + p = p.parentCtx + + buf.write(u"]") + return buf.getvalue() + diff --git a/runtime/Python2/src/antlr4/StdinStream.py b/runtime/Python2/src/antlr4/StdinStream.py new file mode 100644 index 000000000..47623b8a2 --- /dev/null +++ b/runtime/Python2/src/antlr4/StdinStream.py @@ -0,0 +1,48 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# This is an InputStream that is loaded from stdin all at once +# when you construct the object. +# + +import codecs +import sys + +from antlr4.InputStream import InputStream + + +class StdinStream(InputStream): + + def __init__(self, encoding='ascii'): + bytes = sys.stdin.read() + data = codecs.decode(bytes, encoding) + super(type(self), self).__init__(data) diff --git a/runtime/Python2/src/antlr4/Token.py b/runtime/Python2/src/antlr4/Token.py new file mode 100644 index 000000000..900def230 --- /dev/null +++ b/runtime/Python2/src/antlr4/Token.py @@ -0,0 +1,184 @@ +#[The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# A token has properties: text, type, line, character position in the line +# (so we can ignore tabs), token channel, index, and source from which +# we obtained this token. +from io import StringIO + + +class Token (object): + + INVALID_TYPE = 0 + + # During lookahead operations, this "token" signifies we hit rule end ATN state + # and did not follow it despite needing to. + EPSILON = -2 + + MIN_USER_TOKEN_TYPE = 1 + + EOF = -1 + + # All tokens go to the parser (unless skip() is called in that rule) + # on a particular "channel". The parser tunes to a particular channel + # so that whitespace etc... can go to the parser on a "hidden" channel. + + DEFAULT_CHANNEL = 0 + + # Anything on different channel than DEFAULT_CHANNEL is not parsed + # by parser. + + HIDDEN_CHANNEL = 1 + + def __init__(self): + self.source = None + self.type = None # token type of the token + self.channel = None # The parser ignores everything not on DEFAULT_CHANNEL + self.start = None # optional; return -1 if not implemented. + self.stop = None # optional; return -1 if not implemented. + self.tokenIndex = None # from 0..n-1 of the token object in the input stream + self.line = None # line=1..n of the 1st character + self.column = None # beginning of the line at which it occurs, 0..n-1 + self._text = None # text of the token. + + @property + def text(self): + return self._text + + # Explicitly set the text for this token. If {code text} is not + # {@code null}, then {@link #getText} will return this value rather than + # extracting the text from the input. + # + # @param text The explicit text of the token, or {@code null} if the text + # should be obtained from the input along with the start and stop indexes + # of the token. + + @text.setter + def text(self, text): + self._text = text + + + def getTokenSource(self): + return self.source[0] + + def getInputStream(self): + return self.source[1] + + def __str__(self): + return unicode(self) + + +class CommonToken(Token): + + + # An empty {@link Pair} which is used as the default value of + # {@link #source} for tokens that do not have a source. 
+ EMPTY_SOURCE = (None, None) + + def __init__(self, source = EMPTY_SOURCE, type = None, channel=Token.DEFAULT_CHANNEL, start=-1, stop=-1): + super(CommonToken, self).__init__() + self.source = source + self.type = type + self.channel = channel + self.start = start + self.stop = stop + self.tokenIndex = -1 + if source[0] is not None: + self.line = source[0].line + self.column = source[0].column + else: + self.column = -1 + + # Constructs a new {@link CommonToken} as a copy of another {@link Token}. + # + #

+    # If {@code oldToken} is also a {@link CommonToken} instance, the newly
+    # constructed token will share a reference to the {@link #text} field and
+    # the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will
+    # be assigned the result of calling {@link #getText}, and {@link #source}
+    # will be constructed from the result of {@link Token#getTokenSource} and
+    # {@link Token#getInputStream}.
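+    #
+    # A brief hedged sketch ("tok" stands for any existing CommonToken and is
+    # illustrative only):
+    #
+    #   dup = tok.clone()
+    #   # dup is a new CommonToken carrying the same source, type, channel,
+    #   # start/stop, token index, line, column and text as tok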

      + # + # @param oldToken The token to copy. + # + def clone(self): + t = CommonToken(self.source, self.type, self.channel, self.start, self.stop) + t.tokenIndex = self.tokenIndex + t.line = self.line + t.column = self.column + t.text = self.text + return t + + @property + def text(self): + if self._text is not None: + return self._text + input = self.getInputStream() + if input is None: + return None + n = input.size + if self.start < n and self.stop < n: + return input.getText(self.start, self.stop) + else: + return u"" + + @text.setter + def text(self, text): + self._text = text + + def __unicode__(self): + with StringIO() as buf: + buf.write(u"[@") + buf.write(unicode(self.tokenIndex)) + buf.write(u",") + buf.write(unicode(self.start)) + buf.write(u":") + buf.write(unicode(self.stop)) + buf.write(u"='") + txt = self.text + if txt is not None: + txt = txt.replace(u"\n",u"\\n") + txt = txt.replace(u"\r",u"\\r") + txt = txt.replace(u"\t",u"\\t") + else: + txt = u"" + buf.write(txt) + buf.write(u"',<") + buf.write(unicode(self.type)) + buf.write(u">") + if self.channel > 0: + buf.write(u",channel=") + buf.write(unicode(self.channel)) + buf.write(u",") + buf.write(unicode(self.line)) + buf.write(u":") + buf.write(unicode(self.column)) + buf.write(u"]") + return buf.getvalue() diff --git a/runtime/Python2/src/antlr4/Utils.py b/runtime/Python2/src/antlr4/Utils.py new file mode 100644 index 000000000..0bc6d5644 --- /dev/null +++ b/runtime/Python2/src/antlr4/Utils.py @@ -0,0 +1,64 @@ +#[The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +from io import StringIO + +def str_collection(val, begin, end): + with StringIO() as buf: + buf.write(begin) + first = True + for item in val: + if not first: + buf.write(u', ') + buf.write(unicode(item)) + first = False + buf.write(end) + return buf.getvalue() + +def str_list(val): + return str_collection(val, u'[', u']') + +def str_set(val): + return str_collection(val, u'{', u'}') + +def escapeWhitespace(s, escapeSpaces): + with StringIO() as buf: + for c in s: + if c==' ' and escapeSpaces: + buf.write(u'\u00B7') + elif c=='\t': + buf.write(u"\\t") + elif c=='\n': + buf.write(u"\\n") + elif c=='\r': + buf.write(u"\\r") + else: + buf.write(unicode(c)) + return buf.getvalue() diff --git a/runtime/Python2/src/antlr4/__init__.py b/runtime/Python2/src/antlr4/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/runtime/Python2/src/antlr4/__init__.py @@ -0,0 +1 @@ + diff --git a/runtime/Python2/src/antlr4/atn/ATN.py b/runtime/Python2/src/antlr4/atn/ATN.py new file mode 100644 index 000000000..c597eb041 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/ATN.py @@ -0,0 +1,147 @@ +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ +from antlr4.IntervalSet import IntervalSet +from antlr4.Token import Token + + +class ATN(object): + + INVALID_ALT_NUMBER = 0 + + # Used for runtime deserialization of ATNs from strings#/ + def __init__(self, grammarType , maxTokenType ): + # The type of the ATN. + self.grammarType = grammarType + # The maximum value for any symbol recognized by a transition in the ATN. + self.maxTokenType = maxTokenType + self.states = [] + # Each subrule/rule is a decision point and we must track them so we + # can go back later and build DFA predictors for them. This includes + # all the rules, subrules, optional blocks, ()+, ()* etc... + self.decisionToState = [] + # Maps from rule index to starting state number. + self.ruleToStartState = [] + # Maps from rule index to stop state number. 
+ self.ruleToStopState = None + self.modeNameToStartState = dict() + # For lexer ATNs, this maps the rule index to the resulting token type. + # For parser ATNs, this maps the rule index to the generated bypass token + # type if the + # {@link ATNDeserializationOptions#isGenerateRuleBypassTransitions} + # deserialization option was specified; otherwise, this is {@code null}. + self.ruleToTokenType = None + # For lexer ATNs, this is an array of {@link LexerAction} objects which may + # be referenced by action transitions in the ATN. + self.lexerActions = None + self.modeToStartState = [] + + # Compute the set of valid tokens that can occur starting in state {@code s}. + # If {@code ctx} is null, the set of tokens will not include what can follow + # the rule surrounding {@code s}. In other words, the set will be + # restricted to tokens reachable staying within {@code s}'s rule. + def nextTokensInContext(self, s, ctx): + from antlr4.LL1Analyzer import LL1Analyzer + anal = LL1Analyzer(self) + return anal.LOOK(s, ctx=ctx) + + # Compute the set of valid tokens that can occur starting in {@code s} and + # staying in same rule. {@link Token#EPSILON} is in set if we reach end of + # rule. + def nextTokensNoContext(self, s): + if s.nextTokenWithinRule is not None: + return s.nextTokenWithinRule + s.nextTokenWithinRule = self.nextTokensInContext(s, None) + s.nextTokenWithinRule.readonly = True + return s.nextTokenWithinRule + + def nextTokens(self, s, ctx = None): + if ctx==None: + return self.nextTokensNoContext(s) + else: + return self.nextTokensInContext(s, ctx) + + def addState(self, state): + if state is not None: + state.atn = self + state.stateNumber = len(self.states) + self.states.append(state) + + def removeState(self, state): + self.states[state.stateNumber] = None # just free mem, don't shift states in list + + def defineDecisionState(self, s): + self.decisionToState.append(s) + s.decision = len(self.decisionToState)-1 + return s.decision + + def getDecisionState(self, decision): + if len(self.decisionToState)==0: + return None + else: + return self.decisionToState[decision] + + # Computes the set of input symbols which could follow ATN state number + # {@code stateNumber} in the specified full {@code context}. This method + # considers the complete parser context, but does not evaluate semantic + # predicates (i.e. all predicates encountered during the calculation are + # assumed true). If a path in the ATN exists from the starting state to the + # {@link RuleStopState} of the outermost context without matching any + # symbols, {@link Token#EOF} is added to the returned set. + # + #

+    # If {@code context} is {@code null}, it is treated as
+    # {@link ParserRuleContext#EMPTY}.
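+    #
+    # For example (a hedged sketch; "atn", "parser" and "ctx" are illustrative
+    # placeholders for an ATN instance, a recognizer, and its current rule
+    # context):
+    #
+    #   expected = atn.getExpectedTokens(parser.state, ctx)
+    #   # expected is an IntervalSet of the token types viable at that state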

      + # + # @param stateNumber the ATN state number + # @param context the full parse context + # @return The set of potentially valid input symbols which could follow the + # specified state in the specified context. + # @throws IllegalArgumentException if the ATN does not contain a state with + # number {@code stateNumber} + #/ + def getExpectedTokens(self, stateNumber, ctx ): + if stateNumber < 0 or stateNumber >= len(self.states): + raise Exception("Invalid state number.") + s = self.states[stateNumber] + following = self.nextTokens(s) + if Token.EPSILON not in following: + return following + expected = IntervalSet() + expected.addSet(following) + expected.removeOne(Token.EPSILON) + while (ctx != None and ctx.invokingState >= 0 and Token.EPSILON in following): + invokingState = self.states[ctx.invokingState] + rt = invokingState.transitions[0] + following = self.nextTokens(rt.followState) + expected.addSet(following) + expected.removeOne(Token.EPSILON) + ctx = ctx.parentCtx + if Token.EPSILON in following: + expected.addOne(Token.EOF) + return expected \ No newline at end of file diff --git a/runtime/Python2/src/antlr4/atn/ATNConfig.py b/runtime/Python2/src/antlr4/atn/ATNConfig.py new file mode 100644 index 000000000..30387ed16 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/ATNConfig.py @@ -0,0 +1,154 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ + +# A tuple: (ATN state, predicted alt, syntactic, semantic context). +# The syntactic context is a graph-structured stack node whose +# path(s) to the root is the rule invocation(s) +# chain used to arrive at the state. The semantic context is +# the tree of semantic predicates encountered before reaching +# an ATN state. 
+#/ +from io import StringIO + +from antlr4.atn.ATNState import DecisionState +from antlr4.atn.SemanticContext import SemanticContext + + +class ATNConfig(object): + + def __init__(self, state=None, alt=None, context=None, semantic=None, config=None): + if config is not None: + if state is None: + state = config.state + if alt is None: + alt = config.alt + if context is None: + context = config.context + if semantic is None: + semantic = config.semanticContext + if semantic is None: + semantic = SemanticContext.NONE + + # The ATN state associated with this configuration#/ + self.state = state + # What alt (or lexer rule) is predicted by this configuration#/ + self.alt = alt + # The stack of invoking states leading to the rule/states associated + # with this config. We track only those contexts pushed during + # execution of the ATN simulator. + self.context = context + self.semanticContext = semantic + # We cannot execute predicates dependent upon local context unless + # we know for sure we are in the correct context. Because there is + # no way to do this efficiently, we simply cannot evaluate + # dependent predicates unless we are in the rule that initially + # invokes the ATN simulator. + # + # closure() tracks the depth of how far we dip into the + # outer context: depth > 0. Note that it may not be totally + # accurate depth since I don't ever decrement. TODO: make it a boolean then + self.reachesIntoOuterContext = 0 if config is None else config.reachesIntoOuterContext + self.precedenceFilterSuppressed = False if config is None else config.precedenceFilterSuppressed + + # An ATN configuration is equal to another if both have + # the same state, they predict the same alternative, and + # syntactic/semantic contexts are the same. + #/ + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, ATNConfig): + return False + else: + return self.state.stateNumber==other.state.stateNumber \ + and self.alt==other.alt \ + and ((self.context is other.context) or (self.context==other.context)) \ + and self.semanticContext==other.semanticContext \ + and self.precedenceFilterSuppressed==other.precedenceFilterSuppressed + + def __hash__(self): + return hash( str(self.state.stateNumber) + "/" + + str(self.alt) + "/" + + str(self.context) + "/" + + str(self.semanticContext) ) + + def __str__(self): + return unicode(self) + + def __unicode__(self): + with StringIO() as buf: + buf.write(u"(") + buf.write(unicode(self.state)) + buf.write(u",") + buf.write(unicode(self.alt)) + if self.context is not None: + buf.write(u",[") + buf.write(unicode(self.context)) + buf.write(u"]") + if self.semanticContext is not None and self.semanticContext is not SemanticContext.NONE: + buf.write(u",") + buf.write(unicode(self.semanticContext)) + if self.reachesIntoOuterContext>0: + buf.write(u",up=") + buf.write(unicode(self.reachesIntoOuterContext)) + buf.write(u')') + return buf.getvalue() + +class LexerATNConfig(ATNConfig): + + def __init__(self, state, alt=None, context=None, semantic=SemanticContext.NONE, lexerActionExecutor=None, config=None): + super(LexerATNConfig, self).__init__(state=state, alt=alt, context=context, semantic=semantic, config=config) + if config is not None: + if lexerActionExecutor is None: + lexerActionExecutor = config.lexerActionExecutor + # This is the backing field for {@link #getLexerActionExecutor}. 
+ self.lexerActionExecutor = lexerActionExecutor + self.passedThroughNonGreedyDecision = False if config is None else self.checkNonGreedyDecision(config, state) + + def __hash__(self): + return hash(str(self.state.stateNumber) + str(self.alt) + str(self.context) \ + + str(self.semanticContext) + str(1 if self.passedThroughNonGreedyDecision else 0) \ + + str(self.lexerActionExecutor)) + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, LexerATNConfig): + return False + if self.passedThroughNonGreedyDecision != other.passedThroughNonGreedyDecision: + return False + if self.lexerActionExecutor is not other.lexerActionExecutor: + return False + return super(LexerATNConfig, self).__eq__(other) + + def checkNonGreedyDecision(self, source, target): + return source.passedThroughNonGreedyDecision \ + or isinstance(target, DecisionState) and target.nonGreedy diff --git a/runtime/Python2/src/antlr4/atn/ATNConfigSet.py b/runtime/Python2/src/antlr4/atn/ATNConfigSet.py new file mode 100755 index 000000000..6ecf83024 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/ATNConfigSet.py @@ -0,0 +1,239 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# +# Specialized {@link Set}{@code <}{@link ATNConfig}{@code >} that can track +# info about the set, with support for combining similar configurations using a +# graph-structured stack. +#/ +from io import StringIO + +from antlr4.PredictionContext import merge +from antlr4.Utils import str_list +from antlr4.atn.ATN import ATN +from antlr4.atn.SemanticContext import SemanticContext +from antlr4.error.Errors import UnsupportedOperationException, IllegalStateException + + +class ATNConfigSet(object): + # + # The reason that we need this is because we don't want the hash map to use + # the standard hash code and equals. We need all configurations with the same + # {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively doubles + # the number of objects associated with ATNConfigs. 
The other solution is to + # use a hash table that lets us specify the equals/hashcode operation. + + def __init__(self, fullCtx=True): + # All configs but hashed by (s, i, _, pi) not including context. Wiped out + # when we go readonly as this set becomes a DFA state. + self.configLookup = set() + # Indicates that this configuration set is part of a full context + # LL prediction. It will be used to determine how to merge $. With SLL + # it's a wildcard whereas it is not for LL context merge. + self.fullCtx = fullCtx + # Indicates that the set of configurations is read-only. Do not + # allow any code to manipulate the set; DFA states will point at + # the sets and they must not change. This does not protect the other + # fields; in particular, conflictingAlts is set after + # we've made this readonly. + self.readonly = False + # Track the elements as they are added to the set; supports get(i)#/ + self.configs = [] + + # TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation + # TODO: can we track conflicts as they are added to save scanning configs later? + self.uniqueAlt = 0 + self.conflictingAlts = None + + # Used in parser and lexer. In lexer, it indicates we hit a pred + # while computing a closure operation. Don't make a DFA state from this. + self.hasSemanticContext = False + self.dipsIntoOuterContext = False + + self.cachedHashCode = -1 + + def __iter__(self): + return self.configs.__iter__() + + # Adding a new config means merging contexts with existing configs for + # {@code (s, i, pi, _)}, where {@code s} is the + # {@link ATNConfig#state}, {@code i} is the {@link ATNConfig#alt}, and + # {@code pi} is the {@link ATNConfig#semanticContext}. We use + # {@code (s,i,pi)} as key. + # + #

+    # This method updates {@link #dipsIntoOuterContext} and
+    # {@link #hasSemanticContext} when necessary.
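+    #
+    # A hedged sketch ("s" and "ctx" are illustrative placeholders for an ATN
+    # state and a prediction context, not runtime fixtures):
+    #
+    #   configs = ATNConfigSet(fullCtx=False)
+    #   configs.add(ATNConfig(state=s, alt=1, context=ctx))
+    #   configs.add(ATNConfig(state=s, alt=1, context=ctx))
+    #   # the second add() finds the existing (s, i, pi) entry and merges the
+    #   # contexts instead of appending, so len(configs) stays 1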

      + #/ + def add(self, config, mergeCache=None): + if self.readonly: + raise Exception("This set is readonly") + if config.semanticContext is not SemanticContext.NONE: + self.hasSemanticContext = True + if config.reachesIntoOuterContext > 0: + self.dipsIntoOuterContext = True + existing = self.getOrAdd(config) + if existing is config: + self.cachedHashCode = -1 + self.configs.append(config) # track order here + return True + # a previous (s,i,pi,_), merge with it and save result + rootIsWildcard = not self.fullCtx + merged = merge(existing.context, config.context, rootIsWildcard, mergeCache) + # no need to check for existing.context, config.context in cache + # since only way to create new graphs is "call rule" and here. We + # cache at both places. + existing.reachesIntoOuterContext = max(existing.reachesIntoOuterContext, config.reachesIntoOuterContext) + # make sure to preserve the precedence filter suppression during the merge + if config.precedenceFilterSuppressed: + existing.precedenceFilterSuppressed = True + existing.context = merged # replace context; no need to alt mapping + return True + + def getOrAdd(self, config): + for c in self.configLookup: + if c==config: + return c + self.configLookup.add(config) + return config + + def getStates(self): + states = set() + for c in self.configs: + states.add(c.state) + return states + + def getPredicates(self): + preds = list() + for c in self.configs: + if c.semanticContext!=SemanticContext.NONE: + preds.append(c.semanticContext) + return preds + + def get(self, i): + return self.configs[i] + + def optimizeConfigs(self, interpreter): + if self.readonly: + raise IllegalStateException("This set is readonly") + if len(self.configLookup)==0: + return + for config in self.configs: + config.context = interpreter.getCachedContext(config.context) + + def addAll(self, coll): + for c in coll: + self.add(c) + return False + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, ATNConfigSet): + return False + + same = self.configs is not None and \ + self.configs==other.configs and \ + self.fullCtx == other.fullCtx and \ + self.uniqueAlt == other.uniqueAlt and \ + self.conflictingAlts == other.conflictingAlts and \ + self.hasSemanticContext == other.hasSemanticContext and \ + self.dipsIntoOuterContext == other.dipsIntoOuterContext + + return same + + def __hash__(self): + if self.readonly: + if self.cachedHashCode == -1: + self.cachedHashCode = self.hashConfigs() + return self.cachedHashCode + return self.hashConfigs() + + def hashConfigs(self): + with StringIO() as buf: + for cfg in self.configs: + buf.write(unicode(cfg)) + return hash(buf.getvalue()) + + def __len__(self): + return len(self.configs) + + def isEmpty(self): + return len(self.configs)==0 + + def __contains__(self, item): + if self.configLookup is None: + raise UnsupportedOperationException("This method is not implemented for readonly sets.") + return item in self.configLookup + + def containsFast(self, obj): + if self.configLookup is None: + raise UnsupportedOperationException("This method is not implemented for readonly sets.") + return self.configLookup.containsFast(obj) + + + def clear(self): + if self.readonly: + raise IllegalStateException("This set is readonly") + self.configs.clear() + self.cachedHashCode = -1 + self.configLookup.clear() + + def setReadonly(self, readonly): + self.readonly = readonly + self.configLookup = None # can't mod, no need for lookup cache + + def __str__(self): + return unicode(self) + + def __unicode__(self): + 
with StringIO() as buf: + buf.write(str_list(self.configs)) + if self.hasSemanticContext: + buf.write(u",hasSemanticContext=") + buf.write(unicode(self.hasSemanticContext)) + if self.uniqueAlt!=ATN.INVALID_ALT_NUMBER: + buf.write(u",uniqueAlt=") + buf.write(unicode(self.uniqueAlt)) + if self.conflictingAlts is not None: + buf.write(u",conflictingAlts=") + buf.write(unicode(self.conflictingAlts)) + if self.dipsIntoOuterContext: + buf.write(u",dipsIntoOuterContext") + return buf.getvalue() + + +class OrderedATNConfigSet(ATNConfigSet): + + def __init__(self): + super(OrderedATNConfigSet, self).__init__() + # self.configLookup = set() + + + diff --git a/runtime/Python2/src/antlr4/atn/ATNDeserializationOptions.py b/runtime/Python2/src/antlr4/atn/ATNDeserializationOptions.py new file mode 100644 index 000000000..5cf432991 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/ATNDeserializationOptions.py @@ -0,0 +1,46 @@ +#[The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +class ATNDeserializationOptions(object): + + defaultOptions = None + + def __init__(self, copyFrom = None): + self.readOnly = False + self.verifyATN = True if copyFrom is None else copyFrom.verifyATN + self.generateRuleBypassTransitions = False if copyFrom is None else copyFrom.generateRuleBypassTransitions + + def __setattr__(self, key, value): + if key!="readOnly" and self.readOnly: + raise Exception("The object is read only.") + super(type(self), self).__setattr__(key,value) + +ATNDeserializationOptions.defaultOptions = ATNDeserializationOptions() +ATNDeserializationOptions.defaultOptions.readOnly = True + diff --git a/runtime/Python2/src/antlr4/atn/ATNDeserializer.py b/runtime/Python2/src/antlr4/atn/ATNDeserializer.py new file mode 100644 index 000000000..d9e8bb12f --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/ATNDeserializer.py @@ -0,0 +1,542 @@ +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ +from uuid import UUID + +from antlr4.atn.ATN import ATN +from antlr4.atn.ATNType import ATNType +from antlr4.atn.ATNState import * +from antlr4.atn.Transition import * +from antlr4.atn.LexerAction import * +from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions + + +# This is the earliest supported serialized UUID. +BASE_SERIALIZED_UUID = UUID("AADB8D7E-AEEF-4415-AD2B-8204D6CF042E") + +# This list contains all of the currently supported UUIDs, ordered by when +# the feature first appeared in this branch. +SUPPORTED_UUIDS = [ BASE_SERIALIZED_UUID ] + +SERIALIZED_VERSION = 3 + +# This is the current serialized UUID. +SERIALIZED_UUID = BASE_SERIALIZED_UUID + +class ATNDeserializer (object): + + def __init__(self, options = None): + if options is None: + options = ATNDeserializationOptions.defaultOptions + self.deserializationOptions = options + self.edgeFactories = None + self.stateFactories = None + self.actionFactories = None + + # Determines if a particular serialized representation of an ATN supports + # a particular feature, identified by the {@link UUID} used for serializing + # the ATN at the time the feature was first introduced. + # + # @param feature The {@link UUID} marking the first time the feature was + # supported in the serialized ATN. + # @param actualUuid The {@link UUID} of the actual serialized ATN which is + # currently being deserialized. + # @return {@code true} if the {@code actualUuid} value represents a + # serialized ATN at or after the feature identified by {@code feature} was + # introduced; otherwise, {@code false}. 
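The read-only guard in ATNDeserializationOptions above relies entirely on __setattr__: once readOnly is flipped to True, every other attribute write is rejected. A minimal standalone sketch of that pattern, using illustrative names rather than the runtime class:

    class FrozenOptions(object):
        def __init__(self):
            self.readOnly = False          # still assignable while constructing
            self.verifyATN = True

        def __setattr__(self, key, value):
            # once readOnly is on, reject every write except toggling readOnly itself
            if key != "readOnly" and getattr(self, "readOnly", False):
                raise Exception("The object is read only.")
            super(FrozenOptions, self).__setattr__(key, value)

    opts = FrozenOptions()
    opts.readOnly = True
    try:
        opts.verifyATN = False             # raises: the instance is frozen
    except Exception as e:
        print(e)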
+ + def isFeatureSupported(self, feature, actualUuid): + idx1 = SUPPORTED_UUIDS.index(feature) + if idx1<0: + return False + idx2 = SUPPORTED_UUIDS.index(actualUuid) + return idx2 >= idx1 + + def deserialize(self, data): + self.reset(data) + self.checkVersion() + self.checkUUID() + atn = self.readATN() + self.readStates(atn) + self.readRules(atn) + self.readModes(atn) + sets = self.readSets(atn) + self.readEdges(atn, sets) + self.readDecisions(atn) + self.readLexerActions(atn) + self.markPrecedenceDecisions(atn) + self.verifyATN(atn) + if self.deserializationOptions.generateRuleBypassTransitions \ + and atn.grammarType == ATNType.PARSER: + self.generateRuleBypassTransitions(atn) + # re-verify after modification + self.verifyATN(atn) + return atn + + def reset(self, data): + def adjust(c): + v = ord(c) + return v-2 if v>1 else -1 + temp = [ adjust(c) for c in data ] + # don't adjust the first value since that's the version number + temp[0] = ord(data[0]) + self.data = temp + self.pos = 0 + + def checkVersion(self): + version = self.readInt() + if version != SERIALIZED_VERSION: + raise Exception("Could not deserialize ATN with version " + str(version) + " (expected " + str(SERIALIZED_VERSION) + ").") + + def checkUUID(self): + uuid = self.readUUID() + if not uuid in SUPPORTED_UUIDS: + raise Exception("Could not deserialize ATN with UUID: " + str(uuid) + \ + " (expected " + str(SERIALIZED_UUID) + " or a legacy UUID).", uuid, SERIALIZED_UUID) + self.uuid = uuid + + def readATN(self): + grammarType = self.readInt() + maxTokenType = self.readInt() + return ATN(grammarType, maxTokenType) + + def readStates(self, atn): + loopBackStateNumbers = [] + endStateNumbers = [] + nstates = self.readInt() + for i in range(0, nstates): + stype = self.readInt() + # ignore bad type of states + if stype==ATNState.INVALID_TYPE: + atn.addState(None) + continue + ruleIndex = self.readInt() + if ruleIndex == 0xFFFF: + ruleIndex = -1 + + s = self.stateFactory(stype, ruleIndex) + if stype == ATNState.LOOP_END: # special case + loopBackStateNumber = self.readInt() + loopBackStateNumbers.append((s, loopBackStateNumber)) + elif isinstance(s, BlockStartState): + endStateNumber = self.readInt() + endStateNumbers.append((s, endStateNumber)) + + atn.addState(s) + + # delay the assignment of loop back and end states until we know all the state instances have been initialized + for pair in loopBackStateNumbers: + pair[0].loopBackState = atn.states[pair[1]] + + for pair in endStateNumbers: + pair[0].endState = atn.states[pair[1]] + + numNonGreedyStates = self.readInt() + for i in range(0, numNonGreedyStates): + stateNumber = self.readInt() + atn.states[stateNumber].nonGreedy = True + + numPrecedenceStates = self.readInt() + for i in range(0, numPrecedenceStates): + stateNumber = self.readInt() + atn.states[stateNumber].isPrecedenceRule = True + + def readRules(self, atn): + nrules = self.readInt() + if atn.grammarType == ATNType.LEXER: + atn.ruleToTokenType = [0] * nrules + + atn.ruleToStartState = [0] * nrules + for i in range(0, nrules): + s = self.readInt() + startState = atn.states[s] + atn.ruleToStartState[i] = startState + if atn.grammarType == ATNType.LEXER: + tokenType = self.readInt() + if tokenType == 0xFFFF: + tokenType = Token.EOF + + atn.ruleToTokenType[i] = tokenType + + atn.ruleToStopState = [0] * nrules + for state in atn.states: + if not isinstance(state, RuleStopState): + continue + atn.ruleToStopState[state.ruleIndex] = state + atn.ruleToStartState[state.ruleIndex].stopState = state + + def readModes(self, 
atn): + nmodes = self.readInt() + for i in range(0, nmodes): + s = self.readInt() + atn.modeToStartState.append(atn.states[s]) + + def readSets(self, atn): + sets = [] + m = self.readInt() + for i in range(0, m): + iset = IntervalSet() + sets.append(iset) + n = self.readInt() + containsEof = self.readInt() + if containsEof!=0: + iset.addOne(-1) + for j in range(0, n): + i1 = self.readInt() + i2 = self.readInt() + iset.addRange(Interval(i1, i2 + 1)) # range upper limit is exclusive + return sets + + def readEdges(self, atn, sets): + nedges = self.readInt() + for i in range(0, nedges): + src = self.readInt() + trg = self.readInt() + ttype = self.readInt() + arg1 = self.readInt() + arg2 = self.readInt() + arg3 = self.readInt() + trans = self.edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets) + srcState = atn.states[src] + srcState.addTransition(trans) + + # edges for rule stop states can be derived, so they aren't serialized + for state in atn.states: + for i in range(0, len(state.transitions)): + t = state.transitions[i] + if not isinstance(t, RuleTransition): + continue + outermostPrecedenceReturn = -1 + if atn.ruleToStartState[t.target.ruleIndex].isPrecedenceRule: + if t.precedence == 0: + outermostPrecedenceReturn = t.target.ruleIndex + trans = EpsilonTransition(t.followState, outermostPrecedenceReturn) + atn.ruleToStopState[t.target.ruleIndex].addTransition(trans) + + for state in atn.states: + if isinstance(state, BlockStartState): + # we need to know the end state to set its start state + if state.endState is None: + raise Exception("IllegalState") + # block end states can only be associated to a single block start state + if state.endState.startState is not None: + raise Exception("IllegalState") + state.endState.startState = state + + elif isinstance(state, PlusLoopbackState): + for i in range(0, len(state.transitions)): + target = state.transitions[i].target + if isinstance(target, PlusBlockStartState): + target.loopBackState = state + elif isinstance(state, StarLoopbackState): + for i in range(0, len(state.transitions)): + target = state.transitions[i].target + if isinstance(target, StarLoopEntryState): + target.loopBackState = state + + def readDecisions(self, atn): + ndecisions = self.readInt() + for i in range(0, ndecisions): + s = self.readInt() + decState = atn.states[s] + atn.decisionToState.append(decState) + decState.decision = i + + def readLexerActions(self, atn): + if atn.grammarType == ATNType.LEXER: + count = self.readInt() + atn.lexerActions = [ None ] * count + for i in range(0, count): + actionType = self.readInt() + data1 = self.readInt() + if data1 == 0xFFFF: + data1 = -1 + data2 = self.readInt() + if data2 == 0xFFFF: + data2 = -1 + lexerAction = self.lexerActionFactory(actionType, data1, data2) + atn.lexerActions[i] = lexerAction + + def generateRuleBypassTransitions(self, atn): + + count = len(atn.ruleToStartState) + atn.ruleToTokenType = [ 0 ] * count + for i in range(0, count): + atn.ruleToTokenType[i] = atn.maxTokenType + i + 1 + + for i in range(0, count): + self.generateRuleBypassTransition(atn, i) + + def generateRuleBypassTransition(self, atn, idx): + + bypassStart = BasicBlockStartState() + bypassStart.ruleIndex = idx + atn.addState(bypassStart) + + bypassStop = BlockEndState() + bypassStop.ruleIndex = idx + atn.addState(bypassStop) + + bypassStart.endState = bypassStop + atn.defineDecisionState(bypassStart) + + bypassStop.startState = bypassStart + + excludeTransition = None + + if atn.ruleToStartState[idx].isPrecedenceRule: + # wrap from the 
beginning of the rule to the StarLoopEntryState + endState = None + for state in atn.states: + if self.stateIsEndStateFor(state, idx): + endState = state + excludeTransition = state.loopBackState.transitions[0] + break + + if excludeTransition is None: + raise Exception("Couldn't identify final state of the precedence rule prefix section.") + + else: + + endState = atn.ruleToStopState[idx] + + # all non-excluded transitions that currently target end state need to target blockEnd instead + for state in atn.states: + for transition in state.transitions: + if transition == excludeTransition: + continue + if transition.target == endState: + transition.target = bypassStop + + # all transitions leaving the rule start state need to leave blockStart instead + ruleToStartState = atn.ruleToStartState[idx] + count = len(ruleToStartState.transitions) + while count > 0: + bypassStart.addTransition(ruleToStartState.transitions[count-1]) + del ruleToStartState.transitions[-1] + + # link the new states + atn.ruleToStartState[idx].addTransition(EpsilonTransition(bypassStart)) + bypassStop.addTransition(EpsilonTransition(endState)) + + matchState = BasicState() + atn.addState(matchState) + matchState.addTransition(AtomTransition(bypassStop, atn.ruleToTokenType[idx])) + bypassStart.addTransition(EpsilonTransition(matchState)) + + + def stateIsEndStateFor(self, state, idx): + if state.ruleIndex != idx: + return None + if not isinstance(state, StarLoopEntryState): + return None + + maybeLoopEndState = state.transitions[len(state.transitions) - 1].target + if not isinstance(maybeLoopEndState, LoopEndState): + return None + + if maybeLoopEndState.epsilonOnlyTransitions and \ + isinstance(maybeLoopEndState.transitions[0].target, RuleStopState): + return state + else: + return None + + + # + # Analyze the {@link StarLoopEntryState} states in the specified ATN to set + # the {@link StarLoopEntryState#precedenceRuleDecision} field to the + # correct value. + # + # @param atn The ATN. + # + def markPrecedenceDecisions(self, atn): + for state in atn.states: + if not isinstance(state, StarLoopEntryState): + continue + + # We analyze the ATN to determine if this ATN decision state is the + # decision for the closure block that determines whether a + # precedence rule should continue or complete. 
+ # + if atn.ruleToStartState[state.ruleIndex].isPrecedenceRule: + maybeLoopEndState = state.transitions[len(state.transitions) - 1].target + if isinstance(maybeLoopEndState, LoopEndState): + if maybeLoopEndState.epsilonOnlyTransitions and \ + isinstance(maybeLoopEndState.transitions[0].target, RuleStopState): + state.precedenceRuleDecision = True + + def verifyATN(self, atn): + if not self.deserializationOptions.verifyATN: + return + # verify assumptions + for state in atn.states: + if state is None: + continue + + self.checkCondition(state.epsilonOnlyTransitions or len(state.transitions) <= 1) + + if isinstance(state, PlusBlockStartState): + self.checkCondition(state.loopBackState is not None) + + if isinstance(state, StarLoopEntryState): + self.checkCondition(state.loopBackState is not None) + self.checkCondition(len(state.transitions) == 2) + + if isinstance(state.transitions[0].target, StarBlockStartState): + self.checkCondition(isinstance(state.transitions[1].target, LoopEndState)) + self.checkCondition(not state.nonGreedy) + elif isinstance(state.transitions[0].target, LoopEndState): + self.checkCondition(isinstance(state.transitions[1].target, StarBlockStartState)) + self.checkCondition(state.nonGreedy) + else: + raise Exception("IllegalState") + + if isinstance(state, StarLoopbackState): + self.checkCondition(len(state.transitions) == 1) + self.checkCondition(isinstance(state.transitions[0].target, StarLoopEntryState)) + + if isinstance(state, LoopEndState): + self.checkCondition(state.loopBackState is not None) + + if isinstance(state, RuleStartState): + self.checkCondition(state.stopState is not None) + + if isinstance(state, BlockStartState): + self.checkCondition(state.endState is not None) + + if isinstance(state, BlockEndState): + self.checkCondition(state.startState is not None) + + if isinstance(state, DecisionState): + self.checkCondition(len(state.transitions) <= 1 or state.decision >= 0) + else: + self.checkCondition(len(state.transitions) <= 1 or isinstance(state, RuleStopState)) + + def checkCondition(self, condition, message=None): + if not condition: + if message is None: + message = "IllegalState" + raise Exception(message) + + def readInt(self): + i = self.data[self.pos] + self.pos += 1 + return i + + def readInt32(self): + low = self.readInt() + high = self.readInt() + return low | (high << 16) + + def readLong(self): + low = self.readInt32() + high = self.readInt32() + return (low & 0x00000000FFFFFFFF) | (high << 32) + + def readUUID(self): + low = self.readLong() + high = self.readLong() + allBits = (low & 0xFFFFFFFFFFFFFFFF) | (high << 64) + return UUID(int=allBits) + + def edgeFactory(self, atn, type, src, trg, arg1, arg2, arg3, sets): + target = atn.states[trg] + if self.edgeFactories is None: + ef = [None] * 11 + ef[0] = lambda args : None + ef[Transition.EPSILON] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ + EpsilonTransition(target) + ef[Transition.RANGE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ + RangeTransition(target, Token.EOF, arg2) if arg3 != 0 else RangeTransition(target, arg1, arg2) + ef[Transition.RULE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ + RuleTransition(atn.states[arg1], arg2, arg3, target) + ef[Transition.PREDICATE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ + PredicateTransition(target, arg1, arg2, arg3 != 0) + ef[Transition.PRECEDENCE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ + PrecedencePredicateTransition(target, arg1) + ef[Transition.ATOM] = lambda 
atn, src, trg, arg1, arg2, arg3, sets, target : \ + AtomTransition(target, Token.EOF) if arg3 != 0 else AtomTransition(target, arg1) + ef[Transition.ACTION] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ + ActionTransition(target, arg1, arg2, arg3 != 0) + ef[Transition.SET] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ + SetTransition(target, sets[arg1]) + ef[Transition.NOT_SET] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ + NotSetTransition(target, sets[arg1]) + ef[Transition.WILDCARD] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ + WildcardTransition(target) + self.edgeFactories = ef + + if type> len(self.edgeFactories) or self.edgeFactories[type] is None: + raise Exception("The specified transition type: " + str(type) + " is not valid.") + else: + return self.edgeFactories[type](atn, src, trg, arg1, arg2, arg3, sets, target) + + def stateFactory(self, type, ruleIndex): + if self.stateFactories is None: + sf = [None] * 13 + sf[ATNState.INVALID_TYPE] = lambda : None + sf[ATNState.BASIC] = lambda : BasicState() + sf[ATNState.RULE_START] = lambda : RuleStartState() + sf[ATNState.BLOCK_START] = lambda : BasicBlockStartState() + sf[ATNState.PLUS_BLOCK_START] = lambda : PlusBlockStartState() + sf[ATNState.STAR_BLOCK_START] = lambda : StarBlockStartState() + sf[ATNState.TOKEN_START] = lambda : TokensStartState() + sf[ATNState.RULE_STOP] = lambda : RuleStopState() + sf[ATNState.BLOCK_END] = lambda : BlockEndState() + sf[ATNState.STAR_LOOP_BACK] = lambda : StarLoopbackState() + sf[ATNState.STAR_LOOP_ENTRY] = lambda : StarLoopEntryState() + sf[ATNState.PLUS_LOOP_BACK] = lambda : PlusLoopbackState() + sf[ATNState.LOOP_END] = lambda : LoopEndState() + self.stateFactories = sf + + if type> len(self.stateFactories) or self.stateFactories[type] is None: + raise Exception("The specified state type " + str(type) + " is not valid.") + else: + s = self.stateFactories[type]() + if s is not None: + s.ruleIndex = ruleIndex + return s + + def lexerActionFactory(self, type, data1, data2): + if self.actionFactories is None: + af = [ None ] * 8 + af[LexerActionType.CHANNEL] = lambda data1, data2: LexerChannelAction(data1) + af[LexerActionType.CUSTOM] = lambda data1, data2: LexerCustomAction(data1, data2) + af[LexerActionType.MODE] = lambda data1, data2: LexerModeAction(data1) + af[LexerActionType.MORE] = lambda data1, data2: LexerMoreAction.INSTANCE + af[LexerActionType.POP_MODE] = lambda data1, data2: LexerPopModeAction.INSTANCE + af[LexerActionType.PUSH_MODE] = lambda data1, data2: LexerPushModeAction(data1) + af[LexerActionType.SKIP] = lambda data1, data2: LexerSkipAction.INSTANCE + af[LexerActionType.TYPE] = lambda data1, data2: LexerTypeAction(data1) + self.actionFactories = af + + if type> len(self.actionFactories) or self.actionFactories[type] is None: + raise Exception("The specified lexer action type " + str(type) + " is not valid.") + else: + return self.actionFactories[type](data1, data2) diff --git a/runtime/Python2/src/antlr4/atn/ATNSimulator.py b/runtime/Python2/src/antlr4/atn/ATNSimulator.py new file mode 100644 index 000000000..a02de0cbf --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/ATNSimulator.py @@ -0,0 +1,70 @@ +# +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ +from antlr4.PredictionContext import getCachedPredictionContext +from antlr4.atn.ATNConfigSet import ATNConfigSet +from antlr4.dfa.DFAState import DFAState + + +class ATNSimulator(object): + + # Must distinguish between missing edge and edge we know leads nowhere#/ + ERROR = DFAState(0x7FFFFFFF, ATNConfigSet()) + + # The context cache maps all PredictionContext objects that are == + # to a single cached copy. This cache is shared across all contexts + # in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet + # to use only cached nodes/graphs in addDFAState(). We don't want to + # fill this during closure() since there are lots of contexts that + # pop up but are not used ever again. It also greatly slows down closure(). + # + #

This cache makes a huge difference in memory and a little bit in speed. + # For the Java grammar on java.*, it dropped the memory requirements + # at the end from 25M to 16M. We don't store any of the full context + # graphs in the DFA because they are limited to local context only, + # but apparently there's a lot of repetition there as well. We optimize + # the config contexts before storing the config set in the DFA states + # by literally rebuilding them with cached subgraphs only.
+ #
+ # I tried a cache for use during closure operations, that was + # whacked after each adaptivePredict(). It cost a little bit + # more time I think and doesn't save on the overall footprint + # so it's not worth the complexity.
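The shared context cache described in this comment is essentially a hash-consing table: contexts that compare equal collapse to a single cached object, which is what getCachedContext delegates to getCachedPredictionContext for PredictionContext graphs. A toy illustration of the idea, with plain tuples standing in for contexts (not the runtime API):

    cache = {}

    def intern_ctx(ctx):
        # return the single cached copy of anything that compares equal
        existing = cache.get(ctx)
        if existing is not None:
            return existing
        cache[ctx] = ctx
        return ctx

    def make_ctx(rule, return_state):
        return (rule, return_state)        # fresh tuple on every call

    a = make_ctx("ruleA", 5)
    b = make_ctx("ruleA", 5)
    assert a == b and a is not b           # equal, but two distinct objects
    assert intern_ctx(a) is intern_ctx(b)  # both now resolve to one cached copy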

      + #/ + def __init__(self, atn, sharedContextCache): + self.atn = atn + self.sharedContextCache = sharedContextCache + + def getCachedContext(self, context): + if self.sharedContextCache is None: + return context + visited = dict() + return getCachedPredictionContext(context, self.sharedContextCache, visited) + diff --git a/runtime/Python2/src/antlr4/atn/ATNState.py b/runtime/Python2/src/antlr4/atn/ATNState.py new file mode 100644 index 000000000..36b8d92f3 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/ATNState.py @@ -0,0 +1,283 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# The following images show the relation of states and +# {@link ATNState#transitions} for various grammar constructs. +# +#
+# - Solid edges marked with an ε indicate a required
+# {@link EpsilonTransition}.
+#
+# - Dashed edges indicate locations where any transition derived from
+# {@link Transition} might appear.
+#
+# - Dashed nodes are place holders for either a sequence of linked
+# {@link BasicState} states or the inclusion of a block representing a nested
+# construct in one of the forms below.
+#
+# - Nodes showing multiple outgoing alternatives with a {@code ...} support
+# any number of alternatives (one or more). Nodes without the {@code ...} only
+# support the exact number of alternatives shown in the diagram.
+#
+# Basic Blocks:
+#
+# Rule (diagram)
+#
+# Block of 1 or more alternatives (diagram)
+#
+# Greedy Loops:
+#
+# Greedy Closure: {@code (...)*} (diagram)
+#
+# Greedy Positive Closure: {@code (...)+} (diagram)
+#
+# Greedy Optional: {@code (...)?} (diagram)
+#
+# Non-Greedy Loops:
+#
+# Non-Greedy Closure: {@code (...)*?} (diagram)
+#
+# Non-Greedy Positive Closure: {@code (...)+?} (diagram)
+#
+# Non-Greedy Optional: {@code (...)??} (diagram)
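One behavioral detail worth keeping in mind for the state classes defined below: ATNState equality and hashing are keyed solely on stateNumber, so states act as value objects in sets and dicts. A short usage sketch, assuming this module is importable as antlr4.atn.ATNState:

    from antlr4.atn.ATNState import BasicState

    a = BasicState()
    b = BasicState()
    a.stateNumber = 7
    b.stateNumber = 7
    assert a == b and hash(a) == hash(b)   # identity is the state number
    assert b in {a}                        # so states work as set/dict keys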

      +# +# +# + +INITIAL_NUM_TRANSITIONS = 4 + +class ATNState(object): + + # constants for serialization + INVALID_TYPE = 0 + BASIC = 1 + RULE_START = 2 + BLOCK_START = 3 + PLUS_BLOCK_START = 4 + STAR_BLOCK_START = 5 + TOKEN_START = 6 + RULE_STOP = 7 + BLOCK_END = 8 + STAR_LOOP_BACK = 9 + STAR_LOOP_ENTRY = 10 + PLUS_LOOP_BACK = 11 + LOOP_END = 12 + + serializationNames = [ + "INVALID", + "BASIC", + "RULE_START", + "BLOCK_START", + "PLUS_BLOCK_START", + "STAR_BLOCK_START", + "TOKEN_START", + "RULE_STOP", + "BLOCK_END", + "STAR_LOOP_BACK", + "STAR_LOOP_ENTRY", + "PLUS_LOOP_BACK", + "LOOP_END" ] + + INVALID_STATE_NUMBER = -1 + + def __init__(self): + # Which ATN are we in? + self.atn = None + self.stateNumber = ATNState.INVALID_STATE_NUMBER + self.stateType = None + self.ruleIndex = 0 # at runtime, we don't have Rule objects + self.epsilonOnlyTransitions = False + # Track the transitions emanating from this ATN state. + self.transitions = [] + # Used to cache lookahead during parsing, not used during construction + self.nextTokenWithinRule = None + + def __hash__(self): + return self.stateNumber + + def __eq__(self, other): + if isinstance(other, ATNState): + return self.stateNumber==other.stateNumber + else: + return False + + def onlyHasEpsilonTransitions(self): + return self.epsilonOnlyTransitions + + def isNonGreedyExitState(self): + return False + + def __str__(self): + return unicode(self) + + def __unicode__(self): + return unicode(self.stateNumber) + + def addTransition(self, trans, index=-1): + if len(self.transitions)==0: + self.epsilonOnlyTransitions = trans.isEpsilon + elif self.epsilonOnlyTransitions != trans.isEpsilon: + self.epsilonOnlyTransitions = False + # TODO System.err.format(Locale.getDefault(), "ATN state %d has both epsilon and non-epsilon transitions.\n", stateNumber); + if index==-1: + self.transitions.append(trans) + else: + self.transitions.insert(index, trans) + +class BasicState(ATNState): + + def __init__(self): + super(BasicState, self).__init__() + self.stateType = self.BASIC + + +class DecisionState(ATNState): + + def __init__(self): + super(DecisionState, self).__init__() + self.decision = -1 + self.nonGreedy = False + +# The start of a regular {@code (...)} block. +class BlockStartState(DecisionState): + + def __init__(self): + super(BlockStartState, self).__init__() + self.endState = None + +class BasicBlockStartState(BlockStartState): + + def __init__(self): + super(BasicBlockStartState, self).__init__() + self.stateType = self.BLOCK_START + +# Terminal node of a simple {@code (a|b|c)} block. +class BlockEndState(ATNState): + + def __init__(self): + super(BlockEndState, self).__init__() + self.stateType = self.BLOCK_END + self.startState = None + +# The last node in the ATN for a rule, unless that rule is the start symbol. +# In that case, there is one transition to EOF. Later, we might encode +# references to all calls to this rule to compute FOLLOW sets for +# error handling. +# +class RuleStopState(ATNState): + + def __init__(self): + super(RuleStopState, self).__init__() + self.stateType = self.RULE_STOP + +class RuleStartState(ATNState): + + def __init__(self): + super(RuleStartState, self).__init__() + self.stateType = self.RULE_START + self.stopState = None + self.isPrecedenceRule = False + +# Decision state for {@code A+} and {@code (A|B)+}. It has two transitions: +# one to the loop back to start of the block and one to exit. 
+# +class PlusLoopbackState(DecisionState): + + def __init__(self): + super(PlusLoopbackState, self).__init__() + self.stateType = self.PLUS_LOOP_BACK + +# Start of {@code (A|B|...)+} loop. Technically a decision state, but +# we don't use for code generation; somebody might need it, so I'm defining +# it for completeness. In reality, the {@link PlusLoopbackState} node is the +# real decision-making note for {@code A+}. +# +class PlusBlockStartState(BlockStartState): + + def __init__(self): + super(PlusBlockStartState, self).__init__() + self.stateType = self.PLUS_BLOCK_START + self.loopBackState = None + +# The block that begins a closure loop. +class StarBlockStartState(BlockStartState): + + def __init__(self): + super(StarBlockStartState, self).__init__() + self.stateType = self.STAR_BLOCK_START + +class StarLoopbackState(ATNState): + + def __init__(self): + super(StarLoopbackState, self).__init__() + self.stateType = self.STAR_LOOP_BACK + + +class StarLoopEntryState(DecisionState): + + def __init__(self): + super(StarLoopEntryState, self).__init__() + self.stateType = self.STAR_LOOP_ENTRY + self.loopBackState = None + # Indicates whether this state can benefit from a precedence DFA during SLL decision making. + self.precedenceRuleDecision = None + +# Mark the end of a * or + loop. +class LoopEndState(ATNState): + + def __init__(self): + super(LoopEndState, self).__init__() + self.stateType = self.LOOP_END + self.loopBackState = None + +# The Tokens rule start state linking to each lexer rule start state */ +class TokensStartState(DecisionState): + + def __init__(self): + super(TokensStartState, self).__init__() + self.stateType = self.TOKEN_START diff --git a/runtime/Python2/src/antlr4/atn/ATNType.py b/runtime/Python2/src/antlr4/atn/ATNType.py new file mode 100644 index 000000000..b351b18d1 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/ATNType.py @@ -0,0 +1,37 @@ +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ + +# Represents the type of recognizer an ATN applies to. 
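ATNType is just the two-value enum below; the deserializer branches on it (readATN stores it, readRules and readLexerActions test it). A trivial usage sketch, assuming the package is importable; the variable name is illustrative:

    from antlr4.atn.ATNType import ATNType

    grammar_type = ATNType.LEXER    # hypothetical value read from the serialized data
    if grammar_type == ATNType.LEXER:
        print("lexer ATN: ruleToTokenType will be populated")
    else:
        print("parser ATN")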
+ +class ATNType(object): + + LEXER = 0 + PARSER = 1 + diff --git a/runtime/Python2/src/antlr4/atn/LexerATNSimulator.py b/runtime/Python2/src/antlr4/atn/LexerATNSimulator.py new file mode 100644 index 000000000..5871a28c8 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/LexerATNSimulator.py @@ -0,0 +1,588 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ + +# When we hit an accept state in either the DFA or the ATN, we +# have to notify the character stream to start buffering characters +# via {@link IntStream#mark} and record the current state. The current sim state +# includes the current index into the input, the current line, +# and current character position in that line. Note that the Lexer is +# tracking the starting line and characterization of the token. These +# variables track the "state" of the simulator when it hits an accept state. +# +#

We track these variables separately for the DFA and ATN simulation + # because the DFA simulation often has to fail over to the ATN + # simulation. If the ATN simulation fails, we need the DFA to fall + # back to its previously accepted state, if any. If the ATN succeeds, + # then the ATN does the accept and the DFA simulator that invoked it + # can simply return the predicted token type.
      +#/ +from antlr4 import Lexer +from antlr4.PredictionContext import SingletonPredictionContext, PredictionContext +from antlr4.Token import Token +from antlr4.atn.ATN import ATN +from antlr4.atn.ATNConfig import LexerATNConfig +from antlr4.atn.ATNSimulator import ATNSimulator +from antlr4.atn.ATNConfigSet import OrderedATNConfigSet +from antlr4.atn.ATNState import RuleStopState +from antlr4.atn.LexerActionExecutor import LexerActionExecutor +from antlr4.atn.Transition import Transition +from antlr4.dfa.DFAState import DFAState +from antlr4.error.Errors import LexerNoViableAltException, UnsupportedOperationException + +class SimState(object): + + def __init__(self): + self.reset() + + def reset(self): + self.index = -1 + self.line = 0 + self.column = -1 + self.dfaState = None + +class LexerATNSimulator(ATNSimulator): + + debug = False + dfa_debug = False + + MIN_DFA_EDGE = 0 + MAX_DFA_EDGE = 127 # forces unicode to stay in ATN + + ERROR = None + + match_calls = 0 + + def __init__(self, recog, atn, decisionToDFA, sharedContextCache): + super(LexerATNSimulator, self).__init__(atn, sharedContextCache) + self.decisionToDFA = decisionToDFA + self.recog = recog + # The current token's starting index into the character stream. + # Shared across DFA to ATN simulation in case the ATN fails and the + # DFA did not have a previous accept state. In this case, we use the + # ATN-generated exception object. + self.startIndex = -1 + # line number 1..n within the input#/ + self.line = 1 + # The index of the character relative to the beginning of the line 0..n-1#/ + self.column = 0 + from antlr4.Lexer import Lexer + self.mode = Lexer.DEFAULT_MODE + # Used during DFA/ATN exec to record the most recent accept configuration info + self.prevAccept = SimState() + + + def copyState(self, simulator ): + self.column = simulator.column + self.line = simulator.line + self.mode = simulator.mode + self.startIndex = simulator.startIndex + + def match(self, input , mode): + self.match_calls += 1 + self.mode = mode + mark = input.mark() + try: + self.startIndex = input.index + self.prevAccept.reset() + dfa = self.decisionToDFA[mode] + if dfa.s0 is None: + return self.matchATN(input) + else: + return self.execATN(input, dfa.s0) + finally: + input.release(mark) + + def reset(self): + self.prevAccept.reset() + self.startIndex = -1 + self.line = 1 + self.column = 0 + self.mode = Lexer.DEFAULT_MODE + + def matchATN(self, input): + startState = self.atn.modeToStartState[self.mode] + + if self.debug: + print("matchATN mode " + str(self.mode) + " start: " + str(startState)) + + old_mode = self.mode + s0_closure = self.computeStartState(input, startState) + suppressEdge = s0_closure.hasSemanticContext + s0_closure.hasSemanticContext = False + + next = self.addDFAState(s0_closure) + if not suppressEdge: + self.decisionToDFA[self.mode].s0 = next + + predict = self.execATN(input, next) + + if self.debug: + print("DFA after matchATN: " + str(self.decisionToDFA[old_mode].toLexerString())) + + return predict + + def execATN(self, input, ds0): + if self.debug: + print("start state closure=" + str(ds0.configs)) + + if ds0.isAcceptState: + # allow zero-length tokens + self.captureSimState(self.prevAccept, input, ds0) + + t = input.LA(1) + s = ds0 # s is current/from DFA state + + while True: # while more work + if self.debug: + print("execATN loop starting closure: %s\n", s.configs) + + # As we move src->trg, src->trg, we keep track of the previous trg to + # avoid looking up the DFA state again, which is expensive. 
+ # If the previous target was already part of the DFA, we might + # be able to avoid doing a reach operation upon t. If s!=null, + # it means that semantic predicates didn't prevent us from + # creating a DFA state. Once we know s!=null, we check to see if + # the DFA state has an edge already for t. If so, we can just reuse + # it's configuration set; there's no point in re-computing it. + # This is kind of like doing DFA simulation within the ATN + # simulation because DFA simulation is really just a way to avoid + # computing reach/closure sets. Technically, once we know that + # we have a previously added DFA state, we could jump over to + # the DFA simulator. But, that would mean popping back and forth + # a lot and making things more complicated algorithmically. + # This optimization makes a lot of sense for loops within DFA. + # A character will take us back to an existing DFA state + # that already has lots of edges out of it. e.g., .* in comments. + # print("Target for:" + str(s) + " and:" + str(t)) + target = self.getExistingTargetState(s, t) + # print("Existing:" + str(target)) + if target is None: + target = self.computeTargetState(input, s, t) + # print("Computed:" + str(target)) + + if target == self.ERROR: + break + + # If this is a consumable input element, make sure to consume before + # capturing the accept state so the input index, line, and char + # position accurately reflect the state of the interpreter at the + # end of the token. + if t != Token.EOF: + self.consume(input) + + if target.isAcceptState: + self.captureSimState(self.prevAccept, input, target) + if t == Token.EOF: + break + + t = input.LA(1) + + s = target # flip; current DFA target becomes new src/from state + + return self.failOrAccept(self.prevAccept, input, s.configs, t) + + # Get an existing target state for an edge in the DFA. If the target state + # for the edge has not yet been computed or is otherwise not available, + # this method returns {@code null}. + # + # @param s The current DFA state + # @param t The next input symbol + # @return The existing target DFA state for the given input symbol + # {@code t}, or {@code null} if the target state for this edge is not + # already cached + def getExistingTargetState(self, s, t): + if s.edges is None or t < self.MIN_DFA_EDGE or t > self.MAX_DFA_EDGE: + return None + + target = s.edges[t - self.MIN_DFA_EDGE] + if self.debug and target is not None: + print("reuse state "+s.stateNumber+ " edge to "+target.stateNumber) + + return target + + # Compute a target state for an edge in the DFA, and attempt to add the + # computed state and corresponding edge to the DFA. + # + # @param input The input stream + # @param s The current DFA state + # @param t The next input symbol + # + # @return The computed target DFA state for the given input symbol + # {@code t}. If {@code t} does not lead to a valid DFA state, this method + # returns {@link #ERROR}. + def computeTargetState(self, input, s, t): + reach = OrderedATNConfigSet() + + # if we don't find an existing DFA state + # Fill reach starting from closure, following t transitions + self.getReachableConfigSet(input, s.configs, reach, t) + + if len(reach)==0: # we got nowhere on t from s + if not reach.hasSemanticContext: + # we got nowhere on t, don't throw out this knowledge; it'd + # cause a failover from DFA later. + self. 
addDFAEdge(s, t, self.ERROR) + + # stop when we can't match any more char + return self.ERROR + + # Add an edge from s to target DFA found/created for reach + return self.addDFAEdge(s, t, cfgs=reach) + + def failOrAccept(self, prevAccept , input, reach, t): + if self.prevAccept.dfaState is not None: + lexerActionExecutor = prevAccept.dfaState.lexerActionExecutor + self.accept(input, lexerActionExecutor, self.startIndex, prevAccept.index, prevAccept.line, prevAccept.column) + return prevAccept.dfaState.prediction + else: + # if no accept and EOF is first char, return EOF + if t==Token.EOF and input.index==self.startIndex: + return Token.EOF + raise LexerNoViableAltException(self.recog, input, self.startIndex, reach) + + # Given a starting configuration set, figure out all ATN configurations + # we can reach upon input {@code t}. Parameter {@code reach} is a return + # parameter. + def getReachableConfigSet(self, input, closure, reach, t): + # this is used to skip processing for configs which have a lower priority + # than a config that already reached an accept state for the same rule + skipAlt = ATN.INVALID_ALT_NUMBER + for cfg in closure: + currentAltReachedAcceptState = ( cfg.alt == skipAlt ) + if currentAltReachedAcceptState and cfg.passedThroughNonGreedyDecision: + continue + + if self.debug: + print("testing %s at %s\n", self.getTokenName(t), cfg.toString(self.recog, True)) + + for trans in cfg.state.transitions: # for each transition + target = self.getReachableTarget(trans, t) + if target is not None: + lexerActionExecutor = cfg.lexerActionExecutor + if lexerActionExecutor is not None: + lexerActionExecutor = lexerActionExecutor.fixOffsetBeforeMatch(input.index - self.startIndex) + + treatEofAsEpsilon = (t == Token.EOF) + config = LexerATNConfig(state=target, lexerActionExecutor=lexerActionExecutor, config=cfg) + if self.closure(input, config, reach, currentAltReachedAcceptState, True, treatEofAsEpsilon): + # any remaining configs for this alt have a lower priority than + # the one that just reached an accept state. + skipAlt = cfg.alt + + def accept(self, input, lexerActionExecutor, startIndex, index, line, charPos): + if self.debug: + print("ACTION %s\n", lexerActionExecutor) + + # seek to after last char in token + input.seek(index) + self.line = line + self.column = charPos + + if lexerActionExecutor is not None and self.recog is not None: + lexerActionExecutor.execute(self.recog, input, startIndex) + + def getReachableTarget(self, trans, t): + if trans.matches(t, 0, 0xFFFE): + return trans.target + else: + return None + + def computeStartState(self, input, p): + initialContext = PredictionContext.EMPTY + configs = OrderedATNConfigSet() + for i in range(0,len(p.transitions)): + target = p.transitions[i].target + c = LexerATNConfig(state=target, alt=i+1, context=initialContext) + self.closure(input, c, configs, False, False, False) + return configs + + # Since the alternatives within any lexer decision are ordered by + # preference, this method stops pursuing the closure as soon as an accept + # state is reached. After the first accept state is reached by depth-first + # search from {@code config}, all other (potentially reachable) states for + # this rule would have a lower priority. + # + # @return {@code true} if an accept state is reached, otherwise + # {@code false}. 
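As an aside on the DFA edge cache used by getExistingTargetState and addDFAEdge in this file: edges are only recorded for symbols in [MIN_DFA_EDGE, MAX_DFA_EDGE], indexed as tk - MIN_DFA_EDGE, and everything else goes back through the ATN. A small self-contained sketch of that bookkeeping (EdgeHolder and add_edge are illustrative stand-ins, not runtime API):

    MIN_DFA_EDGE = 0
    MAX_DFA_EDGE = 127   # only this symbol range gets cached DFA edges

    class EdgeHolder(object):              # stand-in for a DFAState
        def __init__(self):
            self.edges = None

    def add_edge(src, tk, dst):
        # symbols outside the bounds are never cached; they re-run the ATN
        if tk < MIN_DFA_EDGE or tk > MAX_DFA_EDGE:
            return dst
        if src.edges is None:
            src.edges = [None] * (MAX_DFA_EDGE - MIN_DFA_EDGE + 1)
        src.edges[tk - MIN_DFA_EDGE] = dst
        return dst

    s, t = EdgeHolder(), EdgeHolder()
    add_edge(s, ord('a'), t)
    assert s.edges[ord('a') - MIN_DFA_EDGE] is t
    assert add_edge(s, 0x20AC, t) is t and len(s.edges) == 128   # non-ASCII: not cached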
+ def closure(self, input, config, configs, currentAltReachedAcceptState, + speculative, treatEofAsEpsilon): + if self.debug: + print("closure("+config.toString(self.recog, True)+")") + + if isinstance( config.state, RuleStopState ): + if self.debug: + if self.recog is not None: + print("closure at %s rule stop %s\n", self.recog.getRuleNames()[config.state.ruleIndex], config) + else: + print("closure at rule stop %s\n", config) + + if config.context is None or config.context.hasEmptyPath(): + if config.context is None or config.context.isEmpty(): + configs.add(config) + return True + else: + configs.add(LexerATNConfig(state=config.state, config=config, context=PredictionContext.EMPTY)) + currentAltReachedAcceptState = True + + if config.context is not None and not config.context.isEmpty(): + for i in range(0,len(config.context)): + if config.context.getReturnState(i) != PredictionContext.EMPTY_RETURN_STATE: + newContext = config.context.getParent(i) # "pop" return state + returnState = self.atn.states[config.context.getReturnState(i)] + c = LexerATNConfig(state=returnState, config=config, context=newContext) + currentAltReachedAcceptState = self.closure(input, c, configs, + currentAltReachedAcceptState, speculative, treatEofAsEpsilon) + + return currentAltReachedAcceptState + + # optimization + if not config.state.epsilonOnlyTransitions: + if not currentAltReachedAcceptState or not config.passedThroughNonGreedyDecision: + configs.add(config) + + for t in config.state.transitions: + c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon) + if c is not None: + currentAltReachedAcceptState = self.closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon) + + return currentAltReachedAcceptState + + # side-effect: can alter configs.hasSemanticContext + def getEpsilonTarget(self, input, config, t, configs, speculative, treatEofAsEpsilon): + c = None + if t.serializationType==Transition.RULE: + newContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber) + c = LexerATNConfig(state=t.target, config=config, context=newContext) + + elif t.serializationType==Transition.PRECEDENCE: + raise UnsupportedOperationException("Precedence predicates are not supported in lexers.") + + elif t.serializationType==Transition.PREDICATE: + # Track traversing semantic predicates. If we traverse, + # we cannot add a DFA state for this "reach" computation + # because the DFA would not test the predicate again in the + # future. Rather than creating collections of semantic predicates + # like v3 and testing them on prediction, v4 will test them on the + # fly all the time using the ATN not the DFA. This is slower but + # semantically it's not used that often. One of the key elements to + # this predicate mechanism is not adding DFA states that see + # predicates immediately afterwards in the ATN. For example, + + # a : ID {p1}? | ID {p2}? ; + + # should create the start state for rule 'a' (to save start state + # competition), but should not create target of ID state. The + # collection of ATN states the following ID references includes + # states reached by traversing predicates. Since this is when we + # test them, we cannot cash the DFA state target of ID. 
+ + if self.debug: + print("EVAL rule "+ str(t.ruleIndex) + ":" + str(t.predIndex)) + configs.hasSemanticContext = True + if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative): + c = LexerATNConfig(state=t.target, config=config) + + elif t.serializationType==Transition.ACTION: + if config.context is None or config.context.hasEmptyPath(): + # execute actions anywhere in the start rule for a token. + # + # TODO: if the entry rule is invoked recursively, some + # actions may be executed during the recursive call. The + # problem can appear when hasEmptyPath() is true but + # isEmpty() is false. In this case, the config needs to be + # split into two contexts - one with just the empty path + # and another with everything but the empty path. + # Unfortunately, the current algorithm does not allow + # getEpsilonTarget to return two configurations, so + # additional modifications are needed before we can support + # the split operation. + lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor, + self.atn.lexerActions[t.actionIndex]) + c = LexerATNConfig(state=t.target, config=config, lexerActionExecutor=lexerActionExecutor) + + else: + # ignore actions in referenced rules + c = LexerATNConfig(state=t.target, config=config) + + elif t.serializationType==Transition.EPSILON: + c = LexerATNConfig(state=t.target, config=config) + + elif t.serializationType in [ Transition.ATOM, Transition.RANGE, Transition.SET ]: + if treatEofAsEpsilon: + if t.matches(Token.EOF, 0, 0xFFFF): + c = LexerATNConfig(state=t.target, config=config) + + return c + + # Evaluate a predicate specified in the lexer. + # + #

If {@code speculative} is {@code true}, this method was called before + # {@link #consume} for the matched character. This method should call + # {@link #consume} before evaluating the predicate to ensure position + # sensitive values, including {@link Lexer#getText}, {@link Lexer#getLine}, + # and {@link Lexer#getcolumn}, properly reflect the current + # lexer state. This method should restore {@code input} and the simulator + # to the original state before returning (i.e. undo the actions made by the + # call to {@link #consume}).
      + # + # @param input The input stream. + # @param ruleIndex The rule containing the predicate. + # @param predIndex The index of the predicate within the rule. + # @param speculative {@code true} if the current index in {@code input} is + # one character before the predicate's location. + # + # @return {@code true} if the specified predicate evaluates to + # {@code true}. + #/ + def evaluatePredicate(self, input, ruleIndex, predIndex, speculative): + # assume true if no recognizer was provided + if self.recog is None: + return True + + if not speculative: + return self.recog.sempred(None, ruleIndex, predIndex) + + savedcolumn = self.column + savedLine = self.line + index = input.index + marker = input.mark() + try: + self.consume(input) + return self.recog.sempred(None, ruleIndex, predIndex) + finally: + self.column = savedcolumn + self.line = savedLine + input.seek(index) + input.release(marker) + + def captureSimState(self, settings, input, dfaState): + settings.index = input.index + settings.line = self.line + settings.column = self.column + settings.dfaState = dfaState + + def addDFAEdge(self, from_, tk, to=None, cfgs=None): + + if to is None and cfgs is not None: + # leading to this call, ATNConfigSet.hasSemanticContext is used as a + # marker indicating dynamic predicate evaluation makes this edge + # dependent on the specific input sequence, so the static edge in the + # DFA should be omitted. The target DFAState is still created since + # execATN has the ability to resynchronize with the DFA state cache + # following the predicate evaluation step. + # + # TJP notes: next time through the DFA, we see a pred again and eval. + # If that gets us to a previously created (but dangling) DFA + # state, we can continue in pure DFA mode from there. + #/ + suppressEdge = cfgs.hasSemanticContext + cfgs.hasSemanticContext = False + + to = self.addDFAState(cfgs) + + if suppressEdge: + return to + + # add the edge + if tk < self.MIN_DFA_EDGE or tk > self.MAX_DFA_EDGE: + # Only track edges within the DFA bounds + return to + + if self.debug: + print("EDGE " + str(from_) + " -> " + str(to) + " upon "+ chr(tk)) + + if from_.edges is None: + # make room for tokens 1..n and -1 masquerading as index 0 + from_.edges = [ None ] * (self.MAX_DFA_EDGE - self.MIN_DFA_EDGE + 1) + + from_.edges[tk - self.MIN_DFA_EDGE] = to # connect + + return to + + + # Add a new DFA state if there isn't one with this set of + # configurations already. This method also detects the first + # configuration containing an ATN rule stop state. Later, when + # traversing the DFA, we will know which rule to accept. + def addDFAState(self, configs): + # the lexer evaluates predicates on-the-fly; by this point configs + # should not contain any configurations with unevaluated predicates. 
+ assert not configs.hasSemanticContext + + proposed = DFAState(configs=configs) + firstConfigWithRuleStopState = None + for c in configs: + if isinstance(c.state, RuleStopState): + firstConfigWithRuleStopState = c + break + + if firstConfigWithRuleStopState is not None: + proposed.isAcceptState = True + proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor + proposed.prediction = self.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex] + + dfa = self.decisionToDFA[self.mode] + existing = dfa.states.get(proposed, None) + if existing is not None: + return existing + + newState = proposed + + newState.stateNumber = len(dfa.states) + configs.setReadonly(True) + newState.configs = configs + dfa.states[newState] = newState + return newState + + def getDFA(self, mode): + return self.decisionToDFA[mode] + + # Get the text matched so far for the current token. + def getText(self, input): + # index is first lookahead char, don't include. + return input.getText(self.startIndex, input.index-1) + + def consume(self, input): + curChar = input.LA(1) + if curChar==ord('\n'): + self.line += 1 + self.column = 0 + else: + self.column += 1 + input.consume() + + def getTokenName(self, t): + if t==-1: + return "EOF" + else: + return "'" + chr(t) + "'" + + diff --git a/runtime/Python2/src/antlr4/atn/LexerAction.py b/runtime/Python2/src/antlr4/atn/LexerAction.py new file mode 100644 index 000000000..029afe3d2 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/LexerAction.py @@ -0,0 +1,316 @@ +# +#[The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +class LexerActionType(object): + + CHANNEL = 0 #The type of a {@link LexerChannelAction} action. + CUSTOM = 1 #The type of a {@link LexerCustomAction} action. + MODE = 2 #The type of a {@link LexerModeAction} action. + MORE = 3 #The type of a {@link LexerMoreAction} action. + POP_MODE = 4 #The type of a {@link LexerPopModeAction} action. + PUSH_MODE = 5 #The type of a {@link LexerPushModeAction} action. + SKIP = 6 #The type of a {@link LexerSkipAction} action. 
+ TYPE = 7 #The type of a {@link LexerTypeAction} action. + +class LexerAction(object): + + def __init__(self, action): + self.actionType = action + self.isPositionDependent = False + + def __hash__(self): + return hash(str(self.actionType)) + + def __eq__(self, other): + return self is other + + def __str__(self): + return unicode(self) + + def __unicode__(self): + return unicode(super(LexerAction, self)) + + +# +# Implements the {@code skip} lexer action by calling {@link Lexer#skip}. +# +#

+# The {@code skip} command does not have any parameters, so this action is +# implemented as a singleton instance exposed by {@link #INSTANCE}.
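Rough illustration (the WS rule and the lexer instance are hypothetical): a lexer command such as WS : [ \t\r\n]+ -> skip ; is represented by the shared singleton below rather than by a freshly allocated action per rule.

from antlr4.atn.LexerAction import LexerSkipAction, LexerActionType

action = LexerSkipAction.INSTANCE
print(action.actionType == LexerActionType.SKIP)   # True
# action.execute(lexer) would simply call lexer.skip() on a lexer instance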

      +class LexerSkipAction(LexerAction ): + + # Provides a singleton instance of this parameterless lexer action. + INSTANCE = None + + def __init__(self): + super(LexerSkipAction, self).__init__(LexerActionType.SKIP) + + def execute(self, lexer): + lexer.skip() + + def __unicode__(self): + return u"skip" + +LexerSkipAction.INSTANCE = LexerSkipAction() + +# Implements the {@code type} lexer action by calling {@link Lexer#setType} +# with the assigned type. +class LexerTypeAction(LexerAction): + + def __init__(self, type): + super(LexerTypeAction, self).__init__(LexerActionType.TYPE) + self.type = type + + def execute(self, lexer): + lexer.type = self.type + + def __hash__(self): + return hash(str(self.actionType) + str(self.type)) + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, LexerTypeAction): + return False + else: + return self.type == other.type + + def __unicode__(self): + return u"type(" + unicode(self.type) + u")" + + +# Implements the {@code pushMode} lexer action by calling +# {@link Lexer#pushMode} with the assigned mode. +class LexerPushModeAction(LexerAction): + + def __init__(self, mode): + super(LexerPushModeAction, self).__init__(LexerActionType.PUSH_MODE) + self.mode = mode + + #

+ # This action is implemented by calling {@link Lexer#pushMode} with the + # value provided by {@link #getMode}.

      + def execute(self, lexer): + lexer.pushMode(self.mode) + + def __hash__(self): + return hash(str(self.actionType) + str(self.mode)) + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, LexerPushModeAction): + return False + else: + return self.mode == other.mode + + def __unicode__(self): + return u"pushMode(" + unicode(self.mode) + u")" + + +# Implements the {@code popMode} lexer action by calling {@link Lexer#popMode}. +# +#

+# The {@code popMode} command does not have any parameters, so this action is +# implemented as a singleton instance exposed by {@link #INSTANCE}.

      +class LexerPopModeAction(LexerAction): + + INSTANCE = None + + def __init__(self): + super(LexerPopModeAction, self).__init__(LexerActionType.POP_MODE) + + #

+ # This action is implemented by calling {@link Lexer#popMode}.

      + def execute(self, lexer): + lexer.popMode() + + def __unicode__(self): + return "popMode" + +LexerPopModeAction.INSTANCE = LexerPopModeAction() + +# Implements the {@code more} lexer action by calling {@link Lexer#more}. +# +#

+# The {@code more} command does not have any parameters, so this action is +# implemented as a singleton instance exposed by {@link #INSTANCE}.

      +class LexerMoreAction(LexerAction): + + INSTANCE = None + + def __init__(self): + super(LexerMoreAction, self).__init__(LexerActionType.MORE) + + #

+ # This action is implemented by calling {@link Lexer#more}.

      + def execute(self, lexer): + lexer.more() + + def __unicode__(self): + return "more" + +LexerMoreAction.INSTANCE = LexerMoreAction() + +# Implements the {@code mode} lexer action by calling {@link Lexer#mode} with +# the assigned mode. +class LexerModeAction(LexerAction): + + def __init__(self, mode): + super(LexerModeAction, self).__init__(LexerActionType.MODE) + self.mode = mode + + #

+ # This action is implemented by calling {@link Lexer#mode} with the + # value provided by {@link #getMode}.

      + def execute(self, lexer): + lexer.mode(self.mode) + + def __hash__(self): + return hash(str(self.actionType) + str(self.mode)) + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, LexerModeAction): + return False + else: + return self.mode == other.mode + + def __unicode__(self): + return u"mode(" + unicode(self.mode) + u")" + +# Executes a custom lexer action by calling {@link Recognizer#action} with the +# rule and action indexes assigned to the custom action. The implementation of +# a custom action is added to the generated code for the lexer in an override +# of {@link Recognizer#action} when the grammar is compiled. +# +#

+# This class may represent embedded actions created with the {...} +# syntax in ANTLR 4, as well as actions created for lexer commands where the +# command argument could not be evaluated when the grammar was compiled.
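Hypothetical sketch of the lexer-side dispatch that LexerCustomAction.execute reaches through lexer.action(None, ruleIndex, actionIndex); MyLexer and the indexes stand in for what the code generator would emit for an embedded {...} action.

from antlr4.Lexer import Lexer

class MyLexer(Lexer):
    def action(self, localctx, ruleIndex, actionIndex):
        # a generated lexer switches on the indexes baked into the LexerCustomAction
        if ruleIndex == 3 and actionIndex == 0:
            print("ran embedded action 0 of rule 3")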

      + +class LexerCustomAction(LexerAction): + + # Constructs a custom lexer action with the specified rule and action + # indexes. + # + # @param ruleIndex The rule index to use for calls to + # {@link Recognizer#action}. + # @param actionIndex The action index to use for calls to + # {@link Recognizer#action}. + #/ + def __init__(self, ruleIndex, actionIndex): + super(LexerCustomAction, self).__init__(LexerActionType.CUSTOM) + self.ruleIndex = ruleIndex + self.actionIndex = actionIndex + self.isPositionDependent = True + + #

+ # Custom actions are implemented by calling {@link Lexer#action} with the + # appropriate rule and action indexes.

      + def execute(self, lexer): + lexer.action(None, self.ruleIndex, self.actionIndex) + + def __hash__(self): + return hash(str(self.actionType) + str(self.ruleIndex) + str(self.actionIndex)) + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, LexerCustomAction): + return False + else: + return self.ruleIndex == other.ruleIndex and self.actionIndex == other.actionIndex + +# Implements the {@code channel} lexer action by calling +# {@link Lexer#setChannel} with the assigned channel. +class LexerChannelAction(LexerAction): + + # Constructs a new {@code channel} action with the specified channel value. + # @param channel The channel value to pass to {@link Lexer#setChannel}. + def __init__(self, channel): + super(LexerChannelAction, self).__init__(LexerActionType.CHANNEL) + self.channel = channel + + #

+ # This action is implemented by calling {@link Lexer#setChannel} with the + # value provided by {@link #getChannel}.

      + def execute(self, lexer): + lexer._channel = self.channel + + def __hash__(self): + return hash(str(self.actionType) + str(self.channel)) + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, LexerChannelAction): + return False + else: + return self.channel == other.channel + + def __unicode__(self): + return u"channel(" + unicode(self.channel) + u")" + +# This implementation of {@link LexerAction} is used for tracking input offsets +# for position-dependent actions within a {@link LexerActionExecutor}. +# +#

+# This action is not serialized as part of the ATN, and is only required for +# position-dependent lexer actions which appear at a location other than the +# end of a rule. For more information about DFA optimizations employed for +# lexer actions, see {@link LexerActionExecutor#append} and +# {@link LexerActionExecutor#fixOffsetBeforeMatch}.

      +class LexerIndexedCustomAction(LexerAction): + + # Constructs a new indexed custom action by associating a character offset + # with a {@link LexerAction}. + # + #

+ # Note: This class is only required for lexer actions for which + # {@link LexerAction#isPositionDependent} returns {@code true}.

      + # + # @param offset The offset into the input {@link CharStream}, relative to + # the token start index, at which the specified lexer action should be + # executed. + # @param action The lexer action to execute at a particular offset in the + # input {@link CharStream}. + def __init__(self, offset, action): + super(LexerIndexedCustomAction, self).__init__(action.actionType) + self.offset = offset + self.action = action + self.isPositionDependent = True + + #

+ # This method calls {@link #execute} on the result of {@link #getAction} + # using the provided {@code lexer}.

      + def execute(self, lexer): + # assume the input stream position was properly set by the calling code + self.action.execute(lexer) + + def __hash__(self): + return hash(str(self.actionType) + str(self.offset) + str(self.action)) + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, LexerIndexedCustomAction): + return False + else: + return self.offset == other.offset and self.action == other.action diff --git a/runtime/Python2/src/antlr4/atn/LexerActionExecutor.py b/runtime/Python2/src/antlr4/atn/LexerActionExecutor.py new file mode 100644 index 000000000..7149a29d4 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/LexerActionExecutor.py @@ -0,0 +1,160 @@ +# +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ + +# Represents an executor for a sequence of lexer actions which traversed during +# the matching operation of a lexer rule (token). +# +#

+# The executor tracks position information for position-dependent lexer actions +# efficiently, ensuring that actions appearing only at the end of the rule do +# not cause bloating of the {@link DFA} created for the lexer.
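A small usage sketch of the executor, mirroring how getEpsilonTarget appends one action at a time for ACTION transitions; the lexer, input and startIndex in the commented call are assumed to exist.

from antlr4.atn.LexerAction import LexerChannelAction, LexerSkipAction
from antlr4.atn.LexerActionExecutor import LexerActionExecutor

executor = LexerActionExecutor.append(None, LexerChannelAction(1))
executor = LexerActionExecutor.append(executor, LexerSkipAction.INSTANCE)
print(len(executor.lexerActions))   # 2
# executor.execute(lexer, input, startIndex) would run both actions once the token is accepted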

      + + +from antlr4.atn.LexerAction import LexerIndexedCustomAction + +class LexerActionExecutor(object): + + def __init__(self, lexerActions=list()): + self.lexerActions = lexerActions + # Caches the result of {@link #hashCode} since the hash code is an element + # of the performance-critical {@link LexerATNConfig#hashCode} operation. + self.hashCode = hash("".join([str(la) for la in lexerActions])) + + + # Creates a {@link LexerActionExecutor} which executes the actions for + # the input {@code lexerActionExecutor} followed by a specified + # {@code lexerAction}. + # + # @param lexerActionExecutor The executor for actions already traversed by + # the lexer while matching a token within a particular + # {@link LexerATNConfig}. If this is {@code null}, the method behaves as + # though it were an empty executor. + # @param lexerAction The lexer action to execute after the actions + # specified in {@code lexerActionExecutor}. + # + # @return A {@link LexerActionExecutor} for executing the combine actions + # of {@code lexerActionExecutor} and {@code lexerAction}. + @staticmethod + def append(lexerActionExecutor, lexerAction): + if lexerActionExecutor is None: + return LexerActionExecutor([ lexerAction ]) + + lexerActions = lexerActionExecutor.lexerActions + [ lexerAction ] + return LexerActionExecutor(lexerActions) + + # Creates a {@link LexerActionExecutor} which encodes the current offset + # for position-dependent lexer actions. + # + #

+ # Normally, when the executor encounters lexer actions where + # {@link LexerAction#isPositionDependent} returns {@code true}, it calls + # {@link IntStream#seek} on the input {@link CharStream} to set the input + # position to the end of the current token. This behavior provides + # for efficient DFA representation of lexer actions which appear at the end + # of a lexer rule, even when the lexer rule matches a variable number of + # characters.

      + # + #

+ # Prior to traversing a match transition in the ATN, the current offset + # from the token start index is assigned to all position-dependent lexer + # actions which have not already been assigned a fixed offset. By storing + # the offsets relative to the token start index, the DFA representation of + # lexer actions which appear in the middle of tokens remains efficient due + # to sharing among tokens of the same length, regardless of their absolute + # position in the input stream.

      + # + #

+ # If the current executor already has offsets assigned to all + # position-dependent lexer actions, the method returns {@code this}.
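A sketch of the offset bookkeeping described above, using a made-up rule/action index pair: a position-dependent custom action gets wrapped in a LexerIndexedCustomAction carrying the offset, and an executor whose actions are already wrapped comes back unchanged.

from antlr4.atn.LexerAction import LexerCustomAction, LexerIndexedCustomAction
from antlr4.atn.LexerActionExecutor import LexerActionExecutor

executor = LexerActionExecutor([LexerCustomAction(3, 0)])   # position-dependent action
fixed = executor.fixOffsetBeforeMatch(2)
print(isinstance(fixed.lexerActions[0], LexerIndexedCustomAction))   # True
print(fixed.lexerActions[0].offset)                                  # 2
print(fixed.fixOffsetBeforeMatch(2) is fixed)                        # True, nothing left to wrap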

      + # + # @param offset The current offset to assign to all position-dependent + # lexer actions which do not already have offsets assigned. + # + # @return A {@link LexerActionExecutor} which stores input stream offsets + # for all position-dependent lexer actions. + #/ + def fixOffsetBeforeMatch(self, offset): + updatedLexerActions = None + for i in range(0, len(self.lexerActions)): + if self.lexerActions[i].isPositionDependent and not isinstance(self.lexerActions[i], LexerIndexedCustomAction): + if updatedLexerActions is None: + updatedLexerActions = [ la for la in self.lexerActions ] + updatedLexerActions[i] = LexerIndexedCustomAction(offset, self.lexerActions[i]) + + if updatedLexerActions is None: + return self + else: + return LexerActionExecutor(updatedLexerActions) + + + # Execute the actions encapsulated by this executor within the context of a + # particular {@link Lexer}. + # + #

+ # This method calls {@link IntStream#seek} to set the position of the + # {@code input} {@link CharStream} prior to calling + # {@link LexerAction#execute} on a position-dependent action. Before the + # method returns, the input position will be restored to the same position + # it was in when the method was invoked.

      + # + # @param lexer The lexer instance. + # @param input The input stream which is the source for the current token. + # When this method is called, the current {@link IntStream#index} for + # {@code input} should be the start of the following token, i.e. 1 + # character past the end of the current token. + # @param startIndex The token start index. This value may be passed to + # {@link IntStream#seek} to set the {@code input} position to the beginning + # of the token. + #/ + def execute(self, lexer, input, startIndex): + requiresSeek = False + stopIndex = input.index + try: + for lexerAction in self.lexerActions: + if isinstance(lexerAction, LexerIndexedCustomAction): + offset = lexerAction.offset + input.seek(startIndex + offset) + lexerAction = lexerAction.action + requiresSeek = (startIndex + offset) != stopIndex + elif lexerAction.isPositionDependent: + input.seek(stopIndex) + requiresSeek = False + lexerAction.execute(lexer) + finally: + if requiresSeek: + input.seek(stopIndex) + + def __hash__(self): + return self.hashCode + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, LexerActionExecutor): + return False + else: + return self.hashCode == other.hashCode \ + and self.lexerActions == other.lexerActions diff --git a/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py b/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py new file mode 100755 index 000000000..cd47a8019 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py @@ -0,0 +1,1523 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# The embodiment of the adaptive LL(*), ALL(*), parsing strategy. +# +#

      +# The basic complexity of the adaptive strategy makes it harder to understand. +# We begin with ATN simulation to build paths in a DFA. Subsequent prediction +# requests go through the DFA first. If they reach a state without an edge for +# the current symbol, the algorithm fails over to the ATN simulation to +# complete the DFA path for the current input (until it finds a conflict state +# or uniquely predicting state).

      +# +#

      +# All of that is done without using the outer context because we want to create +# a DFA that is not dependent upon the rule invocation stack when we do a +# prediction. One DFA works in all contexts. We avoid using context not +# necessarily because it's slower, although it can be, but because of the DFA +# caching problem. The closure routine only considers the rule invocation stack +# created during prediction beginning in the decision rule. For example, if +# prediction occurs without invoking another rule's ATN, there are no context +# stacks in the configurations. When lack of context leads to a conflict, we +# don't know if it's an ambiguity or a weakness in the strong LL(*) parsing +# strategy (versus full LL(*)).

      +# +#

      +# When SLL yields a configuration set with conflict, we rewind the input and +# retry the ATN simulation, this time using full outer context without adding +# to the DFA. Configuration context stacks will be the full invocation stacks +# from the start rule. If we get a conflict using full context, then we can +# definitively say we have a true ambiguity for that input sequence. If we +# don't get a conflict, it implies that the decision is sensitive to the outer +# context. (It is not context-sensitive in the sense of context-sensitive +# grammars.)

      +# +#

      +# The next time we reach this DFA state with an SLL conflict, through DFA +# simulation, we will again retry the ATN simulation using full context mode. +# This is slow because we can't save the results and have to "interpret" the +# ATN each time we get that input.

      +# +#

      +# CACHING FULL CONTEXT PREDICTIONS

      +# +#

      +# We could cache results from full context to predicted alternative easily and +# that saves a lot of time but doesn't work in presence of predicates. The set +# of visible predicates from the ATN start state changes depending on the +# context, because closure can fall off the end of a rule. I tried to cache +# tuples (stack context, semantic context, predicted alt) but it was slower +# than interpreting and much more complicated. Also required a huge amount of +# memory. The goal is not to create the world's fastest parser anyway. I'd like +# to keep this algorithm simple. By launching multiple threads, we can improve +# the speed of parsing across a large number of files.

      +# +#

      +# There is no strict ordering between the amount of input used by SLL vs LL, +# which makes it really hard to build a cache for full context. Let's say that +# we have input A B C that leads to an SLL conflict with full context X. That +# implies that using X we might only use A B but we could also use A B C D to +# resolve conflict. Input A B C D could predict alternative 1 in one position +# in the input and A B C E could predict alternative 2 in another position in +# input. The conflicting SLL configurations could still be non-unique in the +# full context prediction, which would lead us to requiring more input than the +# original A B C. To make a prediction cache work, we have to track the exact +# input used during the previous prediction. That amounts to a cache that maps +# X to a specific DFA for that context.

      +# +#

      +# Something should be done for left-recursive expression predictions. They are +# likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry +# with full LL thing Sam does.

      +# +#

      +# AVOIDING FULL CONTEXT PREDICTION

      +# +#

      +# We avoid doing full context retry when the outer context is empty, we did not +# dip into the outer context by falling off the end of the decision state rule, +# or when we force SLL mode.

      +# +#

+# As an example of the not-dip-into-outer-context case, consider super +# constructor calls versus function calls. One grammar might look like +# this:

      +# +#
      +# ctorBody
      +#   : '{' superCall? stat* '}'
      +#   ;
      +# 
      +# +#

      +# Or, you might see something like

      +# +#
      +# stat
      +#   : superCall ';'
      +#   | expression ';'
      +#   | ...
      +#   ;
      +# 
      +# +#

      +# In both cases I believe that no closure operations will dip into the outer +# context. In the first case ctorBody in the worst case will stop at the '}'. +# In the 2nd case it should stop at the ';'. Both cases should stay within the +# entry rule and not dip into the outer context.

      +# +#

      +# PREDICATES

      +# +#

+# Predicates are always evaluated when present, in both SLL and LL. SLL and +# LL simulation deal with predicates differently. SLL collects predicates as +# it performs closure operations like ANTLR v3 did. It delays predicate +# evaluation until it reaches an accept state. This allows us to cache the SLL +# ATN simulation whereas, if we had evaluated predicates on-the-fly during +# closure, the DFA state configuration sets would be different and we couldn't +# build up a suitable DFA.

      +# +#

      +# When building a DFA accept state during ATN simulation, we evaluate any +# predicates and return the sole semantically valid alternative. If there is +# more than 1 alternative, we report an ambiguity. If there are 0 alternatives, +# we throw an exception. Alternatives without predicates act like they have +# true predicates. The simple way to think about it is to strip away all +# alternatives with false predicates and choose the minimum alternative that +# remains.

      +# +#

      +# When we start in the DFA and reach an accept state that's predicated, we test +# those and return the minimum semantically viable alternative. If no +# alternatives are viable, we throw an exception.

      +# +#

+# During full LL ATN simulation, closure always evaluates predicates +# on-the-fly. This is crucial to reducing the configuration set size during +# closure. It hits a landmine when parsing with the Java grammar, for example, +# without this on-the-fly evaluation.

      +# +#

      +# SHARING DFA

      +# +#

      +# All instances of the same parser share the same decision DFAs through a +# static field. Each instance gets its own ATN simulator but they share the +# same {@link #decisionToDFA} field. They also share a +# {@link PredictionContextCache} object that makes sure that all +# {@link PredictionContext} objects are shared among the DFA states. This makes +# a big size difference.

      +# +#

      +# THREAD SAFETY

      +# +#

      +# The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when +# it adds a new DFA object to that array. {@link #addDFAEdge} +# locks on the DFA for the current decision when setting the +# {@link DFAState#edges} field. {@link #addDFAState} locks on +# the DFA for the current decision when looking up a DFA state to see if it +# already exists. We must make sure that all requests to add DFA states that +# are equivalent result in the same shared DFA object. This is because lots of +# threads will be trying to update the DFA at once. The +# {@link #addDFAState} method also locks inside the DFA lock +# but this time on the shared context cache when it rebuilds the +# configurations' {@link PredictionContext} objects using cached +# subgraphs/nodes. No other locking occurs, even during DFA simulation. This is +# safe as long as we can guarantee that all threads referencing +# {@code s.edge[t]} get the same physical target {@link DFAState}, or +# {@code null}. Once into the DFA, the DFA simulation does not reference the +# {@link DFA#states} map. It follows the {@link DFAState#edges} field to new +# targets. The DFA simulator will either find {@link DFAState#edges} to be +# {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or +# {@code dfa.edges[t]} to be non-null. The +# {@link #addDFAEdge} method could be racing to set the field +# but in either case the DFA simulator works; if {@code null}, and requests ATN +# simulation. It could also race trying to get {@code dfa.edges[t]}, but either +# way it will work because it's not doing a test and set operation.

      +# +#

+# Starting with SLL then failing over to combined SLL/LL (Two-Stage +# Parsing)

      +# +#

      +# Sam pointed out that if SLL does not give a syntax error, then there is no +# point in doing full LL, which is slower. We only have to try LL if we get a +# syntax error. For maximum speed, Sam starts the parser set to pure SLL +# mode with the {@link BailErrorStrategy}:

      +# +#
      +# parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
      +# parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
      +# 
      +# +#

      +# If it does not get a syntax error, then we're done. If it does get a syntax +# error, we need to retry with the combined SLL/LL strategy.

      +# +#

+# The reason this works is as follows. If there are no SLL conflicts, then the +# grammar is SLL (at least for that input set). If there is an SLL conflict, +# the full LL analysis must yield a set of viable alternatives which is a +# subset of the alternatives reported by SLL. If the LL set is a singleton, +# then the grammar is LL but not SLL. If the LL set is the same size as the SLL +# set, the decision is SLL. If the LL set has size > 1, then that decision +# is truly ambiguous on the current input. If the LL set is smaller, then the +# SLL conflict resolution might choose an alternative that the full LL would +# rule out as a possibility based upon better context information. If that's +# the case, then the SLL parse will definitely get an error because the full LL +# analysis says it's not viable. If SLL conflict resolution chooses an +# alternative within the LL set, then both SLL and LL would choose the same +# alternative because they both choose the minimum of multiple conflicting +# alternatives.

      +# +#

      +# Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and +# a smaller LL set called s. If s is {@code {2, 3}}, then SLL +# parsing will get an error because SLL will pursue alternative 1. If +# s is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will +# choose the same alternative because alternative one is the minimum of either +# set. If s is {@code {2}} or {@code {3}} then SLL will get a syntax +# error. If s is {@code {1}} then SLL will succeed.

      +# +#

      +# Of course, if the input is invalid, then we will get an error for sure in +# both SLL and LL parsing. Erroneous input will therefore require 2 passes over +# the input.
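A hedged end-to-end sketch of the two-stage strategy described above, written against this runtime. MyLexer, MyParser and startRule are stand-ins for a generated lexer/parser pair; the _interp and _errHandler attribute names, and ParseCancellationException as the error raised by BailErrorStrategy, are assumptions about the rest of this patch rather than guarantees.

from antlr4.InputStream import InputStream
from antlr4.CommonTokenStream import CommonTokenStream
from antlr4.atn.PredictionMode import PredictionMode
from antlr4.error.ErrorStrategy import BailErrorStrategy, DefaultErrorStrategy
from antlr4.error.Errors import ParseCancellationException

def parse_two_stage(text):
    def fresh_parser():
        return MyParser(CommonTokenStream(MyLexer(InputStream(text))))

    parser = fresh_parser()
    parser._interp.predictionMode = PredictionMode.SLL   # stage 1: fast SLL
    parser._errHandler = BailErrorStrategy()             # bail at the first syntax error
    try:
        return parser.startRule()
    except ParseCancellationException:
        parser = fresh_parser()                           # stage 2: full LL with default recovery
        parser._interp.predictionMode = PredictionMode.LL
        parser._errHandler = DefaultErrorStrategy()
        return parser.startRule()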

      +# +from __future__ import print_function +import sys + +from antlr4.PredictionContext import PredictionContext, SingletonPredictionContext, PredictionContextFromRuleContext +from antlr4.ParserRuleContext import ParserRuleContext +from antlr4.Token import Token +from antlr4.Utils import str_list +from antlr4.atn.ATN import ATN +from antlr4.atn.ATNConfig import ATNConfig +from antlr4.atn.ATNConfigSet import ATNConfigSet +from antlr4.atn.ATNSimulator import ATNSimulator +from antlr4.atn.ATNState import StarLoopEntryState, RuleStopState +from antlr4.atn.PredictionMode import PredictionMode +from antlr4.atn.SemanticContext import SemanticContext, andContext, orContext +from antlr4.atn.Transition import Transition, RuleTransition, ActionTransition, AtomTransition, SetTransition, NotSetTransition +from antlr4.dfa.DFAState import DFAState, PredPrediction +from antlr4.error.Errors import NoViableAltException + + +class ParserATNSimulator(ATNSimulator): + + debug = False + debug_list_atn_decisions = False + dfa_debug = False + retry_debug = False + + + def __init__(self, parser, atn, decisionToDFA, sharedContextCache): + super(ParserATNSimulator, self).__init__(atn, sharedContextCache) + self.parser = parser + self.decisionToDFA = decisionToDFA + # SLL, LL, or LL + exact ambig detection?# + self.predictionMode = PredictionMode.LL + # LAME globals to avoid parameters!!!!! I need these down deep in predTransition + self._input = None + self._startIndex = 0 + self._outerContext = None + self._dfa = None + # Each prediction operation uses a cache for merge of prediction contexts. + # Don't keep around as it wastes huge amounts of memory. DoubleKeyMap + # isn't synchronized but we're ok since two threads shouldn't reuse same + # parser/atnsim object because it can only handle one input at a time. + # This maps graphs a and b to merged result c. (a,b)→c. We can avoid + # the merge if we ever see a and b again. Note that (b,a)→c should + # also be examined during cache lookup. + # + self.mergeCache = None + + + def reset(self): + pass + + def adaptivePredict(self, input, decision, outerContext): + if self.debug or self.debug_list_atn_decisions: + print("adaptivePredict decision " + str(decision) + + " exec LA(1)==" + self.getLookaheadName(input) + + " line " + str(input.LT(1).line) + ":" + + str(input.LT(1).column)) + self._input = input + self._startIndex = input.index + self._outerContext = outerContext + + dfa = self.decisionToDFA[decision] + self._dfa = dfa + m = input.mark() + index = input.index + + # Now we are certain to have a specific decision's DFA + # But, do we still need an initial state? + try: + if dfa.precedenceDfa: + # the start state for a precedence DFA depends on the current + # parser precedence, and is provided by a DFA method. + s0 = dfa.getPrecedenceStartState(self.parser.getPrecedence()) + else: + # the start state for a "regular" DFA is just s0 + s0 = dfa.s0 + + if s0 is None: + if outerContext is None: + outerContext = ParserRuleContext.EMPTY + if self.debug or self.debug_list_atn_decisions: + print("predictATN decision " + str(dfa.decision) + + " exec LA(1)==" + self.getLookaheadName(input) + + ", outerContext=" + outerContext.toString(self.parser)) + + # If this is not a precedence DFA, we check the ATN start state + # to determine if this ATN start state is the decision for the + # closure block that determines whether a precedence rule + # should continue or complete. 
+ # + if not dfa.precedenceDfa and isinstance(dfa.atnStartState, StarLoopEntryState): + if dfa.atnStartState.precedenceRuleDecision: + dfa.setPrecedenceDfa(True) + + fullCtx = False + s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx) + + if dfa.precedenceDfa: + # If this is a precedence DFA, we use applyPrecedenceFilter + # to convert the computed start state to a precedence start + # state. We then use DFA.setPrecedenceStartState to set the + # appropriate start state for the precedence level rather + # than simply setting DFA.s0. + # + s0_closure = self.applyPrecedenceFilter(s0_closure) + s0 = self.addDFAState(dfa, DFAState(configs=s0_closure)) + dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0) + else: + s0 = self.addDFAState(dfa, DFAState(configs=s0_closure)) + dfa.s0 = s0 + + alt = self.execATN(dfa, s0, input, index, outerContext) + if self.debug: + print("DFA after predictATN: " + dfa.toString(self.parser.tokenNames)) + return alt + finally: + self._dfa = None + self.mergeCache = None # wack cache after each prediction + input.seek(index) + input.release(m) + + # Performs ATN simulation to compute a predicted alternative based + # upon the remaining input, but also updates the DFA cache to avoid + # having to traverse the ATN again for the same input sequence. + + # There are some key conditions we're looking for after computing a new + # set of ATN configs (proposed DFA state): + # if the set is empty, there is no viable alternative for current symbol + # does the state uniquely predict an alternative? + # does the state have a conflict that would prevent us from + # putting it on the work list? + + # We also have some key operations to do: + # add an edge from previous DFA state to potentially new DFA state, D, + # upon current symbol but only if adding to work list, which means in all + # cases except no viable alternative (and possibly non-greedy decisions?) + # collecting predicates and adding semantic context to DFA accept states + # adding rule context to context-sensitive DFA accept states + # consuming an input symbol + # reporting a conflict + # reporting an ambiguity + # reporting a context sensitivity + # reporting insufficient predicates + + # cover these cases: + # dead end + # single alt + # single alt + preds + # conflict + # conflict + preds + # + def execATN(self, dfa, s0, input, startIndex, outerContext ): + if self.debug or self.debug_list_atn_decisions: + print("execATN decision " + str(dfa.decision) + + " exec LA(1)==" + self.getLookaheadName(input) + + " line " + str(input.LT(1).line) + ":" + str(input.LT(1).column)) + + previousD = s0 + + if self.debug: + print("s0 = " + str(s0)) + + t = input.LA(1) + + while True: # while more work + D = self.getExistingTargetState(previousD, t) + if D is None: + D = self.computeTargetState(dfa, previousD, t) + if D is self.ERROR: + # if any configs in previous dipped into outer context, that + # means that input up to t actually finished entry rule + # at least for SLL decision. Full LL doesn't dip into outer + # so don't need special case. + # We will get an error no matter what so delay until after + # decision; better error message. Also, no reachable target + # ATN states in SLL implies LL will also get nowhere. + # If conflict in states that dip out, choose min since we + # will get error no matter what. 
+ e = self.noViableAlt(input, outerContext, previousD.configs, startIndex) + input.seek(startIndex) + alt = self.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext) + if alt!=ATN.INVALID_ALT_NUMBER: + return alt + raise e + + if D.requiresFullContext and self.predictionMode != PredictionMode.SLL: + # IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) + conflictingAlts = None + if D.predicates is not None: + if self.debug: + print("DFA state has preds in DFA sim LL failover") + conflictIndex = input.index + if conflictIndex != startIndex: + input.seek(startIndex) + + conflictingAlts = self.evalSemanticContext(D.predicates, outerContext, True) + if len(conflictingAlts)==1: + if self.debug: + print("Full LL avoided") + return min(conflictingAlts) + + if conflictIndex != startIndex: + # restore the index so reporting the fallback to full + # context occurs with the index at the correct spot + input.seek(conflictIndex) + + if self.dfa_debug: + print("ctx sensitive state " + str(outerContext) +" in " + str(D)) + fullCtx = True + s0_closure = self.computeStartState(dfa.atnStartState, outerContext, fullCtx) + self.reportAttemptingFullContext(dfa, conflictingAlts, D.configs, startIndex, input.index) + alt = self.execATNWithFullContext(dfa, D, s0_closure, input, startIndex, outerContext) + return alt + + if D.isAcceptState: + if D.predicates is None: + return D.prediction + + stopIndex = input.index + input.seek(startIndex) + alts = self.evalSemanticContext(D.predicates, outerContext, True) + if len(alts)==0: + raise self.noViableAlt(input, outerContext, D.configs, startIndex) + elif len(alts)==1: + return min(alts) + else: + # report ambiguity after predicate evaluation to make sure the correct + # set of ambig alts is reported. + self.reportAmbiguity(dfa, D, startIndex, stopIndex, False, alts, D.configs) + return min(alts) + + previousD = D + + if t != Token.EOF: + input.consume() + t = input.LA(1) + + # + # Get an existing target state for an edge in the DFA. If the target state + # for the edge has not yet been computed or is otherwise not available, + # this method returns {@code null}. + # + # @param previousD The current DFA state + # @param t The next input symbol + # @return The existing target DFA state for the given input symbol + # {@code t}, or {@code null} if the target state for this edge is not + # already cached + # + def getExistingTargetState(self, previousD, t): + edges = previousD.edges + if edges is None or t + 1 < 0 or t + 1 >= len(edges): + return None + else: + return edges[t + 1] + + # + # Compute a target state for an edge in the DFA, and attempt to add the + # computed state and corresponding edge to the DFA. + # + # @param dfa The DFA + # @param previousD The current DFA state + # @param t The next input symbol + # + # @return The computed target DFA state for the given input symbol + # {@code t}. If {@code t} does not lead to a valid DFA state, this method + # returns {@link #ERROR}. 
+ # + def computeTargetState(self, dfa, previousD, t): + reach = self.computeReachSet(previousD.configs, t, False) + if reach is None: + self.addDFAEdge(dfa, previousD, t, self.ERROR) + return self.ERROR + + # create new target state; we'll add to DFA after it's complete + D = DFAState(configs=reach) + + predictedAlt = self.getUniqueAlt(reach) + + if self.debug: + altSubSets = PredictionMode.getConflictingAltSubsets(reach) + print("SLL altSubSets=" + str(altSubSets) + ", configs=" + str(reach) + + ", predict=" + str(predictedAlt) + ", allSubsetsConflict=" + + str(PredictionMode.allSubsetsConflict(altSubSets)) + ", conflictingAlts=" + + str(self.getConflictingAlts(reach))) + + if predictedAlt!=ATN.INVALID_ALT_NUMBER: + # NO CONFLICT, UNIQUELY PREDICTED ALT + D.isAcceptState = True + D.configs.uniqueAlt = predictedAlt + D.prediction = predictedAlt + elif PredictionMode.hasSLLConflictTerminatingPrediction(self.predictionMode, reach): + # MORE THAN ONE VIABLE ALTERNATIVE + D.configs.conflictingAlts = self.getConflictingAlts(reach) + D.requiresFullContext = True + # in SLL-only mode, we will stop at this state and return the minimum alt + D.isAcceptState = True + D.prediction = min(D.configs.conflictingAlts) + + if D.isAcceptState and D.configs.hasSemanticContext: + self.predicateDFAState(D, self.atn.getDecisionState(dfa.decision)) + if D.predicates is not None: + D.prediction = ATN.INVALID_ALT_NUMBER + + # all adds to dfa are done after we've created full D state + D = self.addDFAEdge(dfa, previousD, t, D) + return D + + def predicateDFAState(self, dfaState, decisionState): + # We need to test all predicates, even in DFA states that + # uniquely predict alternative. + nalts = len(decisionState.transitions) + # Update DFA so reach becomes accept state with (predicate,alt) + # pairs if preds found for conflicting alts + altsToCollectPredsFrom = self.getConflictingAltsOrUniqueAlt(dfaState.configs) + altToPred = self.getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts) + if altToPred is not None: + dfaState.predicates = self.getPredicatePredictions(altsToCollectPredsFrom, altToPred) + dfaState.prediction = ATN.INVALID_ALT_NUMBER # make sure we use preds + else: + # There are preds in configs but they might go away + # when OR'd together like {p}? || NONE == NONE. If neither + # alt has preds, resolve to min alt + dfaState.prediction = min(altsToCollectPredsFrom) + + # comes back with reach.uniqueAlt set to a valid alt + def execATNWithFullContext(self, dfa, D, # how far we got before failing over + s0, + input, + startIndex, + outerContext): + if self.debug or self.debug_list_atn_decisions: + print("execATNWithFullContext "+s0) + fullCtx = True + foundExactAmbig = False + reach = None + previous = s0 + input.seek(startIndex) + t = input.LA(1) + predictedAlt = -1 + while (True): # while more work + reach = self.computeReachSet(previous, t, fullCtx) + if reach is None: + # if any configs in previous dipped into outer context, that + # means that input up to t actually finished entry rule + # at least for LL decision. Full LL doesn't dip into outer + # so don't need special case. + # We will get an error no matter what so delay until after + # decision; better error message. Also, no reachable target + # ATN states in SLL implies LL will also get nowhere. + # If conflict in states that dip out, choose min since we + # will get error no matter what. 
+ e = self.noViableAlt(input, outerContext, previous, startIndex) + input.seek(startIndex) + alt = self.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext) + if alt!=ATN.INVALID_ALT_NUMBER: + return alt + else: + raise e + + altSubSets = PredictionMode.getConflictingAltSubsets(reach) + if self.debug: + print("LL altSubSets=" + str(altSubSets) + ", predict=" + + str(PredictionMode.getUniqueAlt(altSubSets)) + ", resolvesToJustOneViableAlt=" + + str(PredictionMode.resolvesToJustOneViableAlt(altSubSets))) + + reach.uniqueAlt = self.getUniqueAlt(reach) + # unique prediction? + if reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER: + predictedAlt = reach.uniqueAlt + break + elif self.predictionMode is not PredictionMode.LL_EXACT_AMBIG_DETECTION: + predictedAlt = PredictionMode.resolvesToJustOneViableAlt(altSubSets) + if predictedAlt != ATN.INVALID_ALT_NUMBER: + break + else: + # In exact ambiguity mode, we never try to terminate early. + # Just keeps scarfing until we know what the conflict is + if PredictionMode.allSubsetsConflict(altSubSets) and PredictionMode.allSubsetsEqual(altSubSets): + foundExactAmbig = True + predictedAlt = PredictionMode.getSingleViableAlt(altSubSets) + break + # else there are multiple non-conflicting subsets or + # we're not sure what the ambiguity is yet. + # So, keep going. + + previous = reach + if t != Token.EOF: + input.consume() + t = input.LA(1) + + # If the configuration set uniquely predicts an alternative, + # without conflict, then we know that it's a full LL decision + # not SLL. + if reach.uniqueAlt != ATN.INVALID_ALT_NUMBER : + self.reportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.index) + return predictedAlt + + # We do not check predicates here because we have checked them + # on-the-fly when doing full context prediction. + + # + # In non-exact ambiguity detection mode, we might actually be able to + # detect an exact ambiguity, but I'm not going to spend the cycles + # needed to check. We only emit ambiguity warnings in exact ambiguity + # mode. + # + # For example, we might know that we have conflicting configurations. + # But, that does not mean that there is no way forward without a + # conflict. It's possible to have nonconflicting alt subsets as in: + + # altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] + + # from + # + # [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), + # (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] + # + # In this case, (17,1,[5 $]) indicates there is some next sequence that + # would resolve this without conflict to alternative 1. Any other viable + # next sequence, however, is associated with a conflict. We stop + # looking for input because no amount of further lookahead will alter + # the fact that we should predict alternative 1. We just can't say for + # sure that there is an ambiguity without looking further. + + self.reportAmbiguity(dfa, D, startIndex, input.index, foundExactAmbig, None, reach) + + return predictedAlt + + def computeReachSet(self, closure, t, fullCtx): + if self.debug: + print("in computeReachSet, starting closure: " + str(closure)) + + if self.mergeCache is None: + self.mergeCache = dict() + + intermediate = ATNConfigSet(fullCtx) + + # Configurations already in a rule stop state indicate reaching the end + # of the decision rule (local context) or end of the start rule (full + # context). 
Once reached, these configurations are never updated by a + # closure operation, so they are handled separately for the performance + # advantage of having a smaller intermediate set when calling closure. + # + # For full-context reach operations, separate handling is required to + # ensure that the alternative matching the longest overall sequence is + # chosen when multiple such configurations can match the input. + + skippedStopStates = None + + # First figure out where we can reach on input t + for c in closure: + if self.debug: + print("testing " + self.getTokenName(t) + " at " + str(c)) + + if isinstance(c.state, RuleStopState): + assert c.context.isEmpty() + if fullCtx or t == Token.EOF: + if skippedStopStates is None: + skippedStopStates = list() + skippedStopStates.append(c) + continue + + for trans in c.state.transitions: + target = self.getReachableTarget(trans, t) + if target is not None: + intermediate.add(ATNConfig(state=target, config=c), self.mergeCache) + + # Now figure out where the reach operation can take us... + + reach = None + + # This block optimizes the reach operation for intermediate sets which + # trivially indicate a termination state for the overall + # adaptivePredict operation. + # + # The conditions assume that intermediate + # contains all configurations relevant to the reach set, but this + # condition is not true when one or more configurations have been + # withheld in skippedStopStates , or when the current symbol is EOF. + # + if skippedStopStates is None and t!=Token.EOF: + if len(intermediate)==1: + # Don't pursue the closure if there is just one state. + # It can only have one alternative; just add to result + # Also don't pursue the closure if there is unique alternative + # among the configurations. + reach = intermediate + elif self.getUniqueAlt(intermediate)!=ATN.INVALID_ALT_NUMBER: + # Also don't pursue the closure if there is unique alternative + # among the configurations. + reach = intermediate + + # If the reach set could not be trivially determined, perform a closure + # operation on the intermediate set to compute its initial value. + # + if reach is None: + reach = ATNConfigSet(fullCtx) + closureBusy = set() + treatEofAsEpsilon = t == Token.EOF + for c in intermediate: + self.closure(c, reach, closureBusy, False, fullCtx, treatEofAsEpsilon) + + if t == Token.EOF: + # After consuming EOF no additional input is possible, so we are + # only interested in configurations which reached the end of the + # decision rule (local context) or end of the start rule (full + # context). Update reach to contain only these configurations. This + # handles both explicit EOF transitions in the grammar and implicit + # EOF transitions following the end of the decision or start rule. + # + # When reach==intermediate, no closure operation was performed. In + # this case, removeAllConfigsNotInRuleStopState needs to check for + # reachable rule stop states as well as configurations already in + # a rule stop state. + # + # This is handled before the configurations in skippedStopStates, + # because any configurations potentially added from that list are + # already guaranteed to meet this condition whether or not it's + # required. + # + reach = self.removeAllConfigsNotInRuleStopState(reach, reach is intermediate) + + # If skippedStopStates is not null, then it contains at least one + # configuration. 
For full-context reach operations, these + # configurations reached the end of the start rule, in which case we + # only add them back to reach if no configuration during the current + # closure operation reached such a state. This ensures adaptivePredict + # chooses an alternative matching the longest overall sequence when + # multiple alternatives are viable. + # + if skippedStopStates is not None and ( (not fullCtx) or (not PredictionMode.hasConfigInRuleStopState(reach))): + assert len(skippedStopStates)>0 + for c in skippedStopStates: + reach.add(c, self.mergeCache) + if len(reach)==0: + return None + else: + return reach + + # + # Return a configuration set containing only the configurations from + # {@code configs} which are in a {@link RuleStopState}. If all + # configurations in {@code configs} are already in a rule stop state, this + # method simply returns {@code configs}. + # + #

+ # When {@code lookToEndOfRule} is true, this method uses + # {@link ATN#nextTokens} for each configuration in {@code configs} which is + # not already in a rule stop state to see if a rule stop state is reachable + # from the configuration via epsilon-only transitions.

      + # + # @param configs the configuration set to update + # @param lookToEndOfRule when true, this method checks for rule stop states + # reachable by epsilon-only transitions from each configuration in + # {@code configs}. + # + # @return {@code configs} if all configurations in {@code configs} are in a + # rule stop state, otherwise return a new configuration set containing only + # the configurations from {@code configs} which are in a rule stop state + # + def removeAllConfigsNotInRuleStopState(self, configs, lookToEndOfRule): + if PredictionMode.allConfigsInRuleStopStates(configs): + return configs + result = ATNConfigSet(configs.fullCtx) + for config in configs: + if isinstance(config.state, RuleStopState): + result.add(config, self.mergeCache) + continue + if lookToEndOfRule and config.state.epsilonOnlyTransitions: + nextTokens = self.atn.nextTokens(config.state) + if Token.EPSILON in nextTokens: + endOfRuleState = self.atn.ruleToStopState[config.state.ruleIndex] + result.add(ATNConfig(state=endOfRuleState, config=config), self.mergeCache) + return result + + def computeStartState(self, p, ctx, fullCtx): + # always at least the implicit call to start rule + initialContext = PredictionContextFromRuleContext(self.atn, ctx) + configs = ATNConfigSet(fullCtx) + + for i in range(0, len(p.transitions)): + target = p.transitions[i].target + c = ATNConfig(target, i+1, initialContext) + closureBusy = set() + self.closure(c, configs, closureBusy, True, fullCtx, False) + return configs + + # + # This method transforms the start state computed by + # {@link #computeStartState} to the special start state used by a + # precedence DFA for a particular precedence value. The transformation + # process applies the following changes to the start state's configuration + # set. + # + #
+    # 1. Evaluate the precedence predicates for each configuration using {@link SemanticContext#evalPrecedence}.
+    # 2. Remove all configurations which predict an alternative greater than 1, for which another configuration that predicts alternative 1 is in the same ATN state with the same prediction context. This transformation is valid for the following reasons:
+    #    - The closure block cannot contain any epsilon transitions which bypass the body of the closure, so all states reachable via alternative 1 are part of the precedence alternatives of the transformed left-recursive rule.
+    #    - The "primary" portion of a left recursive rule cannot contain an epsilon transition, so the only way an alternative other than 1 can exist in a state that is also reachable via alternative 1 is by nesting calls to the left-recursive rule, with the outer calls not being at the preferred precedence level.
+    #

      + # The prediction context must be considered by this filter to address + # situations like the following. + #

      + # + #
      +    # grammar TA;
      +    # prog: statement* EOF;
      +    # statement: letterA | statement letterA 'b' ;
      +    # letterA: 'a';
      +    # 
      + #
      + #

+ # In the above grammar, the ATN state immediately before the token + # reference {@code 'a'} in {@code letterA} is reachable from the left edge + # of both the primary and closure blocks of the left-recursive rule + # {@code statement}. The prediction context associated with each of these + # configurations distinguishes between them, and prevents the alternative + # which stepped out to {@code prog} (and then back in to {@code statement}) + # from being eliminated by the filter. + #
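The effect of the filter can be traced on simplified data. The triples below are a hypothetical stand-in for (ATN state number, alternative, prediction context); they are not real ATNConfig objects and the state numbers and contexts are made up:

configs = [(3, 1, "[$]"), (3, 2, "[$]"), (5, 2, "[9 $]")]   # hypothetical (state, alt, context) triples
alt1_context = {s: ctx for (s, alt, ctx) in configs if alt == 1}
kept = [(s, alt, ctx) for (s, alt, ctx) in configs
        if alt == 1 or alt1_context.get(s) != ctx]
# kept drops (3, 2, "[$]") because alt 1 already covers state 3 with the same context,
# while (5, 2, "[9 $]") survives: no alt-1 configuration shares its state and context.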

      + # + # @param configs The configuration set computed by + # {@link #computeStartState} as the start state for the DFA. + # @return The transformed configuration set representing the start state + # for a precedence DFA at a particular precedence level (determined by + # calling {@link Parser#getPrecedence}). + # + def applyPrecedenceFilter(self, configs): + statesFromAlt1 = dict() + configSet = ATNConfigSet(configs.fullCtx) + for config in configs: + # handle alt 1 first + if config.alt != 1: + continue + updatedContext = config.semanticContext.evalPrecedence(self.parser, self._outerContext) + if updatedContext is None: + # the configuration was eliminated + continue + + statesFromAlt1[config.state.stateNumber] = config.context + if updatedContext is not config.semanticContext: + configSet.add(ATNConfig(config=config, semantic=updatedContext), self.mergeCache) + else: + configSet.add(config, self.mergeCache) + + for config in configs: + if config.alt == 1: + # already handled + continue + + # In the future, this elimination step could be updated to also + # filter the prediction context for alternatives predicting alt>1 + # (basically a graph subtraction algorithm). + # + if not config.precedenceFilterSuppressed: + context = statesFromAlt1.get(config.state.stateNumber, None) + if context==config.context: + # eliminated + continue + + configSet.add(config, self.mergeCache) + + return configSet + + def getReachableTarget(self, trans, ttype): + if trans.matches(ttype, 0, self.atn.maxTokenType): + return trans.target + else: + return None + + def getPredsForAmbigAlts(self, ambigAlts, configs, nalts): + # REACH=[1|1|[]|0:0, 1|2|[]|0:1] + # altToPred starts as an array of all null contexts. The entry at index i + # corresponds to alternative i. altToPred[i] may have one of three values: + # 1. null: no ATNConfig c is found such that c.alt==i + # 2. SemanticContext.NONE: At least one ATNConfig c exists such that + # c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, + # alt i has at least one unpredicated config. + # 3. Non-NONE Semantic Context: There exists at least one, and for all + # ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. + # + # From this, it is clear that NONE||anything==NONE. + # + altToPred = [None] * (nalts + 1) + for c in configs: + if c.alt in ambigAlts: + altToPred[c.alt] = orContext(altToPred[c.alt], c.semanticContext) + + nPredAlts = 0 + for i in range(1, nalts+1): + if altToPred[i] is None: + altToPred[i] = SemanticContext.NONE + elif altToPred[i] is not SemanticContext.NONE: + nPredAlts += 1 + + # nonambig alts are null in altToPred + if nPredAlts==0: + altToPred = None + if self.debug: + print("getPredsForAmbigAlts result " + str_list(altToPred)) + return altToPred + + def getPredicatePredictions(self, ambigAlts, altToPred): + pairs = [] + containsPredicate = False + for i in range(1, len(altToPred)): + pred = altToPred[i] + # unpredicated is indicated by SemanticContext.NONE + assert pred is not None + if ambigAlts is not None and i in ambigAlts: + pairs.append(PredPrediction(pred, i)) + if pred is not SemanticContext.NONE: + containsPredicate = True + + if not containsPredicate: + return None + + return pairs + + # + # This method is used to improve the localization of error messages by + # choosing an alternative rather than throwing a + # {@link NoViableAltException} in particular prediction scenarios where the + # {@link #ERROR} state was reached during ATN simulation. + # + #

      + # The default implementation of this method uses the following + # algorithm to identify an ATN configuration which successfully parsed the + # decision entry rule. Choosing such an alternative ensures that the + # {@link ParserRuleContext} returned by the calling rule will be complete + # and valid, and the syntax error will be reported later at a more + # localized location.

      + # + #
        + #
      • If a syntactically valid path or paths reach the end of the decision rule and + # they are semantically valid if predicated, return the min associated alt.
• Else, if one or more syntactically valid but semantically invalid paths exist, return the minimum associated alt. + #
      • Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.
      + # + #
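Putting the three steps above together on simplified data (a sketch only; each triple stands in for an alternative number, whether the configuration finished the decision entry rule, and whether its predicates evaluated true):

def pick_alt(configs, invalid_alt=0):   # 0 stands in for ATN.INVALID_ALT_NUMBER
    finished_valid = [alt for (alt, finished, sem_ok) in configs if finished and sem_ok]
    if finished_valid:
        return min(finished_valid)
    finished_invalid = [alt for (alt, finished, sem_ok) in configs if finished and not sem_ok]
    return min(finished_invalid) if finished_invalid else invalid_alt

pick_alt([(1, False, True), (2, True, False)])   # -> 2: only a failed-predicate path finished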

      + # In some scenarios, the algorithm described above could predict an + # alternative which will result in a {@link FailedPredicateException} in + # the parser. Specifically, this could occur if the only configuration + # capable of successfully parsing to the end of the decision rule is + # blocked by a semantic predicate. By choosing this alternative within + # {@link #adaptivePredict} instead of throwing a + # {@link NoViableAltException}, the resulting + # {@link FailedPredicateException} in the parser will identify the specific + # predicate which is preventing the parser from successfully parsing the + # decision rule, which helps developers identify and correct logic errors + # in semantic predicates. + #

      + # + # @param configs The ATN configurations which were valid immediately before + # the {@link #ERROR} state was reached + # @param outerContext The is the \gamma_0 initial parser context from the paper + # or the parser stack at the instant before prediction commences. + # + # @return The value to return from {@link #adaptivePredict}, or + # {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not + # identified and {@link #adaptivePredict} should report an error instead. + # + def getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(self, configs, outerContext): + semValidConfigs, semInvalidConfigs = self.splitAccordingToSemanticValidity(configs, outerContext) + alt = self.getAltThatFinishedDecisionEntryRule(semValidConfigs) + if alt!=ATN.INVALID_ALT_NUMBER: # semantically/syntactically viable path exists + return alt + # Is there a syntactically valid path with a failed pred? + if len(semInvalidConfigs)>0: + alt = self.getAltThatFinishedDecisionEntryRule(semInvalidConfigs) + if alt!=ATN.INVALID_ALT_NUMBER: # syntactically viable path exists + return alt + return ATN.INVALID_ALT_NUMBER + + def getAltThatFinishedDecisionEntryRule(self, configs): + alts = set() + for c in configs: + if c.reachesIntoOuterContext>0 or (isinstance(c.state, RuleStopState) and c.context.hasEmptyPath() ): + alts.add(c.alt) + if len(alts)==0: + return ATN.INVALID_ALT_NUMBER + else: + return min(alts) + + # Walk the list of configurations and split them according to + # those that have preds evaluating to true/false. If no pred, assume + # true pred and include in succeeded set. Returns Pair of sets. + # + # Create a new set so as not to alter the incoming parameter. + # + # Assumption: the input stream has been restored to the starting point + # prediction, which is where predicates need to evaluate. + # + def splitAccordingToSemanticValidity(self, configs, outerContext): + succeeded = ATNConfigSet(configs.fullCtx) + failed = ATNConfigSet(configs.fullCtx) + for c in configs: + if c.semanticContext is not SemanticContext.NONE: + predicateEvaluationResult = c.semanticContext.eval(self.parser, outerContext) + if predicateEvaluationResult: + succeeded.add(c) + else: + failed.add(c) + else: + succeeded.add(c) + return (succeeded,failed) + + # Look through a list of predicate/alt pairs, returning alts for the + # pairs that win. A {@code NONE} predicate indicates an alt containing an + # unpredicated config which behaves as "always true." If !complete + # then we stop at the first predicate that evaluates to true. This + # includes pairs with null predicates. + # + def evalSemanticContext(self, predPredictions, outerContext, complete): + predictions = set() + for pair in predPredictions: + if pair.pred is SemanticContext.NONE: + predictions.add(pair.alt) + if not complete: + break + continue + predicateEvaluationResult = pair.pred.eval(self.parser, outerContext) + if self.debug or self.dfa_debug: + print("eval pred " + str(pair) + "=" + str(predicateEvaluationResult)) + + if predicateEvaluationResult: + if self.debug or self.dfa_debug: + print("PREDICT " + str(pair.alt)) + predictions.add(pair.alt) + if not complete: + break + return predictions + + + # TODO: If we are doing predicates, there is no point in pursuing + # closure operations if we reach a DFA state that uniquely predicts + # alternative. We will not be caching that DFA state and it is a + # waste to pursue the closure. 
Might have to advance when we do + # ambig detection thought :( + # + + def closure(self, config, configs, closureBusy, collectPredicates, fullCtx, treatEofAsEpsilon): + initialDepth = 0; + self.closureCheckingStopState(config, configs, closureBusy, collectPredicates, + fullCtx, initialDepth, treatEofAsEpsilon) + assert not fullCtx or not configs.dipsIntoOuterContext + + + def closureCheckingStopState(self, config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon): + if self.debug: + print("closure(" + config.toString(self.parser,True) + ")") + + if isinstance(config.state, RuleStopState): + # We hit rule end. If we have context info, use it + # run thru all possible stack tops in ctx + if not config.context.isEmpty(): + for i in range(0, len(config.context)): + if config.context.getReturnState(i) is PredictionContext.EMPTY_RETURN_STATE: + if fullCtx: + configs.add(ATNConfig(state=config.state, context=PredictionContext.EMPTY, config=config), self.mergeCache) + continue + else: + # we have no context info, just chase follow links (if greedy) + if self.debug: + print("FALLING off rule " + self.getRuleName(config.state.ruleIndex)) + self.closure_(config, configs, closureBusy, collectPredicates, + fullCtx, depth, treatEofAsEpsilon) + continue + returnState = self.atn.states[config.context.getReturnState(i)] + newContext = config.context.getParent(i) # "pop" return state + c = ATNConfig(state=returnState, alt=config.alt, context=newContext, semantic=config.semanticContext) + # While we have context to pop back from, we may have + # gotten that context AFTER having falling off a rule. + # Make sure we track that we are now out of context. + c.reachesIntoOuterContext = config.reachesIntoOuterContext + assert depth > - 2**63 + self.closureCheckingStopState(c, configs, closureBusy, collectPredicates, fullCtx, depth - 1, treatEofAsEpsilon) + return + elif fullCtx: + # reached end of start rule + configs.add(config, self.mergeCache) + return + else: + # else if we have no context info, just chase follow links (if greedy) + if self.debug: + print("FALLING off rule " + self.getRuleName(config.state.ruleIndex)) + + self.closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon) + + # Do the actual work of walking epsilon edges# + def closure_(self, config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon): + p = config.state + # optimization + if not p.epsilonOnlyTransitions: + configs.add(config, self.mergeCache) + # make sure to not return here, because EOF transitions can act as + # both epsilon transitions and non-epsilon transitions. + + for t in p.transitions: + continueCollecting = collectPredicates and not isinstance(t, ActionTransition) + c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon) + if c is not None: + if not t.isEpsilon: + if c in closureBusy: + # avoid infinite recursion for EOF* and EOF+ + continue + closureBusy.add(c) + newDepth = depth + if isinstance( config.state, RuleStopState): + assert not fullCtx + # target fell off end of rule; mark resulting c as having dipped into outer context + # We can't get here if incoming config was rule stop and we had context + # track how far we dip into outer context. Might + # come in handy and we avoid evaluating context dependent + # preds if this is > 0. 
+ + if c in closureBusy: + # avoid infinite recursion for right-recursive rules + continue + closureBusy.add(c) + + if self._dfa is not None and self._dfa.precedenceDfa: + if t.outermostPrecedenceReturn == self._dfa.atnStartState.ruleIndex: + c.precedenceFilterSuppressed = True + c.reachesIntoOuterContext += 1 + configs.dipsIntoOuterContext = True # TODO: can remove? only care when we add to set per middle of this method + assert newDepth > - 2**63 + newDepth -= 1 + if self.debug: + print("dips into outer ctx: " + str(c)) + elif isinstance(t, RuleTransition): + # latch when newDepth goes negative - once we step out of the entry context we can't return + if newDepth >= 0: + newDepth += 1 + + self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon) + + def getRuleName(self, index): + if self.parser is not None and index>=0: + return self.parser.ruleNames[index] + else: + return "" + + def getEpsilonTarget(self, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon): + tt = t.serializationType + if tt==Transition.RULE: + return self.ruleTransition(config, t) + elif tt==Transition.PRECEDENCE: + return self.precedenceTransition(config, t, collectPredicates, inContext, fullCtx) + elif tt==Transition.PREDICATE: + return self.predTransition(config, t, collectPredicates, inContext, fullCtx) + elif tt==Transition.ACTION: + return self.actionTransition(config, t) + elif tt==Transition.EPSILON: + return ATNConfig(state=t.target, config=config) + elif tt in [ Transition.ATOM, Transition.RANGE, Transition.SET ]: + # EOF transitions act like epsilon transitions after the first EOF + # transition is traversed + if treatEofAsEpsilon: + if t.matches(Token.EOF, 0, 1): + return ATNConfig(state=t.target, config=config) + return None + + else: + return None + + def actionTransition(self, config, t): + if self.debug: + print("ACTION edge " + str(t.ruleIndex) + ":" + str(t.actionIndex)) + return ATNConfig(state=t.target, config=config) + + def precedenceTransition(self, config, pt, collectPredicates, inContext, fullCtx): + if self.debug: + print("PRED (collectPredicates=" + str(collectPredicates) + ") " + + str(pt.precedence) + ">=_p, ctx dependent=true") + if self.parser is not None: + print("context surrounding pred is " + str(self.parser.getRuleInvocationStack())) + + c = None + if collectPredicates and inContext: + if fullCtx: + # In full context mode, we can evaluate predicates on-the-fly + # during closure, which dramatically reduces the size of + # the config sets. It also obviates the need to test predicates + # later during conflict resolution. 
+ currentPosition = self._input.index + self._input.seek(self._startIndex) + predSucceeds = pt.getPredicate().eval(self.parser, self._outerContext); + self._input.seek(currentPosition) + if predSucceeds: + c = ATNConfig(state=pt.target, config=config) # no pred context + else: + newSemCtx = andContext(config.semanticContext, pt.getPredicate()) + c = ATNConfig(state=pt.target, semantic=newSemCtx, config=config) + else: + c = ATNConfig(state=pt.target, config=config) + + if self.debug: + print("config from pred transition=" + str(c)) + return c + + def predTransition(self, config, pt, collectPredicates, inContext, fullCtx): + if self.debug: + print("PRED (collectPredicates=" + str(collectPredicates) + ") " + str(pt.ruleIndex) + + ":" + str(pt.predIndex) + ", ctx dependent=" + str(pt.isCtxDependent)) + if self.parser is not None: + print("context surrounding pred is " + str(self.parser.getRuleInvocationStack())) + + c = None + if collectPredicates and (not pt.isCtxDependent or (pt.isCtxDependent and inContext)): + if fullCtx: + # In full context mode, we can evaluate predicates on-the-fly + # during closure, which dramatically reduces the size of + # the config sets. It also obviates the need to test predicates + # later during conflict resolution. + currentPosition = self._input.index + self._input.seek(self._startIndex) + predSucceeds = pt.getPredicate().eval(self.parser, self._outerContext) + self._input.seek(currentPosition) + if predSucceeds: + c = ATNConfig(state=pt.target, config=config) # no pred context + else: + newSemCtx = andContext(config.semanticContext, pt.getPredicate()) + c = ATNConfig(state=pt.target, semantic=newSemCtx, config=config) + else: + c = ATNConfig(state=pt.target, config=config) + + if self.debug: + print("config from pred transition=" + str(c)) + return c + + def ruleTransition(self, config, t): + if self.debug: + print("CALL rule " + self.getRuleName(t.target.ruleIndex) + ", ctx=" + str(config.context)) + returnState = t.followState + newContext = SingletonPredictionContext.create(config.context, returnState.stateNumber) + return ATNConfig(state=t.target, context=newContext, config=config ) + + def getConflictingAlts(self, configs): + altsets = PredictionMode.getConflictingAltSubsets(configs) + return PredictionMode.getAlts(altsets) + + # Sam pointed out a problem with the previous definition, v3, of + # ambiguous states. If we have another state associated with conflicting + # alternatives, we should keep going. For example, the following grammar + # + # s : (ID | ID ID?) ';' ; + # + # When the ATN simulation reaches the state before ';', it has a DFA + # state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally + # 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node + # because alternative to has another way to continue, via [6|2|[]]. + # The key is that we have a single state that has config's only associated + # with a single alternative, 2, and crucially the state transitions + # among the configurations are all non-epsilon transitions. That means + # we don't consider any conflicts that include alternative 2. So, we + # ignore the conflict between alts 1 and 2. We ignore a set of + # conflicting alts when there is an intersection with an alternative + # associated with a single alt state in the state→config-list map. + # + # It's also the case that we might have two conflicting configurations but + # also a 3rd nonconflicting configuration for a different alternative: + # [1|1|[], 1|2|[], 8|3|[]]. 
This can come about from grammar: + # + # a : A | A | A B ; + # + # After matching input A, we reach the stop state for rule A, state 1. + # State 8 is the state right before B. Clearly alternatives 1 and 2 + # conflict and no amount of further lookahead will separate the two. + # However, alternative 3 will be able to continue and so we do not + # stop working on this state. In the previous example, we're concerned + # with states associated with the conflicting alternatives. Here alt + # 3 is not associated with the conflicting configs, but since we can continue + # looking for input reasonably, I don't declare the state done. We + # ignore a set of conflicting alts when we have an alternative + # that we still need to pursue. + # + + def getConflictingAltsOrUniqueAlt(self, configs): + conflictingAlts = None + if configs.uniqueAlt!= ATN.INVALID_ALT_NUMBER: + conflictingAlts = set() + conflictingAlts.add(configs.uniqueAlt) + else: + conflictingAlts = configs.conflictingAlts + return conflictingAlts + + def getTokenName(self, t): + if t==Token.EOF: + return u"EOF" + if self.parser is not None and self.parser.tokenNames is not None: + if t >= len(self.parser.tokenNames): + print(str(t) + " ttype out of range: " + str_list(self.parser.tokenNames)) + print(str_list(self.parser.getInputStream().getTokens())) + else: + return self.parser.tokensNames[t] + u"<" + unicode(t) + ">" + return unicode(t) + + def getLookaheadName(self, input): + return self.getTokenName(input.LA(1)) + + # Used for debugging in adaptivePredict around execATN but I cut + # it out for clarity now that alg. works well. We can leave this + # "dead" code for a bit. + # + def dumpDeadEndConfigs(self, nvae): + print("dead end configs: ") + for c in nvae.getDeadEndConfigs(): + trans = "no edges" + if len(c.state.transitions)>0: + t = c.state.transitions[0] + if isinstance(t, AtomTransition): + trans = "Atom "+ self.getTokenName(t.label) + elif isinstance(t, SetTransition): + neg = isinstance(t, NotSetTransition) + trans = ("~" if neg else "")+"Set "+ str(t.set) + print(c.toString(self.parser, True) + ":" + trans, file=sys.stderr) + + def noViableAlt(self, input, outerContext, configs, startIndex): + return NoViableAltException(self.parser, input, input.get(startIndex), input.LT(1), configs, outerContext) + + def getUniqueAlt(self, configs): + alt = ATN.INVALID_ALT_NUMBER + for c in configs: + if alt == ATN.INVALID_ALT_NUMBER: + alt = c.alt # found first alt + elif c.alt!=alt: + return ATN.INVALID_ALT_NUMBER + return alt + + # + # Add an edge to the DFA, if possible. This method calls + # {@link #addDFAState} to ensure the {@code to} state is present in the + # DFA. If {@code from} is {@code null}, or if {@code t} is outside the + # range of edges that can be represented in the DFA tables, this method + # returns without adding the edge to the DFA. + # + #

      If {@code to} is {@code null}, this method returns {@code null}. + # Otherwise, this method returns the {@link DFAState} returned by calling + # {@link #addDFAState} for the {@code to} state.
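The edge table built by this method is sized and indexed with a one-slot shift so that the EOF symbol (token type -1) has a place to live; a small illustration with a made-up maxTokenType:

maxTokenType = 5                        # hypothetical value
edges = [None] * (maxTokenType + 2)     # one slot per symbol t = -1 .. maxTokenType
t = -1                                  # Token.EOF
edges[t + 1] = "targetState"            # the EOF edge lands in edges[0]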

      + # + # @param dfa The DFA + # @param from The source state for the edge + # @param t The input symbol + # @param to The target state for the edge + # + # @return If {@code to} is {@code null}, this method returns {@code null}; + # otherwise this method returns the result of calling {@link #addDFAState} + # on {@code to} + # + def addDFAEdge(self, dfa, from_, t, to): + if self.debug: + print("EDGE " + str(from_) + " -> " + str(to) + " upon " + self.getTokenName(t)) + + if to is None: + return None + + to = self.addDFAState(dfa, to) # used existing if possible not incoming + if from_ is None or t < -1 or t > self.atn.maxTokenType: + return to + + if from_.edges is None: + from_.edges = [None] * (self.atn.maxTokenType + 2) + from_.edges[t+1] = to # connect + + if self.debug: + names = None if self.parser is None else self.parser.tokenNames + print("DFA=\n" + dfa.toString(names)) + + return to + + # + # Add state {@code D} to the DFA if it is not already present, and return + # the actual instance stored in the DFA. If a state equivalent to {@code D} + # is already in the DFA, the existing state is returned. Otherwise this + # method returns {@code D} after adding it to the DFA. + # + #

      If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and + # does not change the DFA.

      + # + # @param dfa The dfa + # @param D The DFA state to add + # @return The state stored in the DFA. This will be either the existing + # state if {@code D} is already in the DFA, or {@code D} itself if the + # state was not already present. + # + def addDFAState(self, dfa, D): + if D is self.ERROR: + return D + + + existing = dfa.states.get(D, None) + if existing is not None: + return existing + + D.stateNumber = len(dfa.states) + if not D.configs.readonly: + D.configs.optimizeConfigs(self) + D.configs.setReadonly(True) + dfa.states[D] = D + if self.debug: + print("adding new DFA state: " + str(D)) + return D + + def reportAttemptingFullContext(self, dfa, conflictingAlts, configs, startIndex, stopIndex): + if self.debug or self.retry_debug: + interval = range(startIndex, stopIndex + 1) + print("reportAttemptingFullContext decision=" + str(dfa.decision) + ":" + str(configs) + + ", input=" + self.parser.getTokenStream().getText(interval)) + if self.parser is not None: + self.parser.getErrorListenerDispatch().reportAttemptingFullContext(self.parser, dfa, startIndex, stopIndex, conflictingAlts, configs) + + def reportContextSensitivity(self, dfa, prediction, configs, startIndex, stopIndex): + if self.debug or self.retry_debug: + interval = range(startIndex, stopIndex + 1) + print("reportContextSensitivity decision=" + str(dfa.decision) + ":" + str(configs) + + ", input=" + self.parser.getTokenStream().getText(interval)) + if self.parser is not None: + self.parser.getErrorListenerDispatch().reportContextSensitivity(self.parser, dfa, startIndex, stopIndex, prediction, configs) + + # If context sensitive parsing, we know it's ambiguity not conflict# + def reportAmbiguity(self, dfa, D, startIndex, stopIndex, + exact, ambigAlts, configs ): + if self.debug or self.retry_debug: +# ParserATNPathFinder finder = new ParserATNPathFinder(parser, atn); +# int i = 1; +# for (Transition t : dfa.atnStartState.transitions) { +# print("ALT "+i+"="); +# print(startIndex+".."+stopIndex+", len(input)="+parser.getInputStream().size()); +# TraceTree path = finder.trace(t.target, parser.getContext(), (TokenStream)parser.getInputStream(), +# startIndex, stopIndex); +# if ( path!=null ) { +# print("path = "+path.toStringTree()); +# for (TraceTree leaf : path.leaves) { +# List states = path.getPathToNode(leaf); +# print("states="+states); +# } +# } +# i++; +# } + interval = range(startIndex, stopIndex + 1) + print("reportAmbiguity " + str(ambigAlts) + ":" + str(configs) + + ", input=" + self.parser.getTokenStream().getText(interval)) + if self.parser is not None: + self.parser.getErrorListenerDispatch().reportAmbiguity(self.parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs) + diff --git a/runtime/Python2/src/antlr4/atn/PredictionMode.py b/runtime/Python2/src/antlr4/atn/PredictionMode.py new file mode 100644 index 000000000..1947c25f4 --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/PredictionMode.py @@ -0,0 +1,544 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# +# This enumeration defines the prediction modes available in ANTLR 4 along with +# utility methods for analyzing configuration sets for conflicts and/or +# ambiguities. + + +from antlr4.atn.ATN import ATN +from antlr4.atn.ATNConfig import ATNConfig +from antlr4.atn.ATNConfigSet import ATNConfigSet +from antlr4.atn.ATNState import RuleStopState +from antlr4.atn.SemanticContext import SemanticContext + +class PredictionMode(object): + # + # The SLL(*) prediction mode. This prediction mode ignores the current + # parser context when making predictions. This is the fastest prediction + # mode, and provides correct results for many grammars. This prediction + # mode is more powerful than the prediction mode provided by ANTLR 3, but + # may result in syntax errors for grammar and input combinations which are + # not SLL. + # + #

+ # When using this prediction mode, the parser will either return a correct + # parse tree (i.e. the same parse tree that would be returned with the + # {@link #LL} prediction mode), or it will report a syntax error. If a + # syntax error is encountered when using the {@link #SLL} prediction mode, + # it may be due either to an actual syntax error in the input or it may + # indicate that the particular combination of grammar and input requires the more + # powerful {@link #LL} prediction abilities to complete successfully.

      + # + #

      + # This prediction mode does not provide any guarantees for prediction + # behavior for syntactically-incorrect inputs.
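In practice the mode is selected on the parser's ATN simulator before a start rule is invoked. The sketch below is illustrative only: MyLexer, MyParser and the start rule prog are hypothetical generated names, and it assumes the simulator is reachable as parser._interp with a predictionMode attribute, as in the other ANTLR 4 runtimes:

from antlr4.InputStream import InputStream
from antlr4.CommonTokenStream import CommonTokenStream
from antlr4.atn.PredictionMode import PredictionMode

lexer = MyLexer(InputStream(u"a a b"))               # MyLexer/MyParser are hypothetical generated classes
parser = MyParser(CommonTokenStream(lexer))
parser._interp.predictionMode = PredictionMode.SLL   # assumed attribute on the ATN simulator
tree = parser.prog()   # if a syntax error is reported, re-parse with PredictionMode.LL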

      + # + SLL = 0 + # + # The LL(*) prediction mode. This prediction mode allows the current parser + # context to be used for resolving SLL conflicts that occur during + # prediction. This is the fastest prediction mode that guarantees correct + # parse results for all combinations of grammars with syntactically correct + # inputs. + # + #

      + # When using this prediction mode, the parser will make correct decisions + # for all syntactically-correct grammar and input combinations. However, in + # cases where the grammar is truly ambiguous this prediction mode might not + # report a precise answer for exactly which alternatives are + # ambiguous.

      + # + #

      + # This prediction mode does not provide any guarantees for prediction + # behavior for syntactically-incorrect inputs.

      + # + LL = 1 + # + # The LL(*) prediction mode with exact ambiguity detection. In addition to + # the correctness guarantees provided by the {@link #LL} prediction mode, + # this prediction mode instructs the prediction algorithm to determine the + # complete and exact set of ambiguous alternatives for every ambiguous + # decision encountered while parsing. + # + #

      + # This prediction mode may be used for diagnosing ambiguities during + # grammar development. Due to the performance overhead of calculating sets + # of ambiguous alternatives, this prediction mode should be avoided when + # the exact results are not necessary.

      + # + #

      + # This prediction mode does not provide any guarantees for prediction + # behavior for syntactically-incorrect inputs.

      + # + LL_EXACT_AMBIG_DETECTION = 2 + + + # + # Computes the SLL prediction termination condition. + # + #

      + # This method computes the SLL prediction termination condition for both of + # the following cases.

      + # + #
        + #
      • The usual SLL+LL fallback upon SLL conflict
      • Pure SLL without LL fallback
      + # + #

      COMBINED SLL+LL PARSING

      + # + #

      When LL-fallback is enabled upon SLL conflict, correct predictions are + # ensured regardless of how the termination condition is computed by this + # method. Due to the substantially higher cost of LL prediction, the + # prediction should only fall back to LL when the additional lookahead + # cannot lead to a unique SLL prediction.

      + # + #

Assuming combined SLL+LL parsing, an SLL configuration set with only + # conflicting subsets should fall back to full LL, even if the + # configuration sets don't resolve to the same alternative (e.g. + # {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting + # configuration, SLL could continue with the hope that more lookahead will + # resolve via one of those non-conflicting configurations.

      + # + #

Here's the prediction termination rule, then: SLL (for SLL+LL parsing) + # stops when it sees only conflicting configuration subsets. In contrast, + # full LL keeps going when there is uncertainty.

      + # + #

      HEURISTIC

      + # + #

      As a heuristic, we stop prediction when we see any conflicting subset + # unless we see a state that only has one alternative associated with it. + # The single-alt-state thing lets prediction continue upon rules like + # (otherwise, it would admit defeat too soon):

      + # + #

      {@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}

      + # + #

When the ATN simulation reaches the state before {@code ';'}, it has a + # DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally + # {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop + # processing this node because alternative two has another way to continue, + # via {@code [6|2|[]]}.

      + # + #

It also lets us continue for this rule:

      + # + #

      {@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}

      + # + #

      After matching input A, we reach the stop state for rule A, state 1. + # State 8 is the state right before B. Clearly alternatives 1 and 2 + # conflict and no amount of further lookahead will separate the two. + # However, alternative 3 will be able to continue and so we do not stop + # working on this state. In the previous example, we're concerned with + # states associated with the conflicting alternatives. Here alt 3 is not + # associated with the conflicting configs, but since we can continue + # looking for input reasonably, don't declare the state done.

      + # + #
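The heuristic can be traced on simplified (state, alt, context) triples; the two maps below mirror getConflictingAltSubsets and getStateToAltMap defined later in this class (plain tuples stand in for ATNConfig objects):

configs = [(12, 1, "[]"), (6, 2, "[]"), (12, 2, "[]")]   # s : (ID | ID ID?) ';' just before ';'
conflict_subsets = {}   # keyed by (state, context), as in getConflictingAltSubsets
state_subsets = {}      # keyed by state alone, as in getStateToAltMap
for (s, alt, ctx) in configs:
    conflict_subsets.setdefault((s, ctx), set()).add(alt)
    state_subsets.setdefault(s, set()).add(alt)
has_conflict = any(len(alts) > 1 for alts in conflict_subsets.values())   # True: alts {1, 2} at state 12
one_alt_state = any(len(alts) == 1 for alts in state_subsets.values())    # True: only alt 2 at state 6
stop_sll = has_conflict and not one_alt_state                             # False: keep looking ahead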

      PURE SLL PARSING

      + # + #

      To handle pure SLL parsing, all we have to do is make sure that we + # combine stack contexts for configurations that differ only by semantic + # predicate. From there, we can do the usual SLL termination heuristic.

      + # + #

      PREDICATES IN SLL+LL PARSING

      + # + #

      SLL decisions don't evaluate predicates until after they reach DFA stop + # states because they need to create the DFA cache that works in all + # semantic situations. In contrast, full LL evaluates predicates collected + # during start state computation so it can ignore predicates thereafter. + # This means that SLL termination detection can totally ignore semantic + # predicates.

      + # + #

      Implementation-wise, {@link ATNConfigSet} combines stack contexts but not + # semantic predicate contexts so we might see two configurations like the + # following.

      + # + #

      {@code (s, 1, x, {}), (s, 1, x', {p})}

      + # + #

      Before testing these configurations against others, we have to merge + # {@code x} and {@code x'} (without modifying the existing configurations). + # For example, we test {@code (x+x')==x''} when looking for conflicts in + # the following configurations.

      + # + #

      {@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}

      + # + #

      If the configuration set has predicates (as indicated by + # {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of + # the configurations to strip out all of the predicates so that a standard + # {@link ATNConfigSet} will merge everything ignoring predicates.

      + # + @classmethod + def hasSLLConflictTerminatingPrediction(cls, mode, configs): + # Configs in rule stop states indicate reaching the end of the decision + # rule (local context) or end of start rule (full context). If all + # configs meet this condition, then none of the configurations is able + # to match additional input so we terminate prediction. + # + if cls.allConfigsInRuleStopStates(configs): + return True + + # pure SLL mode parsing + if mode == PredictionMode.SLL: + # Don't bother with combining configs from different semantic + # contexts if we can fail over to full LL; costs more time + # since we'll often fail over anyway. + if configs.hasSemanticContext: + # dup configs, tossing out semantic predicates + dup = ATNConfigSet() + for c in configs: + c = ATNConfig(c,SemanticContext.NONE) + dup.add(c) + configs = dup + # now we have combined contexts for configs with dissimilar preds + + # pure SLL or combined SLL+LL mode parsing + altsets = cls.getConflictingAltSubsets(configs) + return cls.hasConflictingAltSet(altsets) and not cls.hasStateAssociatedWithOneAlt(configs) + + # Checks if any configuration in {@code configs} is in a + # {@link RuleStopState}. Configurations meeting this condition have reached + # the end of the decision rule (local context) or end of start rule (full + # context). + # + # @param configs the configuration set to test + # @return {@code true} if any configuration in {@code configs} is in a + # {@link RuleStopState}, otherwise {@code false} + @classmethod + def hasConfigInRuleStopState(cls, configs): + for c in configs: + if isinstance(c.state, RuleStopState): + return True + return False + + # Checks if all configurations in {@code configs} are in a + # {@link RuleStopState}. Configurations meeting this condition have reached + # the end of the decision rule (local context) or end of start rule (full + # context). + # + # @param configs the configuration set to test + # @return {@code true} if all configurations in {@code configs} are in a + # {@link RuleStopState}, otherwise {@code false} + @classmethod + def allConfigsInRuleStopStates(cls, configs): + for config in configs: + if not isinstance(config.state, RuleStopState): + return False + return True + + # + # Full LL prediction termination. + # + #

      Can we stop looking ahead during ATN simulation or is there some + # uncertainty as to which alternative we will ultimately pick, after + # consuming more input? Even if there are partial conflicts, we might know + # that everything is going to resolve to the same minimum alternative. That + # means we can stop since no more lookahead will change that fact. On the + # other hand, there might be multiple conflicts that resolve to different + # minimums. That means we need more look ahead to decide which of those + # alternatives we should predict.

      + # + #

      The basic idea is to split the set of configurations {@code C}, into + # conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with + # non-conflicting configurations. Two configurations conflict if they have + # identical {@link ATNConfig#state} and {@link ATNConfig#context} values + # but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)} + # and {@code (s, j, ctx, _)} for {@code i!=j}.

      + # + #

      Reduce these configuration subsets to the set of possible alternatives. + # You can compute the alternative subsets in one pass as follows:

      + # + #

      {@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in + # {@code C} holding {@code s} and {@code ctx} fixed.

      + # + #

      Or in pseudo-code, for each configuration {@code c} in {@code C}:

      + # + #
      +    # map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
      +    # alt and not pred
      +    # 
      + # + #

      The values in {@code map} are the set of {@code A_s,ctx} sets.

      + # + #

      If {@code |A_s,ctx|=1} then there is no conflict associated with + # {@code s} and {@code ctx}.

      + # + #

      Reduce the subsets to singletons by choosing a minimum of each subset. If + # the union of these alternative subsets is a singleton, then no amount of + # more lookahead will help us. We will always pick that alternative. If, + # however, there is more than one alternative, then we are uncertain which + # alternative to predict and must continue looking for resolution. We may + # or may not discover an ambiguity in the future, even if there are no + # conflicting subsets this round.

      + # + #

      The biggest sin is to terminate early because it means we've made a + # decision but were uncertain as to the eventual outcome. We haven't used + # enough lookahead. On the other hand, announcing a conflict too late is no + # big deal; you will still have the conflict. It's just inefficient. It + # might even look until the end of file.

      + # + #

      No special consideration for semantic predicates is required because + # predicates are evaluated on-the-fly for full LL prediction, ensuring that + # no configuration contains a semantic context during the termination + # check.

      + # + #

      CONFLICTING CONFIGS

      + # + #

      Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict + # when {@code i!=j} but {@code x=x'}. Because we merge all + # {@code (s, i, _)} configurations together, that means that there are at + # most {@code n} configurations associated with state {@code s} for + # {@code n} possible alternatives in the decision. The merged stacks + # complicate the comparison of configuration contexts {@code x} and + # {@code x'}. Sam checks to see if one is a subset of the other by calling + # merge and checking to see if the merged result is either {@code x} or + # {@code x'}. If the {@code x} associated with lowest alternative {@code i} + # is the superset, then {@code i} is the only possible prediction since the + # others resolve to {@code min(i)} as well. However, if {@code x} is + # associated with {@code j>i} then at least one stack configuration for + # {@code j} is not in conflict with alternative {@code i}. The algorithm + # should keep going, looking for more lookahead due to the uncertainty.

      + # + #

For simplicity, I'm doing an equality check between {@code x} and + # {@code x'} that lets the algorithm continue to consume lookahead longer + # than necessary. The reason I like the equality is of course the + # simplicity but also because that is the test you need to detect the + # alternatives that are actually in conflict.

      + # + #

      CONTINUE/STOP RULE

      + # + #

      Continue if union of resolved alternative sets from non-conflicting and + # conflicting alternative subsets has more than one alternative. We are + # uncertain about which alternative to predict.

      + # + #

The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which + # alternatives are still in the running for the amount of input we've + # consumed at this point. The conflicting sets let us strip away + # configurations that won't lead to more states because we resolve + # conflicts to the configuration with a minimum alternate for the + # conflicting set.

      + # + #

      CASES

      + # + #
        + # + #
      • no conflicts and more than 1 alternative in set => continue
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, + # {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set + # {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + # {@code {1,3}} => continue + #
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + # {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set + # {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + # {@code {1}} => stop and predict 1
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + # {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U + # {@code {1}} = {@code {1}} => stop and predict 1, can announce + # ambiguity {@code {1,2}}
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, + # {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U + # {@code {2}} = {@code {1,2}} => continue
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, + # {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U + # {@code {3}} = {@code {1,3}} => continue
      + # + #
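Because the subset utilities in this class only rely on min, len and set union, the case analysis above can be reproduced with plain Python sets (an illustration only, not how the simulator itself calls them):

PredictionMode.getSingleViableAlt([{1, 2}, {1, 2}])   # -> 1: both subsets resolve to 1, stop and predict 1
PredictionMode.getSingleViableAlt([{1, 2}, {2, 3}])   # -> 0 (INVALID_ALT_NUMBER): {1} U {2} = {1, 2}, continue
PredictionMode.allSubsetsConflict([{3}, {1, 2}])      # -> False: the non-conflicting {3} keeps prediction going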

      EXACT AMBIGUITY DETECTION

      + # + #

      If all states report the same conflicting set of alternatives, then we + # know we have the exact ambiguity set.

      + # + #

      |A_i|>1 and + # A_i = A_j for all i, j.

      + # + #

      In other words, we continue examining lookahead until all {@code A_i} + # have more than one alternative and all {@code A_i} are the same. If + # {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate + # because the resolved set is {@code {1}}. To determine what the real + # ambiguity is, we have to know whether the ambiguity is between one and + # two or one and three so we keep going. We can only stop prediction when + # we need exact ambiguity detection when the sets look like + # {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...
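A sketch of the termination test this implies, using plain sets for the A_i subsets (the pairing of allSubsetsConflict and allSubsetsEqual mirrors the description above; where exactly the simulator applies it is not shown in this file):

altsets = [{1, 2}, {1, 2}]
PredictionMode.allSubsetsConflict(altsets) and PredictionMode.allSubsetsEqual(altsets)   # True: exact ambiguity {1, 2}
altsets = [{1, 2}, {1, 3}]
PredictionMode.allSubsetsConflict(altsets) and PredictionMode.allSubsetsEqual(altsets)   # False: keep consuming lookahead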

      + # + @classmethod + def resolvesToJustOneViableAlt(cls, altsets): + return cls.getSingleViableAlt(altsets) + + # + # Determines if every alternative subset in {@code altsets} contains more + # than one alternative. + # + # @param altsets a collection of alternative subsets + # @return {@code true} if every {@link BitSet} in {@code altsets} has + # {@link BitSet#cardinality cardinality} > 1, otherwise {@code false} + # + @classmethod + def allSubsetsConflict(cls, altsets): + return not cls.hasNonConflictingAltSet(altsets) + + # + # Determines if any single alternative subset in {@code altsets} contains + # exactly one alternative. + # + # @param altsets a collection of alternative subsets + # @return {@code true} if {@code altsets} contains a {@link BitSet} with + # {@link BitSet#cardinality cardinality} 1, otherwise {@code false} + # + @classmethod + def hasNonConflictingAltSet(cls, altsets): + for alts in altsets: + if len(alts)==1: + return True + return False + + # + # Determines if any single alternative subset in {@code altsets} contains + # more than one alternative. + # + # @param altsets a collection of alternative subsets + # @return {@code true} if {@code altsets} contains a {@link BitSet} with + # {@link BitSet#cardinality cardinality} > 1, otherwise {@code false} + # + @classmethod + def hasConflictingAltSet(cls, altsets): + for alts in altsets: + if len(alts)>1: + return True + return False + + # + # Determines if every alternative subset in {@code altsets} is equivalent. + # + # @param altsets a collection of alternative subsets + # @return {@code true} if every member of {@code altsets} is equal to the + # others, otherwise {@code false} + # + @classmethod + def allSubsetsEqual(cls, altsets): + first = None + for alts in altsets: + if first is None: + first = alts + elif not alts==first: + return False + return True + + # + # Returns the unique alternative predicted by all alternative subsets in + # {@code altsets}. If no such alternative exists, this method returns + # {@link ATN#INVALID_ALT_NUMBER}. + # + # @param altsets a collection of alternative subsets + # + @classmethod + def getUniqueAlt(cls, altsets): + all = cls.getAlts(altsets) + if len(all)==1: + return all[0] + else: + return ATN.INVALID_ALT_NUMBER + + # Gets the complete set of represented alternatives for a collection of + # alternative subsets. This method returns the union of each {@link BitSet} + # in {@code altsets}. + # + # @param altsets a collection of alternative subsets + # @return the set of represented alternatives in {@code altsets} + # + @classmethod + def getAlts(cls, altsets): + all = set() + for alts in altsets: + all = all | alts + return all + + # + # This function gets the conflicting alt subsets from a configuration set. + # For each configuration {@code c} in {@code configs}: + # + #
      +    # map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
      +    # alt and not pred
      +    # 
      + # + @classmethod + def getConflictingAltSubsets(cls, configs): + configToAlts = dict() + for c in configs: + s = str(c.state.stateNumber) + "/" + str(c.context) + alts = configToAlts.get(s, None) + if alts is None: + alts = set() + configToAlts[s] = alts + alts.add(c.alt) + return configToAlts.values() + + # + # Get a map from state to alt subset from a configuration set. For each + # configuration {@code c} in {@code configs}: + # + #
      +    # map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
      +    # 
      + # + @classmethod + def getStateToAltMap(cls, configs): + m = dict() + for c in configs: + alts = m.get(c.state, None) + if alts is None: + alts = set() + m[c.state] = alts + alts.add(c.alt) + return m + + @classmethod + def hasStateAssociatedWithOneAlt(cls, configs): + x = cls.getStateToAltMap(configs) + for alts in x.values(): + if len(alts)==1: + return True + return False + + @classmethod + def getSingleViableAlt(cls, altsets): + viableAlts = set() + for alts in altsets: + minAlt = min(alts) + viableAlts.add(minAlt); + if len(viableAlts)>1 : # more than 1 viable alt + return ATN.INVALID_ALT_NUMBER + return min(viableAlts) diff --git a/runtime/Python2/src/antlr4/atn/SemanticContext.py b/runtime/Python2/src/antlr4/atn/SemanticContext.py new file mode 100644 index 000000000..4a300b60b --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/SemanticContext.py @@ -0,0 +1,360 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# A tree structure used to record the semantic context in which +# an ATN configuration is valid. It's either a single predicate, +# a conjunction {@code p1&&p2}, or a sum of products {@code p1||p2}. +# +#

      I have scoped the {@link AND}, {@link OR}, and {@link Predicate} subclasses of +# {@link SemanticContext} within the scope of this outer class.
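The combinator functions defined further down treat {@link #NONE} as the implicit "true" predicate; a small sketch with two plain Predicate instances (illustration only, made-up rule and predicate indexes):

p1 = Predicate(ruleIndex=0, predIndex=0)
p2 = Predicate(ruleIndex=0, predIndex=1)
andContext(SemanticContext.NONE, p1)   # -> p1: "true and p1" reduces to p1
orContext(p1, SemanticContext.NONE)    # -> SemanticContext.NONE: "p1 or true" is always true
andContext(p1, p2)                     # -> an AND node holding both predicates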

      +# +from io import StringIO + + +class SemanticContext(object): + # + # The default {@link SemanticContext}, which is semantically equivalent to + # a predicate of the form {@code {true}?}. + # + NONE = None + + # + # For context independent predicates, we evaluate them without a local + # context (i.e., null context). That way, we can evaluate them without + # having to create proper rule-specific context during prediction (as + # opposed to the parser, which creates them naturally). In a practical + # sense, this avoids a cast exception from RuleContext to myruleContext. + # + #

      For context dependent predicates, we must pass in a local context so that + # references such as $arg evaluate properly as _localctx.arg. We only + # capture context dependent predicates in the context in which we begin + # prediction, so we passed in the outer context here in case of context + # dependent predicate evaluation.

      + # + def eval(self, parser, outerContext): + pass + + # + # Evaluate the precedence predicates for the context and reduce the result. + # + # @param parser The parser instance. + # @param outerContext The current parser context object. + # @return The simplified semantic context after precedence predicates are + # evaluated, which will be one of the following values. + #
        + #
      • {@link #NONE}: if the predicate simplifies to {@code true} after + # precedence predicates are evaluated.
      • {@code null}: if the predicate simplifies to {@code false} after + # precedence predicates are evaluated.
      • {@code this}: if the semantic context is not changed as a result of + # precedence predicate evaluation.
      • A non-{@code null} {@link SemanticContext}: the new simplified + # semantic context after precedence predicates are evaluated.
      + # + def evalPrecedence(self, parser, outerContext): + return self + + def __str__(self): + return unicode(self) + + def __unicode__(self): + return unicode(super(SemanticContext, self)) + + +def andContext(a, b): + if a is None or a is SemanticContext.NONE: + return b + if b is None or b is SemanticContext.NONE: + return a + result = AND(a, b) + if len(result.opnds) == 1: + return result.opnds[0] + else: + return result + +def orContext(a, b): + if a is None: + return b + if b is None: + return a + if a is SemanticContext.NONE or b is SemanticContext.NONE: + return SemanticContext.NONE + result = OR(a, b) + if len(result.opnds) == 1: + return result.opnds[0] + else: + return result + +def filterPrecedencePredicates(collection): + result = [] + for context in collection: + if isinstance(context, PrecedencePredicate): + if result is None: + result = [] + result.append(context) + return result + + +class Predicate(SemanticContext): + + def __init__(self, ruleIndex=-1, predIndex=-1, isCtxDependent=False): + self.ruleIndex = ruleIndex + self.predIndex = predIndex + self.isCtxDependent = isCtxDependent # e.g., $i ref in pred + + def eval(self, parser, outerContext): + localctx = outerContext if self.isCtxDependent else None + return parser.sempred(localctx, self.ruleIndex, self.predIndex) + + def __hash__(self): + with StringIO() as buf: + buf.write(unicode(self.ruleIndex)) + buf.write(u"/") + buf.write(unicode(self.predIndex)) + buf.write(u"/") + buf.write(unicode(self.isCtxDependent)) + return hash(buf.getvalue()) + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, Predicate): + return False + return self.ruleIndex == other.ruleIndex and \ + self.predIndex == other.predIndex and \ + self.isCtxDependent == other.isCtxDependent + + def __unicode__(self): + return u"{" + unicode(self.ruleIndex) + u":" + unicode(self.predIndex) + u"}?" + + +class PrecedencePredicate(SemanticContext): + + def __init__(self, precedence=0): + self.precedence = precedence + + def eval(self, parser, outerContext): + return parser.precpred(outerContext, self.precedence) + + def evalPrecedence(self, parser, outerContext): + if parser.precpred(outerContext, self.precedence): + return SemanticContext.NONE + else: + return None + + def __cmp__(self, other): + return self.precedence - other.precedence + + def __hash__(self): + return 31 + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, PrecedencePredicate): + return False + else: + return self.precedence == other.precedence + +# A semantic context which is true whenever none of the contained contexts +# is false. +# +class AND(SemanticContext): + + def __init__(self, a, b): + operands = set() + if isinstance( a, AND): + for o in a.opnds: + operands.add(o) + else: + operands.add(a) + if isinstance( b, AND): + for o in b.opnds: + operands.add(o) + else: + operands.add(b) + + precedencePredicates = filterPrecedencePredicates(operands) + if len(precedencePredicates)>0: + # interested in the transition with the lowest precedence + reduced = min(precedencePredicates) + operands.add(reduced) + + self.opnds = [ o for o in operands ] + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, AND): + return False + else: + return self.opnds == other.opnds + + def __hash__(self): + return hash(str(self.opnds)+ "/AND") + + # + # {@inheritDoc} + # + #

      + # The evaluation of predicates by this context is short-circuiting, but + # unordered.

      + # + def eval(self, parser, outerContext): + for opnd in self.opnds: + if not opnd.eval(parser, outerContext): + return False + return True + + def evalPrecedence(self, parser, outerContext): + differs = False + operands = [] + for context in self.opnds: + evaluated = context.evalPrecedence(parser, outerContext) + differs |= evaluated is not context + if evaluated is None: + # The AND context is false if any element is false + return None + elif evaluated is not SemanticContext.NONE: + # Reduce the result by skipping true elements + operands.append(evaluated) + + if not differs: + return self + + if len(operands)==0: + # all elements were true, so the AND context is true + return SemanticContext.NONE + + result = None + for o in operands: + result = o if result is None else andContext(result, o) + + return result + + def __unicode__(self): + with StringIO() as buf: + first = True + for o in self.opnds: + if not first: + buf.write(u"&&") + buf.write(unicode(o)) + first = False + return buf.getvalue() + +# +# A semantic context which is true whenever at least one of the contained +# contexts is true. +# +class OR (SemanticContext): + + def __init__(self, a, b): + operands = set() + if isinstance( a, OR): + for o in a.opnds: + operands.add(o) + else: + operands.add(a) + if isinstance( b, OR): + for o in b.opnds: + operands.add(o) + else: + operands.add(b) + + precedencePredicates = filterPrecedencePredicates(operands) + if len(precedencePredicates)>0: + # interested in the transition with the highest precedence + s = sorted(precedencePredicates) + reduced = s[len(s)-1] + operands.add(reduced) + + self.opnds = [ o for o in operands ] + + def __eq__(self, other): + if self is other: + return True + elif not isinstance(other, OR): + return False + else: + return self.opnds == other.opnds + + def __hash__(self): + return hash(str(self.opnds)+"/OR") + + #
+ # The evaluation of predicates by this context is short-circuiting, but
+ # unordered.
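+ #
+ # For illustration (assuming this module is importable), the module-level
+ # helpers defined above collapse operands around SemanticContext.NONE:
+ #
+ #   from antlr4.atn.SemanticContext import SemanticContext, Predicate, andContext, orContext
+ #   p = Predicate(1, 0, False)
+ #   orContext(p, SemanticContext.NONE) is SemanticContext.NONE   # True: NONE absorbs an OR
+ #   andContext(p, SemanticContext.NONE) is p                     # True: NONE is dropped from an AND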
      + # + def eval(self, parser, outerContext): + for opnd in self.opnds: + if opnd.eval(parser, outerContext): + return True + return False + + def evalPrecedence(self, parser, outerContext): + differs = False + operands = [] + for context in self.opnds: + evaluated = context.evalPrecedence(parser, outerContext) + differs |= evaluated is not context + if evaluated is SemanticContext.NONE: + # The OR context is true if any element is true + return SemanticContext.NONE + elif evaluated is not None: + # Reduce the result by skipping false elements + operands.append(evaluated) + + if not differs: + return self + + if len(operands)==0: + # all elements were false, so the OR context is false + return None + + result = None + for o in operands: + result = o if result is None else orContext(result, o) + + return result + + def __unicode__(self): + with StringIO() as buf: + first = True + for o in self.opnds: + if not first: + buf.write(u"||") + buf.write(unicode(o)) + first = False + return buf.getvalue() + + +SemanticContext.NONE = Predicate() \ No newline at end of file diff --git a/runtime/Python2/src/antlr4/atn/Transition.py b/runtime/Python2/src/antlr4/atn/Transition.py new file mode 100644 index 000000000..457d724bb --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/Transition.py @@ -0,0 +1,279 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# An ATN transition between any two ATN states. Subclasses define +# atom, set, epsilon, action, predicate, rule transitions. +# +#
+# This is a one way link.  It emanates from a state (usually via a list of
+# transitions) and has a target state.
+#
+# Since we never have to change the ATN transitions once we construct it,
+# we can fix these transitions as specific classes. The DFA transitions
+# on the other hand need to update the labels as it adds transitions to
+# the states. We'll use the term Edge for the DFA to distinguish them from
+# ATN transitions.
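+#
+# An illustrative sketch of how the concrete transition classes below expose
+# their labels and matches() checks (BasicState is assumed to be the plain
+# ATN state class defined in ATNState.py; it only serves as a dummy target):
+#
+#   from antlr4.atn.ATNState import BasicState
+#   from antlr4.atn.Transition import AtomTransition, RangeTransition
+#
+#   target = BasicState()
+#   atom = AtomTransition(target, ord('a'))             # label is a single symbol
+#   rng = RangeTransition(target, ord('a'), ord('z'))   # label is an inclusive range
+#
+#   atom.matches(ord('a'), 0, 0xFFFF)   # True
+#   rng.matches(ord('m'), 0, 0xFFFF)    # True
+#   rng.matches(ord('0'), 0, 0xFFFF)    # False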
      +# +from __builtin__ import unicode + +from antlr4.IntervalSet import IntervalSet, Interval +from antlr4.Token import Token +from antlr4.atn.SemanticContext import Predicate, PrecedencePredicate + + +class Transition (object): + # constants for serialization + EPSILON = 1 + RANGE = 2 + RULE = 3 + PREDICATE = 4 # e.g., {isType(input.LT(1))}? + ATOM = 5 + ACTION = 6 + SET = 7 # ~(A|B) or ~atom, wildcard, which convert to next 2 + NOT_SET = 8 + WILDCARD = 9 + PRECEDENCE = 10 + + serializationNames = [ + u"INVALID", + u"EPSILON", + u"RANGE", + u"RULE", + u"PREDICATE", + u"ATOM", + u"ACTION", + u"SET", + u"NOT_SET", + u"WILDCARD", + u"PRECEDENCE" + ] + + serializationTypes = dict() + + def __init__(self, target): + # The target of this transition. + if target is None: + raise Exception("target cannot be null.") + self.target = target + # Are we epsilon, action, sempred? + self.isEpsilon = False + self.label = None + + def __str__(self): + return unicode(self) + + +# TODO: make all transitions sets? no, should remove set edges +class AtomTransition(Transition): + + def __init__(self, target, label): + super(AtomTransition, self).__init__(target) + self.label_ = label # The token type or character value; or, signifies special label. + self.label = self.makeLabel() + self.serializationType = self.ATOM + + def makeLabel(self): + s = IntervalSet() + s.addOne(self.label_) + return s + + def matches( self, symbol, minVocabSymbol, maxVocabSymbol): + return self.label_ == symbol + + def __unicode__(self): + return unicode(self.label_) + +class RuleTransition(Transition): + + def __init__(self, ruleStart, ruleIndex, precedence, followState): + super(RuleTransition, self).__init__(ruleStart) + self.ruleIndex = ruleIndex # ptr to the rule definition object for this rule ref + self.precedence = precedence + self.followState = followState # what node to begin computations following ref to rule + self.serializationType = self.RULE + self.isEpsilon = True + + def matches( self, symbol, minVocabSymbol, maxVocabSymbol): + return False + + +class EpsilonTransition(Transition): + + def __init__(self, target, outermostPrecedenceReturn=-1): + super(EpsilonTransition, self).__init__(target) + self.serializationType = self.EPSILON + self.isEpsilon = True + self.outermostPrecedenceReturn = outermostPrecedenceReturn + + def matches( self, symbol, minVocabSymbol, maxVocabSymbol): + return False + + def __unicode__(self): + return "epsilon" + +class RangeTransition(Transition): + + def __init__(self, target, start, stop): + super(RangeTransition, self).__init__(target) + self.serializationType = self.RANGE + self.start = start + self.stop = stop + self.label = self.makeLabel() + + def makeLabel(self): + s = IntervalSet() + s.addRange(Interval(self.start, self.stop + 1)) + return s + + def matches( self, symbol, minVocabSymbol, maxVocabSymbol): + return symbol >= self.start and symbol <= self.stop + + def __unicode__(self): + return "'" + chr(self.start) + "'..'" + chr(self.stop) + "'" + +class AbstractPredicateTransition(Transition): + + def __init__(self, target): + super(AbstractPredicateTransition, self).__init__(target) + + +class PredicateTransition(AbstractPredicateTransition): + + def __init__(self, target, ruleIndex, predIndex, isCtxDependent): + super(PredicateTransition, self).__init__(target) + self.serializationType = self.PREDICATE + self.ruleIndex = ruleIndex + self.predIndex = predIndex + self.isCtxDependent = isCtxDependent # e.g., $i ref in pred + self.isEpsilon = True + + def matches( self, symbol, 
minVocabSymbol, maxVocabSymbol): + return False + + def getPredicate(self): + return Predicate(self.ruleIndex, self.predIndex, self.isCtxDependent) + + def __unicode__(self): + return u"pred_" + unicode(self.ruleIndex) + u":" + unicode(self.predIndex) + +class ActionTransition(Transition): + + def __init__(self, target, ruleIndex, actionIndex=-1, isCtxDependent=False): + super(ActionTransition, self).__init__(target) + self.serializationType = self.ACTION + self.ruleIndex = ruleIndex + self.actionIndex = actionIndex + self.isCtxDependent = isCtxDependent # e.g., $i ref in pred + self.isEpsilon = True + + def matches( self, symbol, minVocabSymbol, maxVocabSymbol): + return False + + def __unicode__(self): + return u"action_" + unicode(self.ruleIndex) + u":" + unicode(self.actionIndex) + +# A transition containing a set of values. +class SetTransition(Transition): + + def __init__(self, target, set): + super(SetTransition, self).__init__(target) + self.serializationType = self.SET + if set is not None: + self.label = set + else: + self.label = IntervalSet() + self.label.addRange(Interval(Token.INVALID_TYPE, Token.INVALID_TYPE + 1)) + + def matches( self, symbol, minVocabSymbol, maxVocabSymbol): + return symbol in self.label + + def __unicode__(self): + return unicode(self.label) + +class NotSetTransition(SetTransition): + + def __init__(self, target, set): + super(NotSetTransition, self).__init__(target, set) + self.serializationType = self.NOT_SET + + def matches( self, symbol, minVocabSymbol, maxVocabSymbol): + return symbol >= minVocabSymbol \ + and symbol <= maxVocabSymbol \ + and not super(type(self), self).matches(symbol, minVocabSymbol, maxVocabSymbol) + + def __unicode__(self): + return u'~' + super(type(self), self).__unicode__() + + +class WildcardTransition(Transition): + + def __init__(self, target): + super(WildcardTransition, self).__init__(target) + self.serializationType = self.WILDCARD + + def matches( self, symbol, minVocabSymbol, maxVocabSymbol): + return symbol >= minVocabSymbol and symbol <= maxVocabSymbol + + def __unicode__(self): + return u"." 
+ + +class PrecedencePredicateTransition(AbstractPredicateTransition): + + def __init__(self, target, precedence): + super(PrecedencePredicateTransition, self).__init__(target) + self.serializationType = self.PRECEDENCE + self.precedence = precedence + self.isEpsilon = True + + def matches( self, symbol, minVocabSymbol, maxVocabSymbol): + return False + + + def getPredicate(self): + return PrecedencePredicate(self.precedence) + + def __unicode__(self): + return self.precedence + " >= _p" + + +Transition.serializationTypes = { + EpsilonTransition: Transition.EPSILON, + RangeTransition: Transition.RANGE, + RuleTransition: Transition.RULE, + PredicateTransition: Transition.PREDICATE, + AtomTransition: Transition.ATOM, + ActionTransition: Transition.ACTION, + SetTransition: Transition.SET, + NotSetTransition: Transition.NOT_SET, + WildcardTransition: Transition.WILDCARD, + PrecedencePredicateTransition: Transition.PRECEDENCE + } + diff --git a/runtime/Python2/src/antlr4/atn/__init__.py b/runtime/Python2/src/antlr4/atn/__init__.py new file mode 100644 index 000000000..216c000dc --- /dev/null +++ b/runtime/Python2/src/antlr4/atn/__init__.py @@ -0,0 +1 @@ +__author__ = 'ericvergnaud' diff --git a/runtime/Python2/src/antlr4/dfa/DFA.py b/runtime/Python2/src/antlr4/dfa/DFA.py new file mode 100644 index 000000000..4ed5fb6e9 --- /dev/null +++ b/runtime/Python2/src/antlr4/dfa/DFA.py @@ -0,0 +1,150 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from antlr4.atn.ATNConfigSet import ATNConfigSet +from antlr4.dfa.DFAState import DFAState +from antlr4.error.Errors import IllegalStateException + + +class DFA(object): + + def __init__(self, atnStartState, decision=0): + # From which ATN state did we create this DFA? + self.atnStartState = atnStartState + self.decision = decision + # A set of all DFA states. Use {@link Map} so we can get old state back + # ({@link Set} only allows you to see if it's there). 
+ self._states = dict() + self.s0 = None + # {@code true} if this DFA is for a precedence decision; otherwise, + # {@code false}. This is the backing field for {@link #isPrecedenceDfa}, + # {@link #setPrecedenceDfa}. + self.precedenceDfa = False + + + # Get the start state for a specific precedence value. + # + # @param precedence The current precedence. + # @return The start state corresponding to the specified precedence, or + # {@code null} if no start state exists for the specified precedence. + # + # @throws IllegalStateException if this is not a precedence DFA. + # @see #isPrecedenceDfa() + + def getPrecedenceStartState(self, precedence): + if not self.precedenceDfa: + raise IllegalStateException("Only precedence DFAs may contain a precedence start state.") + + # s0.edges is never null for a precedence DFA + if precedence < 0 or precedence >= len(self.s0.edges): + return None + return self.s0.edges[precedence] + + # Set the start state for a specific precedence value. + # + # @param precedence The current precedence. + # @param startState The start state corresponding to the specified + # precedence. + # + # @throws IllegalStateException if this is not a precedence DFA. + # @see #isPrecedenceDfa() + # + def setPrecedenceStartState(self, precedence, startState): + if not self.precedenceDfa: + raise IllegalStateException("Only precedence DFAs may contain a precedence start state.") + + if precedence < 0: + return + + # synchronization on s0 here is ok. when the DFA is turned into a + # precedence DFA, s0 will be initialized once and not updated again + # s0.edges is never null for a precedence DFA + if precedence >= len(self.s0.edges): + ext = [None] * (precedence + 1 - len(self.s0.edges)) + self.s0.edges.extend(ext) + self.s0.edges[precedence] = startState + # + # Sets whether this is a precedence DFA. If the specified value differs + # from the current DFA configuration, the following actions are taken; + # otherwise no changes are made to the current DFA. + # + #
+ #
+ # • The {@link #states} map is cleared
+ # • If {@code precedenceDfa} is {@code false}, the initial state
+ #   {@link #s0} is set to {@code null}; otherwise, it is initialized to a new
+ #   {@link DFAState} with an empty outgoing {@link DFAState#edges} array to
+ #   store the start states for individual precedence values.
+ # • The {@link #precedenceDfa} field is updated
      + # + # @param precedenceDfa {@code true} if this is a precedence DFA; otherwise, + # {@code false} + + def setPrecedenceDfa(self, precedenceDfa): + if self.precedenceDfa != precedenceDfa: + self._states = dict() + if precedenceDfa: + precedenceState = DFAState(ATNConfigSet()) + precedenceState.edges = [] + precedenceState.isAcceptState = False + precedenceState.requiresFullContext = False + self.s0 = precedenceState + else: + self.s0 = None + self.precedenceDfa = precedenceDfa + + @property + def states(self): + return self._states + + # Return a list of all states in this DFA, ordered by state number. + def sortedStates(self): + return sorted(self._states.keys(), key=lambda state: state.stateNumber) + + def __str__(self): + return unicode(self) + + def __unicode__(self): + return self.toString(None) + + def toString(self, literalNames=None, symbolicNames=None): + if self.s0 is None: + return "" + from antlr4.dfa.DFASerializer import DFASerializer + serializer = DFASerializer(self, literalNames, symbolicNames) + return unicode(serializer) + + def toLexerString(self): + if self.s0 is None: + return "" + from antlr4.dfa.DFASerializer import LexerDFASerializer + serializer = LexerDFASerializer(self) + return unicode(serializer) + diff --git a/runtime/Python2/src/antlr4/dfa/DFASerializer.py b/runtime/Python2/src/antlr4/dfa/DFASerializer.py new file mode 100644 index 000000000..8b585c765 --- /dev/null +++ b/runtime/Python2/src/antlr4/dfa/DFASerializer.py @@ -0,0 +1,100 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#/ + +# A DFA walker that knows how to dump them to serialized strings.#/ +from io import StringIO + +from antlr4.Utils import str_list + + +class DFASerializer(object): + + def __init__(self, dfa, literalNames=None, symbolicNames=None): + self.dfa = dfa + self.literalNames = literalNames + self.symbolicNames = symbolicNames + + def __str__(self): + return unicode(self) + + def __unicode__(self): + if self.dfa.s0 is None: + return None + with StringIO() as buf: + for s in self.dfa.sortedStates(): + n = 0 + if s.edges is not None: + n = len(s.edges) + for i in range(0, n): + t = s.edges[i] + if t is not None and t.stateNumber != 0x7FFFFFFF: + buf.write(self.getStateString(s)) + label = self.getEdgeLabel(i) + buf.write(u"-") + buf.write(label) + buf.write(u"->") + buf.write(self.getStateString(t)) + buf.write(u'\n') + output = buf.getvalue() + if len(output)==0: + return None + else: + return output + + def getEdgeLabel(self, i): + if i==0: + return u"EOF" + if self.literalNames is not None and i<=len(self.literalNames): + return self.literalNames[i-1] + elif self.symbolicNames is not None and i<=len(self.symbolicNames): + return self.symbolicNames[i-1] + else: + return unicode(i-1) + + def getStateString(self, s): + n = s.stateNumber + baseStateStr = ( u":" if s.isAcceptState else u"") + u"s" + unicode(n) + \ + ( u"^" if s.requiresFullContext else u"") + if s.isAcceptState: + if s.predicates is not None: + return baseStateStr + u"=>" + str_list(s.predicates) + else: + return baseStateStr + u"=>" + unicode(s.prediction) + else: + return baseStateStr + +class LexerDFASerializer(DFASerializer): + + def __init__(self, dfa): + super(LexerDFASerializer, self).__init__(dfa, None) + + def getEdgeLabel(self, i): + return u"'" + unichr(i) + u"'" diff --git a/runtime/Python2/src/antlr4/dfa/DFAState.py b/runtime/Python2/src/antlr4/dfa/DFAState.py new file mode 100644 index 000000000..1a1e5e8db --- /dev/null +++ b/runtime/Python2/src/antlr4/dfa/DFAState.py @@ -0,0 +1,156 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#/ + +# Map a predicate to a predicted alternative.#/ +from io import StringIO + +from antlr4.atn.ATNConfigSet import ATNConfigSet + + +class PredPrediction(object): + def __init__(self, pred, alt): + self.alt = alt + self.pred = pred + + def __str__(self): + return unicode(self) + + def __unicode__(self): + return u"(" + unicode(self.pred) + u", " + unicode(self.alt) + u")" + +# A DFA state represents a set of possible ATN configurations. +# As Aho, Sethi, Ullman p. 117 says "The DFA uses its state +# to keep track of all possible states the ATN can be in after +# reading each input symbol. That is to say, after reading +# input a1a2..an, the DFA is in a state that represents the +# subset T of the states of the ATN that are reachable from the +# ATN's start state along some path labeled a1a2..an." +# In conventional NFA→DFA conversion, therefore, the subset T +# would be a bitset representing the set of states the +# ATN could be in. We need to track the alt predicted by each +# state as well, however. More importantly, we need to maintain +# a stack of states, tracking the closure operations as they +# jump from rule to rule, emulating rule invocations (method calls). +# I have to add a stack to simulate the proper lookahead sequences for +# the underlying LL grammar from which the ATN was derived. +# +#
+# I use a set of ATNConfig objects not simple states.  An ATNConfig
+# is both a state (ala normal conversion) and a RuleContext describing
+# the chain of rules (if any) followed to arrive at that state.
+#
+# A DFA state may have multiple references to a particular state,
+# but with different ATN contexts (with same or different alts)
+# meaning that state was reached via a different set of rule invocations.
      +#/ +class DFAState(object): + + def __init__(self, stateNumber=-1, configs=ATNConfigSet()): + self.stateNumber = stateNumber + self.configs = configs + # {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1) + # {@link Token#EOF} maps to {@code edges[0]}. + self.edges = None + self.isAcceptState = False + # if accept state, what ttype do we match or alt do we predict? + # This is set to {@link ATN#INVALID_ALT_NUMBER} when {@link #predicates}{@code !=null} or + # {@link #requiresFullContext}. + self.prediction = 0 + self.lexerActionExecutor = None + # Indicates that this state was created during SLL prediction that + # discovered a conflict between the configurations in the state. Future + # {@link ParserATNSimulator#execATN} invocations immediately jumped doing + # full context prediction if this field is true. + self.requiresFullContext = False + # During SLL parsing, this is a list of predicates associated with the + # ATN configurations of the DFA state. When we have predicates, + # {@link #requiresFullContext} is {@code false} since full context prediction evaluates predicates + # on-the-fly. If this is not null, then {@link #prediction} is + # {@link ATN#INVALID_ALT_NUMBER}. + # + #
+ # We only use these for non-{@link #requiresFullContext} but conflicting states. That
+ # means we know from the context (it's $ or we don't dip into outer
+ # context) that it's an ambiguity not a conflict.
+ #
+ # This list is computed by {@link ParserATNSimulator#predicateDFAState}.
      + self.predicates = None + + + + # Get the set of all alts mentioned by all ATN configurations in this + # DFA state. + def getAltSet(self): + alts = set() + if self.configs is not None: + for c in self.configs: + alts.add(c.alt) + if len(alts)==0: + return None + else: + return alts + + def __hash__(self): + return hash(self.configs) + + # Two {@link DFAState} instances are equal if their ATN configuration sets + # are the same. This method is used to see if a state already exists. + # + #
+ # Because the number of alternatives and number of ATN configurations are
+ # finite, there is a finite number of DFA states that can be processed.
+ # This is necessary to show that the algorithm terminates.
+ #
+ # Cannot test the DFA state numbers here because in
+ # {@link ParserATNSimulator#addDFAState} we need to know if any other state
+ # exists that has this exact set of ATN configurations.  The
+ # {@link #stateNumber} is irrelevant.
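+ #
+ # For illustration (assuming the package is importable), two states built from
+ # equal configuration sets compare equal even though their state numbers differ:
+ #
+ #   from antlr4.atn.ATNConfigSet import ATNConfigSet
+ #   from antlr4.dfa.DFAState import DFAState
+ #   DFAState(1, ATNConfigSet()) == DFAState(2, ATNConfigSet())   # True
+ #   DFAState(1, ATNConfigSet()) == object()                      # False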
      + def __eq__(self, other): + # compare set of ATN configurations in this set with other + if self is other: + return True + elif not isinstance(other, DFAState): + return False + else: + return self.configs==other.configs + + def __str__(self): + return unicode(self) + + def __unicode__(self): + with StringIO() as buf: + buf.write(unicode(self.stateNumber)) + buf.write(u":") + buf.write(unicode(self.configs)) + if self.isAcceptState: + buf.write(u"=>") + if self.predicates is not None: + buf.write(unicode(self.predicates)) + else: + buf.write(unicode(self.prediction)) + return buf.getvalue() diff --git a/runtime/Python2/src/antlr4/dfa/__init__.py b/runtime/Python2/src/antlr4/dfa/__init__.py new file mode 100644 index 000000000..216c000dc --- /dev/null +++ b/runtime/Python2/src/antlr4/dfa/__init__.py @@ -0,0 +1 @@ +__author__ = 'ericvergnaud' diff --git a/runtime/Python2/src/antlr4/error/DiagnosticErrorListener.py b/runtime/Python2/src/antlr4/error/DiagnosticErrorListener.py new file mode 100644 index 000000000..f1c9be8be --- /dev/null +++ b/runtime/Python2/src/antlr4/error/DiagnosticErrorListener.py @@ -0,0 +1,133 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +# +# This implementation of {@link ANTLRErrorListener} can be used to identify +# certain potential correctness and performance problems in grammars. "Reports" +# are made by calling {@link Parser#notifyErrorListeners} with the appropriate +# message. +# +#
+# • Ambiguities: These are cases where more than one path through the
+#   grammar can match the input.
+# • Weak context sensitivity: These are cases where full-context
+#   prediction resolved an SLL conflict to a unique alternative which equaled the
+#   minimum alternative of the SLL conflict.
+# • Strong (forced) context sensitivity: These are cases where the
+#   full-context prediction resolved an SLL conflict to a unique alternative,
+#   and the minimum alternative of the SLL conflict was found to not be
+#   a truly viable alternative. Two-stage parsing cannot be used for inputs where
+#   this situation occurs.
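+#
+# A usage sketch (the generated MyLexer/MyParser classes and the startRule entry
+# point are hypothetical; addErrorListener is assumed to come from Recognizer):
+#
+#   from antlr4.InputStream import InputStream
+#   from antlr4.CommonTokenStream import CommonTokenStream
+#   from antlr4.error.DiagnosticErrorListener import DiagnosticErrorListener
+#
+#   lexer = MyLexer(InputStream(u"some input"))
+#   parser = MyParser(CommonTokenStream(lexer))
+#   parser.addErrorListener(DiagnosticErrorListener())
+#   tree = parser.startRule()   # ambiguity reports go through notifyErrorListeners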
      + +from io import StringIO + +from antlr4.Utils import str_set +from antlr4.error.ErrorListener import ErrorListener + + +class DiagnosticErrorListener(ErrorListener): + + def __init__(self, exactOnly=True): + # whether all ambiguities or only exact ambiguities are reported. + self.exactOnly = exactOnly + + def reportAmbiguity(self, recognizer, dfa, startIndex, + stopIndex, exact, ambigAlts, configs): + if self.exactOnly and not exact: + return + + with StringIO() as buf: + buf.write(u"reportAmbiguity d=") + buf.write(self.getDecisionDescription(recognizer, dfa)) + buf.write(u": ambigAlts=") + buf.write(str_set(self.getConflictingAlts(ambigAlts, configs))) + buf.write(u", input='") + buf.write(recognizer.getTokenStream().getText((startIndex, stopIndex))) + buf.write(u"'") + recognizer.notifyErrorListeners(buf.getvalue()) + + + def reportAttemptingFullContext(self, recognizer, dfa, startIndex, + stopIndex, conflictingAlts, configs): + with StringIO() as buf: + buf.write(u"reportAttemptingFullContext d=") + buf.write(self.getDecisionDescription(recognizer, dfa)) + buf.write(u", input='") + buf.write(recognizer.getTokenStream().getText((startIndex, stopIndex))) + buf.write(u"'") + recognizer.notifyErrorListeners(buf.getvalue()) + + def reportContextSensitivity(self, recognizer, dfa, startIndex, + stopIndex, prediction, configs): + with StringIO() as buf: + buf.write(u"reportContextSensitivity d=") + buf.write(self.getDecisionDescription(recognizer, dfa)) + buf.write(u", input='") + buf.write(recognizer.getTokenStream().getText((startIndex, stopIndex))) + buf.write(u"'") + recognizer.notifyErrorListeners(buf.getvalue()) + + def getDecisionDescription(self, recognizer, dfa): + decision = dfa.decision + ruleIndex = dfa.atnStartState.ruleIndex + + ruleNames = recognizer.ruleNames + if ruleIndex < 0 or ruleIndex >= len(ruleNames): + return unicode(decision) + + ruleName = ruleNames[ruleIndex] + if ruleName is None or len(ruleName)==0: + return unicode(decision) + + return unicode(decision) + u" (" + ruleName + u")" + + # + # Computes the set of conflicting or ambiguous alternatives from a + # configuration set, if that information was not already provided by the + # parser. + # + # @param reportedAlts The set of conflicting or ambiguous alternatives, as + # reported by the parser. + # @param configs The conflicting or ambiguous configuration set. + # @return Returns {@code reportedAlts} if it is not {@code null}, otherwise + # returns the set of alternatives represented in {@code configs}. + # + def getConflictingAlts(self, reportedAlts, configs): + if reportedAlts is not None: + return reportedAlts + + result = set() + for config in configs: + result.add(config.alt) + + return result diff --git a/runtime/Python2/src/antlr4/error/ErrorListener.py b/runtime/Python2/src/antlr4/error/ErrorListener.py new file mode 100644 index 000000000..bfe06e03a --- /dev/null +++ b/runtime/Python2/src/antlr4/error/ErrorListener.py @@ -0,0 +1,97 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Provides an empty default implementation of {@link ANTLRErrorListener}. The +# default implementation of each method does nothing, but can be overridden as +# necessary. + +from __future__ import print_function +import sys + +class ErrorListener(object): + + def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): + pass + + def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs): + pass + + def reportAttemptingFullContext(self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs): + pass + + def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs): + pass + +class ConsoleErrorListener(ErrorListener): + # + # Provides a default instance of {@link ConsoleErrorListener}. + # + INSTANCE = None + + # + # {@inheritDoc} + # + #
+ # This implementation prints messages to {@link System#err} containing the
+ # values of {@code line}, {@code charPositionInLine}, and {@code msg} using
+ # the following format.
+ #
+ #    line line:charPositionInLine msg
      + # + def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): + print("line " + str(line) + ":" + str(column) + " " + msg, file=sys.stderr) + +ConsoleErrorListener.INSTANCE = ConsoleErrorListener() + +class ProxyErrorListener(ErrorListener): + + def __init__(self, delegates): + super(ProxyErrorListener, self).__init__() + if delegates is None: + raise ReferenceError("delegates") + self.delegates = delegates + + def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): + for delegate in self.delegates: + delegate.syntaxError(recognizer, offendingSymbol, line, column, msg, e) + + def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs): + for delegate in self.delegates: + delegate.reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs) + + def reportAttemptingFullContext(self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs): + for delegate in self.delegates: + delegate.reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs) + + def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs): + for delegate in self.delegates: + delegate.reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs) diff --git a/runtime/Python2/src/antlr4/error/ErrorStrategy.py b/runtime/Python2/src/antlr4/error/ErrorStrategy.py new file mode 100644 index 000000000..94efb89a5 --- /dev/null +++ b/runtime/Python2/src/antlr4/error/ErrorStrategy.py @@ -0,0 +1,719 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +from antlr4.IntervalSet import IntervalSet + +from antlr4.Token import Token +from antlr4.atn.ATNState import ATNState +from antlr4.error.Errors import NoViableAltException, InputMismatchException, FailedPredicateException, ParseCancellationException + +class ErrorStrategy(object): + + def reset(self, recognizer): + pass + + def recoverInline(self, recognizer): + pass + + def recover(self, recognizer, e): + pass + + def sync(self, recognizer): + pass + + def inErrorRecoveryMode(self, recognizer): + pass + + def reportError(self, recognizer, e): + pass + + +# This is the default implementation of {@link ANTLRErrorStrategy} used for +# error reporting and recovery in ANTLR parsers. +# +class DefaultErrorStrategy(ErrorStrategy): + + def __init__(self): + super(DefaultErrorStrategy, self).__init__() + # Indicates whether the error strategy is currently "recovering from an + # error". This is used to suppress reporting multiple error messages while + # attempting to recover from a detected syntax error. + # + # @see #inErrorRecoveryMode + # + self.errorRecoveryMode = False + + # The index into the input stream where the last error occurred. + # This is used to prevent infinite loops where an error is found + # but no token is consumed during recovery...another error is found, + # ad nauseum. This is a failsafe mechanism to guarantee that at least + # one token/tree node is consumed for two errors. + # + self.lastErrorIndex = -1 + self.lastErrorStates = None + + #
+ # The default implementation simply calls {@link #endErrorCondition} to
+ # ensure that the handler is not in error recovery mode.
      + def reset(self, recognizer): + self.endErrorCondition(recognizer) + + # + # This method is called to enter error recovery mode when a recognition + # exception is reported. + # + # @param recognizer the parser instance + # + def beginErrorCondition(self, recognizer): + self.errorRecoveryMode = True + + def inErrorRecoveryMode(self, recognizer): + return self.errorRecoveryMode + + # + # This method is called to leave error recovery mode after recovering from + # a recognition exception. + # + # @param recognizer + # + def endErrorCondition(self, recognizer): + self.errorRecoveryMode = False + self.lastErrorStates = None + self.lastErrorIndex = -1 + + # + # {@inheritDoc} + # + #
+ # The default implementation simply calls {@link #endErrorCondition}.
      + # + def reportMatch(self, recognizer): + self.endErrorCondition(recognizer) + + # + # {@inheritDoc} + # + #
+ # The default implementation returns immediately if the handler is already
+ # in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
+ # and dispatches the reporting task based on the runtime type of {@code e}
+ # according to the following table.
+ #
+ # • {@link NoViableAltException}: Dispatches the call to
+ #   {@link #reportNoViableAlternative}
+ # • {@link InputMismatchException}: Dispatches the call to
+ #   {@link #reportInputMismatch}
+ # • {@link FailedPredicateException}: Dispatches the call to
+ #   {@link #reportFailedPredicate}
+ # • All other types: calls {@link Parser#notifyErrorListeners} to report
+ #   the exception
      + # + def reportError(self, recognizer, e): + # if we've already reported an error and have not matched a token + # yet successfully, don't report any errors. + if self.inErrorRecoveryMode(recognizer): + return # don't report spurious errors + self.beginErrorCondition(recognizer) + if isinstance( e, NoViableAltException ): + self.reportNoViableAlternative(recognizer, e) + elif isinstance( e, InputMismatchException ): + self.reportInputMismatch(recognizer, e) + elif isinstance( e, FailedPredicateException ): + self.reportFailedPredicate(recognizer, e) + else: + print("unknown recognition error type: " + type(e).__name__) + recognizer.notifyErrorListeners(e.getOffendingToken(), e.getMessage(), e) + + # + # {@inheritDoc} + # + #
+ # The default implementation resynchronizes the parser by consuming tokens
+ # until we find one in the resynchronization set--loosely the set of tokens
+ # that can follow the current rule.
      + # + def recover(self, recognizer, e): + if self.lastErrorIndex==recognizer.getInputStream().index \ + and self.lastErrorStates is not None \ + and recognizer.state in self.lastErrorStates: + # uh oh, another error at same token index and previously-visited + # state in ATN; must be a case where LT(1) is in the recovery + # token set so nothing got consumed. Consume a single token + # at least to prevent an infinite loop; this is a failsafe. + recognizer.consume() + + self.lastErrorIndex = recognizer._input.index + if self.lastErrorStates is None: + self.lastErrorStates = [] + self.lastErrorStates.append(recognizer.state) + followSet = self.getErrorRecoverySet(recognizer) + self.consumeUntil(recognizer, followSet) + + # The default implementation of {@link ANTLRErrorStrategy#sync} makes sure + # that the current lookahead symbol is consistent with what were expecting + # at this point in the ATN. You can call this anytime but ANTLR only + # generates code to check before subrules/loops and each iteration. + # + #
+ # Implements Jim Idle's magic sync mechanism in closures and optional
+ # subrules. E.g.,
      + # + #
      +    # a : sync ( stuff sync )* ;
      +    # sync : {consume to what can follow sync} ;
      +    # 
      + # + # At the start of a sub rule upon error, {@link #sync} performs single + # token deletion, if possible. If it can't do that, it bails on the current + # rule and uses the default error recovery, which consumes until the + # resynchronization set of the current rule. + # + #
+ # If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
+ # with an empty alternative), then the expected set includes what follows
+ # the subrule.
      + # + #
+ # During loop iteration, it consumes until it sees a token that can start a
+ # sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
+ # stay in the loop as long as possible.
      + # + #
+ # ORIGINS
      + # + #
+ # Previous versions of ANTLR did a poor job of their recovery within loops.
+ # A single mismatch token or missing token would force the parser to bail
+ # out of the entire rules surrounding the loop. So, for rule
      + # + #
      +    # classDef : 'class' ID '{' member* '}'
      +    # 
      + # + # input with an extra token between members would force the parser to + # consume until it found the next class definition rather than the next + # member definition of the current class. + # + #
+ # This functionality cost a little bit of effort because the parser has to
+ # compare token set at the start of the loop and at each iteration. If for
+ # some reason speed is suffering for you, you can turn off this
+ # functionality by simply overriding this method as a blank { }.
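+ #
+ # For example, a strategy that disables these checks could look like the
+ # following sketch (wiring it into a generated parser via _errHandler is an
+ # assumption):
+ #
+ #   class NoSyncErrorStrategy(DefaultErrorStrategy):
+ #       def sync(self, recognizer):
+ #           pass   # skip the resynchronization checks entirely
+ #
+ #   # parser._errHandler = NoSyncErrorStrategy()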
      + # + def sync(self, recognizer): + # If already recovering, don't try to sync + if self.inErrorRecoveryMode(recognizer): + return + + s = recognizer._interp.atn.states[recognizer.state] + la = recognizer.getTokenStream().LA(1) + # try cheaper subset first; might get lucky. seems to shave a wee bit off + if la==Token.EOF or la in recognizer.atn.nextTokens(s): + return + + # Return but don't end recovery. only do that upon valid token match + if recognizer.isExpectedToken(la): + return + + if s.stateType in [ATNState.BLOCK_START, ATNState.STAR_BLOCK_START, + ATNState.PLUS_BLOCK_START, ATNState.STAR_LOOP_ENTRY]: + # report error and recover if possible + if self.singleTokenDeletion(recognizer)is not None: + return + else: + raise InputMismatchException(recognizer) + + elif s.stateType in [ATNState.PLUS_LOOP_BACK, ATNState.STAR_LOOP_BACK]: + self.reportUnwantedToken(recognizer) + expecting = recognizer.getExpectedTokens() + whatFollowsLoopIterationOrRule = expecting.addSet(self.getErrorRecoverySet(recognizer)) + self.consumeUntil(recognizer, whatFollowsLoopIterationOrRule) + + else: + # do nothing if we can't identify the exact kind of ATN state + pass + + # This is called by {@link #reportError} when the exception is a + # {@link NoViableAltException}. + # + # @see #reportError + # + # @param recognizer the parser instance + # @param e the recognition exception + # + def reportNoViableAlternative(self, recognizer, e): + tokens = recognizer.getTokenStream() + if tokens is not None: + if e.startToken.type==Token.EOF: + input = "" + else: + input = tokens.getText((e.startToken, e.offendingToken)) + else: + input = "" + msg = "no viable alternative at input " + self.escapeWSAndQuote(input) + recognizer.notifyErrorListeners(msg, e.offendingToken, e) + + # + # This is called by {@link #reportError} when the exception is an + # {@link InputMismatchException}. + # + # @see #reportError + # + # @param recognizer the parser instance + # @param e the recognition exception + # + def reportInputMismatch(self, recognizer, e): + msg = "mismatched input " + self.getTokenErrorDisplay(e.offendingToken) \ + + " expecting " + e.getExpectedTokens().toString(recognizer.literalNames, recognizer.symbolicNames) + recognizer.notifyErrorListeners(msg, e.offendingToken, e) + + # + # This is called by {@link #reportError} when the exception is a + # {@link FailedPredicateException}. + # + # @see #reportError + # + # @param recognizer the parser instance + # @param e the recognition exception + # + def reportFailedPredicate(self, recognizer, e): + ruleName = recognizer.ruleNames[recognizer._ctx.getRuleIndex()] + msg = "rule " + ruleName + " " + e.message + recognizer.notifyErrorListeners(msg, e.offendingToken, e) + + # This method is called to report a syntax error which requires the removal + # of a token from the input stream. At the time this method is called, the + # erroneous symbol is current {@code LT(1)} symbol and has not yet been + # removed from the input stream. When this method returns, + # {@code recognizer} is in error recovery mode. + # + #
+ # This method is called when {@link #singleTokenDeletion} identifies
+ # single-token deletion as a viable recovery strategy for a mismatched
+ # input error.
+ #
+ # The default implementation simply returns if the handler is already in
+ # error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
+ # enter error recovery mode, followed by calling
+ # {@link Parser#notifyErrorListeners}.
      + # + # @param recognizer the parser instance + # + def reportUnwantedToken(self, recognizer): + if self.inErrorRecoveryMode(recognizer): + return + + self.beginErrorCondition(recognizer) + t = recognizer.getCurrentToken() + tokenName = self.getTokenErrorDisplay(t) + expecting = self.getExpectedTokens(recognizer) + msg = "extraneous input " + tokenName + " expecting " \ + + expecting.toString(recognizer.literalNames, recognizer.symbolicNames) + recognizer.notifyErrorListeners(msg, t, None) + + # This method is called to report a syntax error which requires the + # insertion of a missing token into the input stream. At the time this + # method is called, the missing token has not yet been inserted. When this + # method returns, {@code recognizer} is in error recovery mode. + # + #
+ # This method is called when {@link #singleTokenInsertion} identifies
+ # single-token insertion as a viable recovery strategy for a mismatched
+ # input error.
+ #
+ # The default implementation simply returns if the handler is already in
+ # error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
+ # enter error recovery mode, followed by calling
+ # {@link Parser#notifyErrorListeners}.
      + # + # @param recognizer the parser instance + # + def reportMissingToken(self, recognizer): + if self.inErrorRecoveryMode(recognizer): + return + self.beginErrorCondition(recognizer) + t = recognizer.getCurrentToken() + expecting = self.getExpectedTokens(recognizer) + msg = "missing " + expecting.toString(recognizer.literalNames, recognizer.symbolicNames) \ + + " at " + self.getTokenErrorDisplay(t) + recognizer.notifyErrorListeners(msg, t, None) + + #
+ # The default implementation attempts to recover from the mismatched input
+ # by using single token insertion and deletion as described below. If the
+ # recovery attempt fails, this method throws an
+ # {@link InputMismatchException}.
+ #
+ # EXTRA TOKEN (single token deletion)
+ #
+ # {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
+ # right token, however, then assume {@code LA(1)} is some extra spurious
+ # token and delete it. Then consume and return the next token (which was
+ # the {@code LA(2)} token) as the successful result of the match operation.
+ #
+ # This recovery strategy is implemented by {@link #singleTokenDeletion}.
+ #
+ # MISSING TOKEN (single token insertion)
+ #
+ # If current token (at {@code LA(1)}) is consistent with what could come
+ # after the expected {@code LA(1)} token, then assume the token is missing
+ # and use the parser's {@link TokenFactory} to create it on the fly. The
+ # "insertion" is performed by returning the created token as the successful
+ # result of the match operation.
+ #
+ # This recovery strategy is implemented by {@link #singleTokenInsertion}.
+ #
+ # EXAMPLE
+ #
+ # For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
+ # the parser returns from the nested call to {@code expr}, it will have
+ # call chain:
      + # + #
      +    # stat → expr → atom
      +    # 
      + # + # and it will be trying to match the {@code ')'} at this point in the + # derivation: + # + #
      +    # => ID '=' '(' INT ')' ('+' atom)* ';'
      +    #                    ^
      +    # 
      + # + # The attempt to match {@code ')'} will fail when it sees {@code ';'} and + # call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} + # is in the set of tokens that can follow the {@code ')'} token reference + # in rule {@code atom}. It can assume that you forgot the {@code ')'}. + # + def recoverInline(self, recognizer): + # SINGLE TOKEN DELETION + matchedSymbol = self.singleTokenDeletion(recognizer) + if matchedSymbol is not None: + # we have deleted the extra token. + # now, move past ttype token as if all were ok + recognizer.consume() + return matchedSymbol + + # SINGLE TOKEN INSERTION + if self.singleTokenInsertion(recognizer): + return self.getMissingSymbol(recognizer) + + # even that didn't work; must throw the exception + raise InputMismatchException(recognizer) + + # + # This method implements the single-token insertion inline error recovery + # strategy. It is called by {@link #recoverInline} if the single-token + # deletion strategy fails to recover from the mismatched input. If this + # method returns {@code true}, {@code recognizer} will be in error recovery + # mode. + # + #
+ # This method determines whether or not single-token insertion is viable by
+ # checking if the {@code LA(1)} input symbol could be successfully matched
+ # if it were instead the {@code LA(2)} symbol. If this method returns
+ # {@code true}, the caller is responsible for creating and inserting a
+ # token with the correct type to produce this behavior.
      + # + # @param recognizer the parser instance + # @return {@code true} if single-token insertion is a viable recovery + # strategy for the current mismatched input, otherwise {@code false} + # + def singleTokenInsertion(self, recognizer): + currentSymbolType = recognizer.getTokenStream().LA(1) + # if current token is consistent with what could come after current + # ATN state, then we know we're missing a token; error recovery + # is free to conjure up and insert the missing token + atn = recognizer._interp.atn + currentState = atn.states[recognizer.state] + next = currentState.transitions[0].target + expectingAtLL2 = atn.nextTokens(next, recognizer._ctx) + if currentSymbolType in expectingAtLL2: + self.reportMissingToken(recognizer) + return True + else: + return False + + # This method implements the single-token deletion inline error recovery + # strategy. It is called by {@link #recoverInline} to attempt to recover + # from mismatched input. If this method returns null, the parser and error + # handler state will not have changed. If this method returns non-null, + # {@code recognizer} will not be in error recovery mode since the + # returned token was a successful match. + # + #
+ # If the single-token deletion is successful, this method calls
+ # {@link #reportUnwantedToken} to report the error, followed by
+ # {@link Parser#consume} to actually "delete" the extraneous token. Then,
+ # before returning {@link #reportMatch} is called to signal a successful
+ # match.
      + # + # @param recognizer the parser instance + # @return the successfully matched {@link Token} instance if single-token + # deletion successfully recovers from the mismatched input, otherwise + # {@code null} + # + def singleTokenDeletion(self, recognizer): + nextTokenType = recognizer.getTokenStream().LA(2) + expecting = self.getExpectedTokens(recognizer) + if nextTokenType in expecting: + self.reportUnwantedToken(recognizer) + # print("recoverFromMismatchedToken deleting " \ + # + str(recognizer.getTokenStream().LT(1)) \ + # + " since " + str(recognizer.getTokenStream().LT(2)) \ + # + " is what we want", file=sys.stderr) + recognizer.consume() # simply delete extra token + # we want to return the token we're actually matching + matchedSymbol = recognizer.getCurrentToken() + self.reportMatch(recognizer) # we know current token is correct + return matchedSymbol + else: + return None + + # Conjure up a missing token during error recovery. + # + # The recognizer attempts to recover from single missing + # symbols. But, actions might refer to that missing symbol. + # For example, x=ID {f($x);}. The action clearly assumes + # that there has been an identifier matched previously and that + # $x points at that token. If that token is missing, but + # the next token in the stream is what we want we assume that + # this token is missing and we keep going. Because we + # have to return some token to replace the missing token, + # we have to conjure one up. This method gives the user control + # over the tokens returned for missing tokens. Mostly, + # you will want to create something special for identifier + # tokens. For literals such as '{' and ',', the default + # action in the parser or tree parser works. It simply creates + # a CommonToken of the appropriate type. The text will be the token. + # If you change what tokens must be created by the lexer, + # override this method to create the appropriate tokens. + # + def getMissingSymbol(self, recognizer): + currentSymbol = recognizer.getCurrentToken() + expecting = self.getExpectedTokens(recognizer) + expectedTokenType = expecting[0] # get any element + if expectedTokenType==Token.EOF: + tokenText = "" + else: + name = None + if expectedTokenType < len(recognizer.literalNames): + name = recognizer.literalNames[expectedTokenType] + if name is None and expectedTokenType < len(recognizer.symbolicNames): + name = recognizer.symbolicNames[expectedTokenType] + tokenText = "" + current = currentSymbol + lookback = recognizer.getTokenStream().LT(-1) + if current.type==Token.EOF and lookback is not None: + current = lookback + return recognizer.getTokenFactory().create(current.source, + expectedTokenType, tokenText, Token.DEFAULT_CHANNEL, + -1, -1, current.line, current.column) + + def getExpectedTokens(self, recognizer): + return recognizer.getExpectedTokens() + + # How should a token be displayed in an error message? The default + # is to display just the text, but during development you might + # want to have a lot of information spit out. Override in that case + # to use t.toString() (which, for CommonToken, dumps everything about + # the token). This is better than forcing you to override a method in + # your token objects because you don't have to go modify your lexer + # so that it creates a new Java type. 
+ # + def getTokenErrorDisplay(self, t): + if t is None: + return u"" + s = t.text + if s is None: + if t.type==Token.EOF: + s = u"" + else: + s = u"<" + unicode(t.type) + u">" + return self.escapeWSAndQuote(s) + + def escapeWSAndQuote(self, s): + s = s.replace(u"\n",u"\\n") + s = s.replace(u"\r",u"\\r") + s = s.replace(u"\t",u"\\t") + return u"'" + s + u"'" + + # Compute the error recovery set for the current rule. During + # rule invocation, the parser pushes the set of tokens that can + # follow that rule reference on the stack; this amounts to + # computing FIRST of what follows the rule reference in the + # enclosing rule. See LinearApproximator.FIRST(). + # This local follow set only includes tokens + # from within the rule; i.e., the FIRST computation done by + # ANTLR stops at the end of a rule. + # + # EXAMPLE + # + # When you find a "no viable alt exception", the input is not + # consistent with any of the alternatives for rule r. The best + # thing to do is to consume tokens until you see something that + # can legally follow a call to r#or* any rule that called r. + # You don't want the exact set of viable next tokens because the + # input might just be missing a token--you might consume the + # rest of the input looking for one of the missing tokens. + # + # Consider grammar: + # + # a : '[' b ']' + # | '(' b ')' + # ; + # b : c '^' INT ; + # c : ID + # | INT + # ; + # + # At each rule invocation, the set of tokens that could follow + # that rule is pushed on a stack. Here are the various + # context-sensitive follow sets: + # + # FOLLOW(b1_in_a) = FIRST(']') = ']' + # FOLLOW(b2_in_a) = FIRST(')') = ')' + # FOLLOW(c_in_b) = FIRST('^') = '^' + # + # Upon erroneous input "[]", the call chain is + # + # a -> b -> c + # + # and, hence, the follow context stack is: + # + # depth follow set start of rule execution + # 0 a (from main()) + # 1 ']' b + # 2 '^' c + # + # Notice that ')' is not included, because b would have to have + # been called from a different context in rule a for ')' to be + # included. + # + # For error recovery, we cannot consider FOLLOW(c) + # (context-sensitive or otherwise). We need the combined set of + # all context-sensitive FOLLOW sets--the set of all tokens that + # could follow any reference in the call chain. We need to + # resync to one of those tokens. Note that FOLLOW(c)='^' and if + # we resync'd to that token, we'd consume until EOF. We need to + # sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + # In this case, for input "[]", LA(1) is ']' and in the set, so we would + # not consume anything. After printing an error, rule c would + # return normally. Rule b would not find the required '^' though. + # At this point, it gets a mismatched token error and throws an + # exception (since LA(1) is not in the viable following token + # set). The rule exception handler tries to recover, but finds + # the same recovery set and doesn't consume anything. Rule b + # exits normally returning to rule a. Now it finds the ']' (and + # with the successful match exits errorRecovery mode). + # + # So, you can see that the parser walks up the call chain looking + # for the token that was a member of the recovery set. + # + # Errors are not generated in errorRecovery mode. 
+ # + # ANTLR's error recovery mechanism is based upon original ideas: + # + # "Algorithms + Data Structures = Programs" by Niklaus Wirth + # + # and + # + # "A note on error recovery in recursive descent parsers": + # http:#portal.acm.org/citation.cfm?id=947902.947905 + # + # Later, Josef Grosch had some good ideas: + # + # "Efficient and Comfortable Error Recovery in Recursive Descent + # Parsers": + # ftp:#www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + # + # Like Grosch I implement context-sensitive FOLLOW sets that are combined + # at run-time upon error to avoid overhead during parsing. + # + def getErrorRecoverySet(self, recognizer): + atn = recognizer._interp.atn + ctx = recognizer._ctx + recoverSet = IntervalSet() + while ctx is not None and ctx.invokingState>=0: + # compute what follows who invoked us + invokingState = atn.states[ctx.invokingState] + rt = invokingState.transitions[0] + follow = atn.nextTokens(rt.followState) + recoverSet.addSet(follow) + ctx = ctx.parentCtx + recoverSet.removeOne(Token.EPSILON) + return recoverSet + + # Consume tokens until one matches the given token set.# + def consumeUntil(self, recognizer, set_): + ttype = recognizer.getTokenStream().LA(1) + while ttype != Token.EOF and not ttype in set_: + recognizer.consume() + ttype = recognizer.getTokenStream().LA(1) + + +# +# This implementation of {@link ANTLRErrorStrategy} responds to syntax errors +# by immediately canceling the parse operation with a +# {@link ParseCancellationException}. The implementation ensures that the +# {@link ParserRuleContext#exception} field is set for all parse tree nodes +# that were not completed prior to encountering the error. +# +#

+# This error strategy is useful in the following scenarios.
+#
+# - Two-stage parsing: This error strategy allows the first stage of two-stage
+#   parsing to immediately terminate if an error is encountered, and
+#   immediately fall back to the second stage. In addition to avoiding wasted
+#   work by attempting to recover from errors here, the empty implementation
+#   of {@link BailErrorStrategy#sync} improves the performance of the first
+#   stage (see the sketch below).
+# - Silent validation: When syntax errors are not being reported or logged,
+#   and the parse result is simply ignored if errors occur, the
+#   {@link BailErrorStrategy} avoids wasting work on recovering from errors
+#   when the result will be ignored either way.
+#
+# {@code myparser.setErrorHandler(new BailErrorStrategy());}

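As an informal illustration (not part of this patch), here is a rough Python 2 sketch of the two-stage idiom described above. MyLexer, MyParser, and the prog start rule are hypothetical generated names, and the strategy is installed by assigning the parser's _errHandler field:

# Rough sketch of two-stage parsing; MyLexer/MyParser are hypothetical
# generated classes, not part of this runtime.
from antlr4.CommonTokenStream import CommonTokenStream
from antlr4.InputStream import InputStream
from antlr4.error.ErrorStrategy import BailErrorStrategy, DefaultErrorStrategy
from antlr4.error.Errors import ParseCancellationException

def parse_two_stage(text):
    lexer = MyLexer(InputStream(text))
    parser = MyParser(CommonTokenStream(lexer))
    parser._errHandler = BailErrorStrategy()    # stage 1: bail on first error
    try:
        return parser.prog()                    # hypothetical start rule
    except ParseCancellationException:
        # stage 2: reparse from scratch with the default, recovering strategy
        lexer = MyLexer(InputStream(text))
        parser = MyParser(CommonTokenStream(lexer))
        parser._errHandler = DefaultErrorStrategy()
        return parser.prog()

The second stage deliberately reparses from scratch so that DefaultErrorStrategy can report and recover from every error.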
      +# +# @see Parser#setErrorHandler(ANTLRErrorStrategy) +# +class BailErrorStrategy(DefaultErrorStrategy): + # Instead of recovering from exception {@code e}, re-throw it wrapped + # in a {@link ParseCancellationException} so it is not caught by the + # rule function catches. Use {@link Exception#getCause()} to get the + # original {@link RecognitionException}. + # + def recover(self, recognizer, e): + context = recognizer._ctx + while context is not None: + context.exception = e + context = context.parentCtx + raise ParseCancellationException(e) + + # Make sure we don't attempt to recover inline; if the parser + # successfully recovers, it won't throw an exception. + # + def recoverInline(self, recognizer): + self.recover(recognizer, InputMismatchException(recognizer)) + + # Make sure we don't attempt to recover from problems in subrules.# + def sync(self, recognizer): + pass diff --git a/runtime/Python2/src/antlr4/error/Errors.py b/runtime/Python2/src/antlr4/error/Errors.py new file mode 100644 index 000000000..e397a0af9 --- /dev/null +++ b/runtime/Python2/src/antlr4/error/Errors.py @@ -0,0 +1,177 @@ +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ +from antlr4.atn.Transition import PredicateTransition + + +class UnsupportedOperationException(Exception): + + def __init__(self, msg): + super(UnsupportedOperationException, self).__init__(msg) + +class IllegalStateException(Exception): + + def __init__(self, msg): + super(IllegalStateException, self).__init__(msg) + +class CancellationException(IllegalStateException): + + def __init__(self, msg): + super(CancellationException, self).__init__(msg) + +# The root of the ANTLR exception hierarchy. In general, ANTLR tracks just +# 3 kinds of errors: prediction errors, failed predicate errors, and +# mismatched input errors. In each case, the parser knows where it is +# in the input, where it is in the ATN, the rule invocation stack, +# and what kind of problem occurred. 
+ +class RecognitionException(Exception): + + + def __init__(self, message=None, recognizer=None, input=None, ctx=None): + super(RecognitionException, self).__init__(message) + self.recognizer = recognizer + self.input = input + self.ctx = ctx + # The current {@link Token} when an error occurred. Since not all streams + # support accessing symbols by index, we have to track the {@link Token} + # instance itself. + self.offendingToken = None + # Get the ATN state number the parser was in at the time the error + # occurred. For {@link NoViableAltException} and + # {@link LexerNoViableAltException} exceptions, this is the + # {@link DecisionState} number. For others, it is the state whose outgoing + # edge we couldn't match. + self.offendingState = -1 + if recognizer is not None: + self.offendingState = recognizer.state + + #

If the state number is not known, this method returns -1.
+
+ # Gets the set of input symbols which could potentially follow the
+ # previously matched symbol at the time this exception was thrown.
+ #
+ # If the set of expected tokens is not known and could not be computed,
+ # this method returns {@code null}.

      + # + # @return The set of token types that could potentially follow the current + # state in the ATN, or {@code null} if the information is not available. + #/ + def getExpectedTokens(self): + if self.recognizer is not None: + return self.recognizer.atn.getExpectedTokens(self.offendingState, self.ctx) + else: + return None + + def __str__(self): + return unicode(self) + + +class LexerNoViableAltException(RecognitionException): + + def __init__(self, lexer, input, startIndex, deadEndConfigs): + super(LexerNoViableAltException, self).__init__(message=None, recognizer=lexer, input=input, ctx=None) + self.startIndex = startIndex + self.deadEndConfigs = deadEndConfigs + + def __unicode__(self): + symbol = "" + if self.startIndex >= 0 and self.startIndex < self.input.size(): + symbol = self.input.getText((self.startIndex,self.startIndex)) + # TODO symbol = Utils.escapeWhitespace(symbol, false); + return u"LexerNoViableAltException" + symbol + +# Indicates that the parser could not decide which of two or more paths +# to take based upon the remaining input. It tracks the starting token +# of the offending input and also knows where the parser was +# in the various paths when the error. Reported by reportNoViableAlternative() +# +class NoViableAltException(RecognitionException): + + def __init__(self, recognizer, input=None, startToken=None, offendingToken=None, deadEndConfigs=None, ctx=None): + if ctx is None: + ctx = recognizer._ctx + if offendingToken is None: + offendingToken = recognizer.getCurrentToken() + if startToken is None: + startToken = recognizer.getCurrentToken() + if input is None: + input = recognizer.getInputStream() + super(NoViableAltException, self).__init__(recognizer=recognizer, input=input, ctx=ctx) + # Which configurations did we try at input.index() that couldn't match input.LT(1)?# + self.deadEndConfigs = deadEndConfigs + # The token object at the start index; the input stream might + # not be buffering tokens so get a reference to it. (At the + # time the error occurred, of course the stream needs to keep a + # buffer all of the tokens but later we might not have access to those.) + self.startToken = startToken + self.offendingToken = offendingToken + +# This signifies any kind of mismatched input exceptions such as +# when the current input does not match the expected token. +# +class InputMismatchException(RecognitionException): + + def __init__(self, recognizer): + super(InputMismatchException, self).__init__(recognizer=recognizer, input=recognizer.getInputStream(), ctx=recognizer._ctx) + self.offendingToken = recognizer.getCurrentToken() + + +# A semantic predicate failed during validation. Validation of predicates +# occurs when normally parsing the alternative just like matching a token. +# Disambiguating predicate evaluation occurs when we test a predicate during +# prediction. 
+ +class FailedPredicateException(RecognitionException): + + def __init__(self, recognizer, predicate=None, message=None): + super(FailedPredicateException, self).__init__(message=self.formatMessage(predicate,message), recognizer=recognizer, + input=recognizer.getInputStream(), ctx=recognizer._ctx) + s = recognizer._interp.atn.states[recognizer.state] + trans = s.transitions[0] + if isinstance(trans, PredicateTransition): + self.ruleIndex = trans.ruleIndex + self.predicateIndex = trans.predIndex + else: + self.ruleIndex = 0 + self.predicateIndex = 0 + self.predicate = predicate + self.offendingToken = recognizer.getCurrentToken() + + def formatMessage(self, predicate, message): + if message is not None: + return message + else: + return "failed predicate: {" + predicate + "}?" + +class ParseCancellationException(CancellationException): + + pass + diff --git a/runtime/Python2/src/antlr4/error/__init__.py b/runtime/Python2/src/antlr4/error/__init__.py new file mode 100644 index 000000000..216c000dc --- /dev/null +++ b/runtime/Python2/src/antlr4/error/__init__.py @@ -0,0 +1 @@ +__author__ = 'ericvergnaud' diff --git a/runtime/Python2/src/antlr4/tree/Chunk.py b/runtime/Python2/src/antlr4/tree/Chunk.py new file mode 100644 index 000000000..f047f15aa --- /dev/null +++ b/runtime/Python2/src/antlr4/tree/Chunk.py @@ -0,0 +1,26 @@ +class Chunk(object): + + def __str__(self): + return unicode(self) + + +class TagChunk(Chunk): + + def __init__(self, tag, label=None): + self.tag = tag + self.label = label + + def __unicode__(self): + if self.label is None: + return self.tag + else: + return self.label + ":" + self.tag + +class TextChunk(Chunk): + + def __init__(self, text): + self.text = text + + def __unicode__(self): + return "'" + self.text + "'" + diff --git a/runtime/Python2/src/antlr4/tree/ParseTreeMatch.py b/runtime/Python2/src/antlr4/tree/ParseTreeMatch.py new file mode 100644 index 000000000..f658197ef --- /dev/null +++ b/runtime/Python2/src/antlr4/tree/ParseTreeMatch.py @@ -0,0 +1,145 @@ +# +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +# +# Represents the result of matching a {@link ParseTree} against a tree pattern. +# +from io import StringIO + + +class ParseTreeMatch(object): + + # + # Constructs a new instance of {@link ParseTreeMatch} from the specified + # parse tree and pattern. + # + # @param tree The parse tree to match against the pattern. + # @param pattern The parse tree pattern. + # @param labels A mapping from label names to collections of + # {@link ParseTree} objects located by the tree pattern matching process. + # @param mismatchedNode The first node which failed to match the tree + # pattern during the matching process. + # + # @exception IllegalArgumentException if {@code tree} is {@code null} + # @exception IllegalArgumentException if {@code pattern} is {@code null} + # @exception IllegalArgumentException if {@code labels} is {@code null} + # + def __init__(self, tree, pattern, labels, mismatchedNode): + if tree is None: + raise Exception("tree cannot be null") + if pattern is None: + raise Exception("pattern cannot be null") + if labels is None: + raise Exception("labels cannot be null") + self.tree = tree + self.pattern = pattern + self.labels = labels + self.mismatchedNode = mismatchedNode + + # + # Get the last node associated with a specific {@code label}. + # + #

For example, for pattern {@code <id:ID>}, {@code get("id")} returns the
+ # node matched for that {@code ID}. If more than one node
+ # matched the specified label, only the last is returned. If there is
+ # no node associated with the label, this returns {@code null}.
+ #
+ # Pattern tags like {@code <ID>} and {@code <expr>} without labels are
+ # considered to be labeled with {@code ID} and {@code expr}, respectively.

      + # + # @param label The label to check. + # + # @return The last {@link ParseTree} to match a tag with the specified + # label, or {@code null} if no parse tree matched a tag with the label. + # + def get(self, label): + parseTrees = self.labels.get(label, None) + if parseTrees is None or len(parseTrees)==0: + return None + else: + return parseTrees[len(parseTrees)-1] + + # + # Return all nodes matching a rule or token tag with the specified label. + # + #

If the {@code label} is the name of a parser rule or token in the
+ # grammar, the resulting list will contain both the parse trees matching
+ # rule or tags explicitly labeled with the label and the complete set of
+ # parse trees matching the labeled and unlabeled tags in the pattern for
+ # the parser rule or token. For example, if {@code label} is {@code "foo"},
+ # the result will contain all of the following.
+ #
+ # - Parse tree nodes matching tags of the form {@code <foo:anyRuleName>} and
+ #   {@code <foo:AnyTokenName>}.
+ # - Parse tree nodes matching tags of the form {@code <anyLabel:foo>}.
+ # - Parse tree nodes matching tags of the form {@code <foo>}.
      + # + # @param label The label. + # + # @return A collection of all {@link ParseTree} nodes matching tags with + # the specified {@code label}. If no nodes matched the label, an empty list + # is returned. + # + def getAll(self, label): + nodes = self.labels.get(label, None) + if nodes is None: + return list() + else: + return nodes + + + # + # Gets a value indicating whether the match operation succeeded. + # + # @return {@code true} if the match operation succeeded; otherwise, + # {@code false}. + # + def succeeded(self): + return self.mismatchedNode is None + + # + # {@inheritDoc} + # + def __str__(self): + return unicode(self) + + + def __unicode__(self): + with StringIO() as buf: + buf.write(u"Match ") + buf.write(u"succeeded" if self.succeeded() else "failed") + buf.write(u"; found ") + buf.write(unicode(len(self.labels))) + buf.write(u" labels") + return buf.getvalue() diff --git a/runtime/Python2/src/antlr4/tree/ParseTreePattern.py b/runtime/Python2/src/antlr4/tree/ParseTreePattern.py new file mode 100644 index 000000000..96119c36f --- /dev/null +++ b/runtime/Python2/src/antlr4/tree/ParseTreePattern.py @@ -0,0 +1,94 @@ +# +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# A pattern like {@code = ;} converted to a {@link ParseTree} by +# {@link ParseTreePatternMatcher#compile(String, int)}. +# +from antlr4.xpath.XPath import XPath + + +class ParseTreePattern(object): + + # Construct a new instance of the {@link ParseTreePattern} class. + # + # @param matcher The {@link ParseTreePatternMatcher} which created this + # tree pattern. + # @param pattern The tree pattern in concrete syntax form. + # @param patternRuleIndex The parser rule which serves as the root of the + # tree pattern. + # @param patternTree The tree pattern in {@link ParseTree} form. 
+ # + def __init__(self, matcher, pattern, patternRuleIndex , patternTree): + self.matcher = matcher + self.patternRuleIndex = patternRuleIndex + self.pattern = pattern + self.patternTree = patternTree + + # + # Match a specific parse tree against this tree pattern. + # + # @param tree The parse tree to match against this tree pattern. + # @return A {@link ParseTreeMatch} object describing the result of the + # match operation. The {@link ParseTreeMatch#succeeded()} method can be + # used to determine whether or not the match was successful. + # + def match(self, tree): + return self.matcher.match(tree, self) + + # + # Determine whether or not a parse tree matches this tree pattern. + # + # @param tree The parse tree to match against this tree pattern. + # @return {@code true} if {@code tree} is a match for the current tree + # pattern; otherwise, {@code false}. + # + def matches(self, tree): + return self.matcher.match(tree, self).succeeded() + + # Find all nodes using XPath and then try to match those subtrees against + # this tree pattern. + # + # @param tree The {@link ParseTree} to match against this pattern. + # @param xpath An expression matching the nodes + # + # @return A collection of {@link ParseTreeMatch} objects describing the + # successful matches. Unsuccessful matches are omitted from the result, + # regardless of the reason for the failure. + # + def findAll(self, tree, xpath): + subtrees = XPath.findAll(tree, xpath, self.matcher.parser) + matches = list() + for t in subtrees: + match = self.match(t) + if match.succeeded(): + matches.append(match) + return matches diff --git a/runtime/Python2/src/antlr4/tree/ParseTreePatternMatcher.py b/runtime/Python2/src/antlr4/tree/ParseTreePatternMatcher.py new file mode 100644 index 000000000..5011e3ba6 --- /dev/null +++ b/runtime/Python2/src/antlr4/tree/ParseTreePatternMatcher.py @@ -0,0 +1,392 @@ +# +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# A tree pattern matching mechanism for ANTLR {@link ParseTree}s. +# +#

Patterns are strings of source input text with special tags representing
+# token or rule references such as:
+#
+# {@code <ID> = <expr>;}
+#
+# Given a pattern start rule such as {@code statement}, this object constructs
+# a {@link ParseTree} with placeholders for the {@code ID} and {@code expr}
+# subtree. Then the {@link #match} routines can compare an actual
+# {@link ParseTree} from a parse with this pattern. Tag {@code <ID>} matches
+# any {@code ID} token and tag {@code <expr>} references the result of the
+# {@code expr} rule (generally an instance of {@code ExprContext}).
+#
+# Pattern {@code x = 0;} is a similar pattern that matches the same pattern
+# except that it requires the identifier to be {@code x} and the expression to
+# be {@code 0}.
+#
+# The {@link #matches} routines return {@code true} or {@code false} based
+# upon a match for the tree rooted at the parameter sent in. The
+# {@link #match} routines return a {@link ParseTreeMatch} object that
+# contains the parse tree, the parse tree pattern, and a map from tag name to
+# matched nodes (more below). A subtree that fails to match returns with
+# {@link ParseTreeMatch#mismatchedNode} set to the first tree node that did
+# not match.
+#
+# For efficiency, you can compile a tree pattern in string form to a
+# {@link ParseTreePattern} object.
+#
+# See {@code TestParseTreeMatcher} for lots of examples.
+# {@link ParseTreePattern} has two static helper methods:
+# {@link ParseTreePattern#findAll} and {@link ParseTreePattern#match} that
+# are easy to use but not super efficient because they create new
+# {@link ParseTreePatternMatcher} objects each time and have to compile the
+# pattern in string form before using it.
+#
+# The lexer and parser that you pass into the {@link ParseTreePatternMatcher}
+# constructor are used to parse the pattern in string form. The lexer converts
+# the {@code <ID> = <expr>;} into a sequence of four tokens (assuming the lexer
+# throws out whitespace or puts it on a hidden channel). Be aware that the
+# input stream is reset for the lexer (but not the parser; a
+# {@link ParserInterpreter} is created to parse the input). Any user-defined
+# fields you have put into the lexer might get changed when this mechanism asks
+# it to scan the pattern string.
+#
+# Normally a parser does not accept token {@code <expr>} as a valid
+# {@code expr} but, from the parser passed in, we create a special version of
+# the underlying grammar representation (an {@link ATN}) that allows imaginary
+# tokens representing rules ({@code <expr>}) to match entire rules. We call
+# these bypass alternatives.
+#
+# Delimiters are {@code <} and {@code >}, with {@code \} as the escape string
+# by default, but you can set them to whatever you want using
+# {@link #setDelimiters}. You must escape both start and stop strings
+# {@code \<} and {@code \>}.

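As an informal illustration (not part of this patch), a rough usage sketch of the matcher API described above; ExprLexer, ExprParser, the statement start rule, and RULE_statement are hypothetical generated names:

# Rough sketch; ExprLexer/ExprParser and RULE_statement are hypothetical
# generated names, not part of this runtime.
from antlr4.CommonTokenStream import CommonTokenStream
from antlr4.InputStream import InputStream
from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher

lexer = ExprLexer(InputStream(u"x = 0;"))
parser = ExprParser(CommonTokenStream(lexer))
tree = parser.statement()                          # hypothetical start rule

matcher = ParseTreePatternMatcher(lexer, parser)
pattern = matcher.compileTreePattern(u"<ID> = <expr>;", ExprParser.RULE_statement)
match = matcher.matchPattern(tree, pattern)        # a ParseTreeMatch
if match.succeeded():
    print match.get("ID").getText()                # node bound to the <ID> tag

compileTreePattern and matchPattern are the methods defined in this file; the lexer/parser pair must come from the same grammar that produced the tree being matched.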
      +# +from antlr4 import CommonTokenStream, ParserRuleContext +from antlr4.InputStream import InputStream +from antlr4.ListTokenSource import ListTokenSource +from antlr4.Token import Token +from antlr4.error.ErrorStrategy import BailErrorStrategy +from antlr4.error.Errors import RecognitionException, ParseCancellationException +from antlr4.tree.Chunk import TagChunk, TextChunk +from antlr4.tree.RuleTagToken import RuleTagToken +from antlr4.tree.TokenTagToken import TokenTagToken +from antlr4.tree.Tree import TerminalNode, RuleNode + +class CannotInvokeStartRule(Exception): + + def __init__(self, e): + super(CannotInvokeStartRule, self).__init__(e) + +class StartRuleDoesNotConsumeFullPattern(Exception): + + pass + + +class ParseTreePatternMatcher(object): + + # Constructs a {@link ParseTreePatternMatcher} or from a {@link Lexer} and + # {@link Parser} object. The lexer input stream is altered for tokenizing + # the tree patterns. The parser is used as a convenient mechanism to get + # the grammar name, plus token, rule names. + def __init__(self, lexer, parser): + self.lexer = lexer + self.parser = parser + self.start = "<" + self.stop = ">" + self.escape = "\\" # e.g., \< and \> must escape BOTH! + + # Set the delimiters used for marking rule and token tags within concrete + # syntax used by the tree pattern parser. + # + # @param start The start delimiter. + # @param stop The stop delimiter. + # @param escapeLeft The escape sequence to use for escaping a start or stop delimiter. + # + # @exception IllegalArgumentException if {@code start} is {@code null} or empty. + # @exception IllegalArgumentException if {@code stop} is {@code null} or empty. + # + def setDelimiters(self, start, stop, escapeLeft): + if start is None or len(start)==0: + raise Exception("start cannot be null or empty") + if stop is None or len(stop)==0: + raise Exception("stop cannot be null or empty") + self.start = start + self.stop = stop + self.escape = escapeLeft + + # Does {@code pattern} matched as rule {@code patternRuleIndex} match {@code tree}?# + def matchesRuleIndex(self, tree, pattern, patternRuleIndex): + p = self.compileTreePattern(pattern, patternRuleIndex) + return self.matches(tree, p) + + # Does {@code pattern} matched as rule patternRuleIndex match tree? Pass in a + # compiled pattern instead of a string representation of a tree pattern. + # + def matchesPattern(self, tree, pattern): + mismatchedNode = self.matchImpl(tree, pattern.patternTree, dict()) + return mismatchedNode is None + + # + # Compare {@code pattern} matched as rule {@code patternRuleIndex} against + # {@code tree} and return a {@link ParseTreeMatch} object that contains the + # matched elements, or the node at which the match failed. + # + def matchRuleIndex(self, tree, pattern, patternRuleIndex): + p = self.compileTreePattern(pattern, patternRuleIndex) + return self.matchPattern(tree, p) + + # + # Compare {@code pattern} matched against {@code tree} and return a + # {@link ParseTreeMatch} object that contains the matched elements, or the + # node at which the match failed. Pass in a compiled pattern instead of a + # string representation of a tree pattern. + # + def matchPattern(self, tree, pattern): + labels = dict() + mismatchedNode = self.matchImpl(tree, pattern.patternTree, labels) + from antlr4.tree.ParseTreeMatch import ParseTreeMatch + return ParseTreeMatch(tree, pattern, labels, mismatchedNode) + + # + # For repeated use of a tree pattern, compile it to a + # {@link ParseTreePattern} using this method. 
+ # + def compileTreePattern(self, pattern, patternRuleIndex): + tokenList = self.tokenize(pattern) + tokenSrc = ListTokenSource(tokenList) + tokens = CommonTokenStream(tokenSrc) + from antlr4.ParserInterpreter import ParserInterpreter + parserInterp = ParserInterpreter(self.parser.grammarFileName, self.parser.tokenNames, + self.parser.ruleNames, self.parser.getATNWithBypassAlts(),tokens) + tree = None + try: + parserInterp.setErrorHandler(BailErrorStrategy()) + tree = parserInterp.parse(patternRuleIndex) + except ParseCancellationException as e: + raise e.cause + except RecognitionException as e: + raise e + except Exception as e: + raise CannotInvokeStartRule(e) + + # Make sure tree pattern compilation checks for a complete parse + if tokens.LA(1)!=Token.EOF: + raise StartRuleDoesNotConsumeFullPattern() + + from antlr4.tree.ParseTreePattern import ParseTreePattern + return ParseTreePattern(self, pattern, patternRuleIndex, tree) + + # + # Recursively walk {@code tree} against {@code patternTree}, filling + # {@code match.}{@link ParseTreeMatch#labels labels}. + # + # @return the first node encountered in {@code tree} which does not match + # a corresponding node in {@code patternTree}, or {@code null} if the match + # was successful. The specific node returned depends on the matching + # algorithm used by the implementation, and may be overridden. + # + def matchImpl(self, tree, patternTree, labels): + if tree is None: + raise Exception("tree cannot be null") + if patternTree is None: + raise Exception("patternTree cannot be null") + + # x and , x and y, or x and x; or could be mismatched types + if isinstance(tree, TerminalNode) and isinstance(patternTree, TerminalNode ): + mismatchedNode = None + # both are tokens and they have same type + if tree.symbol.type == patternTree.symbol.type: + if isinstance( patternTree.symbol, TokenTagToken ): # x and + tokenTagToken = patternTree.symbol + # track label->list-of-nodes for both token name and label (if any) + self.map(labels, tokenTagToken.tokenName, tree) + if tokenTagToken.label is not None: + self.map(labels, tokenTagToken.label, tree) + elif tree.getText()==patternTree.getText(): + # x and x + pass + else: + # x and y + if mismatchedNode is None: + mismatchedNode = tree + else: + if mismatchedNode is None: + mismatchedNode = tree + + return mismatchedNode + + if isinstance(tree, ParserRuleContext) and isinstance(patternTree, ParserRuleContext): + mismatchedNode = None + # (expr ...) and + ruleTagToken = self.getRuleTagToken(patternTree) + if ruleTagToken is not None: + m = None + if tree.ruleContext.ruleIndex == patternTree.ruleContext.ruleIndex: + # track label->list-of-nodes for both rule name and label (if any) + self.map(labels, ruleTagToken.ruleName, tree) + if ruleTagToken.label is not None: + self.map(labels, ruleTagToken.label, tree) + else: + if mismatchedNode is None: + mismatchedNode = tree + + return mismatchedNode + + # (expr ...) and (expr ...) 
+ if tree.getChildCount()!=patternTree.getChildCount(): + if mismatchedNode is None: + mismatchedNode = tree + return mismatchedNode + + n = tree.getChildCount() + for i in range(0, n): + childMatch = self.matchImpl(tree.getChild(i), patternTree.getChild(i), labels) + if childMatch is not None: + return childMatch + + return mismatchedNode + + # if nodes aren't both tokens or both rule nodes, can't match + return tree + + def map(self, labels, label, tree): + v = labels.get(label, None) + if v is None: + v = list() + labels[label] = v + v.append(tree) + + # Is {@code t} {@code (expr )} subtree?# + def getRuleTagToken(self, tree): + if isinstance( tree, RuleNode ): + if tree.getChildCount()==1 and isinstance(tree.getChild(0), TerminalNode ): + c = tree.getChild(0) + if isinstance( c.symbol, RuleTagToken ): + return c.symbol + return None + + def tokenize(self, pattern): + # split pattern into chunks: sea (raw input) and islands (, ) + chunks = self.split(pattern) + + # create token stream from text and tags + tokens = list() + for chunk in chunks: + if isinstance( chunk, TagChunk ): + # add special rule token or conjure up new token from name + if chunk.tag[0].isupper(): + ttype = self.parser.getTokenType(chunk.tag) + if ttype==Token.INVALID_TYPE: + raise Exception("Unknown token " + str(chunk.tag) + " in pattern: " + pattern) + tokens.append(TokenTagToken(chunk.tag, ttype, chunk.label)) + elif chunk.tag[0].islower(): + ruleIndex = self.parser.getRuleIndex(chunk.tag) + if ruleIndex==-1: + raise Exception("Unknown rule " + str(chunk.tag) + " in pattern: " + pattern) + ruleImaginaryTokenType = self.parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex] + tokens.append(RuleTagToken(chunk.tag, ruleImaginaryTokenType, chunk.label)) + else: + raise Exception("invalid tag: " + str(chunk.tag) + " in pattern: " + pattern) + else: + self.lexer.setInputStream(InputStream(chunk.text)) + t = self.lexer.nextToken() + while t.type!=Token.EOF: + tokens.append(t) + t = self.lexer.nextToken() + return tokens + + # Split {@code = ;} into 4 chunks for tokenizing by {@link #tokenize}.# + def split(self, pattern): + p = 0 + n = len(pattern) + chunks = list() + # find all start and stop indexes first, then collect + starts = list() + stops = list() + while p < n : + if p == pattern.find(self.escape + self.start, p): + p += len(self.escape) + len(self.start) + elif p == pattern.find(self.escape + self.stop, p): + p += len(self.escape) + len(self.stop) + elif p == pattern.find(self.start, p): + starts.append(p) + p += len(self.start) + elif p == pattern.find(self.stop, p): + stops.append(p) + p += len(self.stop) + else: + p += 1 + + nt = len(starts) + + if nt > len(stops): + raise Exception("unterminated tag in pattern: " + pattern) + if nt < len(stops): + raise Exception("missing start tag in pattern: " + pattern) + + for i in range(0, nt): + if starts[i] >= stops[i]: + raise Exception("tag delimiters out of order in pattern: " + pattern) + + # collect into chunks now + if nt==0: + chunks.append(TextChunk(pattern)) + + if nt>0 and starts[0]>0: # copy text up to first tag into chunks + text = pattern[0:starts[0]] + chunks.add(TextChunk(text)) + + for i in range(0, nt): + # copy inside of + tag = pattern[starts[i] + len(self.start) : stops[i]] + ruleOrToken = tag + label = None + colon = tag.find(':') + if colon >= 0: + label = tag[0:colon] + ruleOrToken = tag[colon+1 : len(tag)] + chunks.append(TagChunk(label, ruleOrToken)) + if i+1 < len(starts): + # copy from end of to start of next + text = pattern[stops[i] + 
len(self.stop) : starts[i + 1]] + chunks.append(TextChunk(text)) + + if nt > 0 : + afterLastTag = stops[nt - 1] + len(self.stop) + if afterLastTag < n : # copy text from end of last tag to end + text = pattern[afterLastTag : n] + chunks.append(TextChunk(text)) + + # strip out the escape sequences from text chunks but not tags + for i in range(0, len(chunks)): + c = chunks[i] + if isinstance( c, TextChunk ): + unescaped = c.text.replace(self.escape, "") + if len(unescaped) < len(c.text): + chunks[i] = TextChunk(unescaped) + return chunks diff --git a/runtime/Python2/src/antlr4/tree/RuleTagToken.py b/runtime/Python2/src/antlr4/tree/RuleTagToken.py new file mode 100644 index 000000000..021373f01 --- /dev/null +++ b/runtime/Python2/src/antlr4/tree/RuleTagToken.py @@ -0,0 +1,74 @@ +# +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# A {@link Token} object representing an entire subtree matched by a parser +# rule; e.g., {@code }. These tokens are created for {@link TagChunk} +# chunks where the tag corresponds to a parser rule. +# +from antlr4.Token import Token + + +class RuleTagToken(Token): + # + # Constructs a new instance of {@link RuleTagToken} with the specified rule + # name, bypass token type, and label. + # + # @param ruleName The name of the parser rule this rule tag matches. + # @param bypassTokenType The bypass token type assigned to the parser rule. + # @param label The label associated with the rule tag, or {@code null} if + # the rule tag is unlabeled. + # + # @exception IllegalArgumentException if {@code ruleName} is {@code null} + # or empty. + + def __init__(self, ruleName, bypassTokenType, label=None): + if ruleName is None or len(ruleName)==0: + raise Exception("ruleName cannot be null or empty.") + self.source = None + self.type = bypassTokenType # token type of the token + self.channel = Token.DEFAULT_CHANNEL # The parser ignores everything not on DEFAULT_CHANNEL + self.start = -1 # optional; return -1 if not implemented. + self.stop = -1 # optional; return -1 if not implemented. 
+ self.tokenIndex = -1 # from 0..n-1 of the token object in the input stream + self.line = 0 # line=1..n of the 1st character + self.column = -1 # beginning of the line at which it occurs, 0..n-1 + self.label = label + self._text = self.getText() # text of the token. + + self.ruleName = ruleName + + + def getText(self): + if self.label is None: + return "<" + self.ruleName + ">" + else: + return "<" + self.label + ":" + self.ruleName + ">" diff --git a/runtime/Python2/src/antlr4/tree/TokenTagToken.py b/runtime/Python2/src/antlr4/tree/TokenTagToken.py new file mode 100644 index 000000000..0517cef1f --- /dev/null +++ b/runtime/Python2/src/antlr4/tree/TokenTagToken.py @@ -0,0 +1,72 @@ +# +# [The "BSD license"] +# Copyright (c) 2013 Terence Parr +# Copyright (c) 2013 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# A {@link Token} object representing a token of a particular type; e.g., +# {@code }. These tokens are created for {@link TagChunk} chunks where the +# tag corresponds to a lexer rule or token type. +# +from antlr4.Token import CommonToken + + +class TokenTagToken(CommonToken): + + # Constructs a new instance of {@link TokenTagToken} with the specified + # token name, type, and label. + # + # @param tokenName The token name. + # @param type The token type. + # @param label The label associated with the token tag, or {@code null} if + # the token tag is unlabeled. + # + def __init__(self, tokenName, type, label=None): + super(TokenTagToken, self).__init__(type=type) + self.tokenName = tokenName + self.label = label + self._text = self.getText() + + # + # {@inheritDoc} + # + #

The implementation for {@link TokenTagToken} returns the token tag
+ # formatted with {@code <} and {@code >} delimiters.

      + # + def getText(self): + if self.label is None: + return "<" + self.tokenName + ">" + else: + return "<" + self.label + ":" + self.tokenName + ">" + + #

The implementation for {@link TokenTagToken} returns a string of the form
+ # {@code tokenName:type}.

      + # + def __unicode__(self): + return self.tokenName + u":" + unicode(self.type) diff --git a/runtime/Python2/src/antlr4/tree/Tree.py b/runtime/Python2/src/antlr4/tree/Tree.py new file mode 100644 index 000000000..272acf14f --- /dev/null +++ b/runtime/Python2/src/antlr4/tree/Tree.py @@ -0,0 +1,191 @@ +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#/ + + +# The basic notion of a tree has a parent, a payload, and a list of children. +# It is the most abstract interface for all the trees used by ANTLR. 
+#/ +from antlr4.Token import Token + +INVALID_INTERVAL = (-1, -2) + +class Tree(object): + + def __str__(self): + return unicode(self) + +class SyntaxTree(Tree): + pass + +class ParseTree(SyntaxTree): + pass + +class RuleNode(ParseTree): + pass + +class TerminalNode(ParseTree): + pass + +class ErrorNode(TerminalNode): + pass + +class ParseTreeVisitor(object): + def visit(self, tree): + return tree.accept(self) + + def visitChildren(self, node): + result = self.defaultResult() + n = node.getChildCount() + for i in range(n): + if not self.shouldVisitNextChild(node, result): + return + + c = node.getChild(i) + childResult = c.accept(self) + result = self.aggregateResult(result, childResult) + + return result + + def visitTerminal(self, node): + return self.defaultResult() + + def visitErrorNode(self, node): + return self.defaultResult() + + def defaultResult(self): + return None + + def aggregateResult(self, aggregate, nextResult): + return nextResult + + def shouldVisitNextChild(self, node, currentResult): + return True + +class ParseTreeListener(object): + + def visitTerminal(self, node): + pass + + def visitErrorNode(self, node): + pass + + def enterEveryRule(self, ctx): + pass + + def exitEveryRule(self, ctx): + pass + +class TerminalNodeImpl(TerminalNode): + + def __init__(self, symbol): + self.parentCtx = None + self.symbol = symbol + + def getChild(self, i): + return None + + def getSymbol(self): + return self.symbol + + def getParent(self): + return self.parentCtx + + def getPayload(self): + return self.symbol + + def getSourceInterval(self): + if self.symbol is None: + return INVALID_INTERVAL + tokenIndex = self.symbol.tokenIndex + return (tokenIndex, tokenIndex) + + def getChildCount(self): + return 0 + + def accept(self, visitor): + return visitor.visitTerminal(self) + + def getText(self): + return self.symbol.text + + def __unicode__(self): + if self.symbol.type == Token.EOF: + return "" + else: + return self.symbol.text + +# Represents a token that was consumed during resynchronization +# rather than during a valid match operation. For example, +# we will create this kind of a node during single token insertion +# and deletion as well as during "consume until error recovery set" +# upon no viable alternative exceptions. + +class ErrorNodeImpl(TerminalNodeImpl,ErrorNode): + + def __init__(self, token): + super(ErrorNodeImpl, self).__init__(token) + + def accept(self, visitor): + return visitor.visitErrorNode(self) + + +class ParseTreeWalker(object): + + DEFAULT = None + + def walk(self, listener, t): + if isinstance(t, ErrorNode): + listener.visitErrorNode(t) + return + elif isinstance(t, TerminalNode): + listener.visitTerminal(t) + return + self.enterRule(listener, t) + for child in t.getChildren(): + self.walk(listener, child) + self.exitRule(listener, t) + + # + # The discovery of a rule node, involves sending two events: the generic + # {@link ParseTreeListener#enterEveryRule} and a + # {@link RuleContext}-specific event. First we trigger the generic and then + # the rule specific. We to them in reverse order upon finishing the node. 
+ # + def enterRule(self, listener, r): + ctx = r.getRuleContext() + listener.enterEveryRule(ctx) + ctx.enterRule(listener) + + def exitRule(self, listener, r): + ctx = r.getRuleContext() + ctx.exitRule(listener) + listener.exitEveryRule(ctx) + +ParseTreeWalker.DEFAULT = ParseTreeWalker() \ No newline at end of file diff --git a/runtime/Python2/src/antlr4/tree/Trees.py b/runtime/Python2/src/antlr4/tree/Trees.py new file mode 100644 index 000000000..f750449d5 --- /dev/null +++ b/runtime/Python2/src/antlr4/tree/Trees.py @@ -0,0 +1,134 @@ +# +# [The "BSD license"] +# Copyright (c) 2012 Terence Parr +# Copyright (c) 2012 Sam Harwell +# Copyright (c) 2014 Eric Vergnaud +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +# A set of utility routines useful for all kinds of ANTLR trees.# +from io import StringIO + +from antlr4.Token import Token +from antlr4.Utils import escapeWhitespace +from antlr4.tree.Tree import RuleNode, ErrorNode, TerminalNode + + +class Trees(object): + + # Print out a whole tree in LISP form. {@link #getNodeText} is used on the + # node payloads to get the text for the nodes. Detect + # parse trees and extract data appropriately. 
+ @classmethod + def toStringTree(cls, t, ruleNames=None, recog=None): + if recog is not None: + ruleNames = recog.ruleNames + s = escapeWhitespace(cls.getNodeText(t, ruleNames), False) + if t.getChildCount()==0: + return s + with StringIO() as buf: + buf.write(u"(") + buf.write(s) + buf.write(u' ') + for i in range(0, t.getChildCount()): + if i > 0: + buf.write(u' ') + buf.write(cls.toStringTree(t.getChild(i), ruleNames)) + buf.write(u")") + return buf.getvalue() + + @classmethod + def getNodeText(cls, t, ruleNames=None, recog=None): + if recog is not None: + ruleNames = recog.ruleNames + if ruleNames is not None: + if isinstance(t, RuleNode): + return ruleNames[t.getRuleContext().getRuleIndex()] + elif isinstance( t, ErrorNode): + return unicode(t) + elif isinstance(t, TerminalNode): + if t.symbol is not None: + return t.symbol.text + # no recog for rule names + payload = t.getPayload() + if isinstance(payload, Token ): + return payload.text + return unicode(t.getPayload()) + + + # Return ordered list of all children of this node + @classmethod + def getChildren(cls, t): + return [ t.getChild(i) for i in range(0, t.getChildCount()) ] + + # Return a list of all ancestors of this node. The first node of + # list is the root and the last is the parent of this node. + # + @classmethod + def getAncestors(cls, t): + ancestors = [] + t = t.getParent() + while t is not None: + ancestors.append(0, t) # insert at start + t = t.getParent() + return ancestors + + @classmethod + def findAllTokenNodes(cls, t, ttype): + return cls.findAllNodes(t, ttype, True) + + @classmethod + def findAllRuleNodes(cls, t, ruleIndex): + return cls.findAllNodes(t, ruleIndex, False) + + @classmethod + def findAllNodes(cls, t, index, findTokens): + nodes = [] + cls._findAllNodes(t, index, findTokens, nodes) + return nodes + + @classmethod + def _findAllNodes(cls, t, index, findTokens, nodes): + from antlr4.ParserRuleContext import ParserRuleContext + # check this node (the root) first + if findTokens and isinstance(t, TerminalNode): + if t.symbol.type==index: + nodes.append(t) + elif not findTokens and isinstance(t, ParserRuleContext): + if t.ruleIndex == index: + nodes.append(t) + # check children + for i in range(0, t.getChildCount()): + cls._findAllNodes(t.getChild(i), index, findTokens, nodes) + + @classmethod + def descendants(cls, t): + nodes = [] + nodes.append(t) + for i in range(0, t.getChildCount()): + nodes.extend(cls.descendants(t.getChild(i))) + return nodes diff --git a/runtime/Python2/src/antlr4/tree/__init__.py b/runtime/Python2/src/antlr4/tree/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/runtime/Python2/src/antlr4/xpath/XPath.py b/runtime/Python2/src/antlr4/xpath/XPath.py new file mode 100644 index 000000000..6eaa03e01 --- /dev/null +++ b/runtime/Python2/src/antlr4/xpath/XPath.py @@ -0,0 +1,346 @@ + +# +# Represent a subset of XPath XML path syntax for use in identifying nodes in +# parse trees. +# +#

+# Split path into words and separators {@code /} and {@code //} via ANTLR
+# itself then walk path elements from left to right. At each separator-word
+# pair, find set of nodes. Next stage uses those as work list.
+#
+# The basic interface is
+# {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
+# But that is just shorthand for:
+#
+#     {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
+#     return p.{@link #evaluate evaluate}(tree);
+#
+# See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
+# allows operators:
+#
+#     /          root
+#     //         anywhere
+#     !          invert; this must appear directly after the root or anywhere operator
+#
+# and path elements:
+#
+#     ID         token name
+#     'string'   any string literal token from the grammar
+#     expr       rule name
+#     *          wildcard matching any node
+#
+# Whitespace is not allowed.
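+#
+# A short usage sketch against the interface described above (the `parser`
+# and `tree` names are assumed to come from an existing parse, and ID is
+# assumed to be a token name in that grammar; none of them are defined in
+# this module). To gather every ID token node anywhere below a tree, one
+# could write
+#
+#     p = XPath(parser, u"//ID")
+#     nodes = p.evaluate(tree)
+#
+# which mirrors the constructor-plus-evaluate shorthand shown earlier.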
      +# +from io import StringIO + +from antlr4 import CommonTokenStream, DFA, PredictionContextCache, Lexer, LexerATNSimulator, ParserRuleContext, TerminalNode +from antlr4.atn.ATNDeserializer import ATNDeserializer +from antlr4.InputStream import InputStream +from antlr4.Token import Token +from antlr4.error.ErrorListener import ErrorListener +from antlr4.error.Errors import LexerNoViableAltException +from antlr4.tree.Trees import Trees + + +def serializedATN(): + with StringIO() as buf: + buf.write(u"\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd\2") + buf.write(u"\n\64\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7") + buf.write(u"\t\7\4\b\t\b\4\t\t\t\3\2\3\2\3\2\3\3\3\3\3\4\3\4\3\5") + buf.write(u"\3\5\3\6\3\6\7\6\37\n\6\f\6\16\6\"\13\6\3\6\3\6\3\7\3") + buf.write(u"\7\5\7(\n\7\3\b\3\b\3\t\3\t\7\t.\n\t\f\t\16\t\61\13\t") + buf.write(u"\3\t\3\t\3/\2\n\3\5\5\6\7\7\t\b\13\t\r\2\17\2\21\n\3") + buf.write(u"\2\4\7\2\62;aa\u00b9\u00b9\u0302\u0371\u2041\u2042\17") + buf.write(u"\2C\\c|\u00c2\u00d8\u00da\u00f8\u00fa\u0301\u0372\u037f") + buf.write(u"\u0381\u2001\u200e\u200f\u2072\u2191\u2c02\u2ff1\u3003") + buf.write(u"\ud801\uf902\ufdd1\ufdf2\uffff\64\2\3\3\2\2\2\2\5\3\2") + buf.write(u"\2\2\2\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\21\3\2\2") + buf.write(u"\2\3\23\3\2\2\2\5\26\3\2\2\2\7\30\3\2\2\2\t\32\3\2\2") + buf.write(u"\2\13\34\3\2\2\2\r\'\3\2\2\2\17)\3\2\2\2\21+\3\2\2\2") + buf.write(u"\23\24\7\61\2\2\24\25\7\61\2\2\25\4\3\2\2\2\26\27\7\61") + buf.write(u"\2\2\27\6\3\2\2\2\30\31\7,\2\2\31\b\3\2\2\2\32\33\7#") + buf.write(u"\2\2\33\n\3\2\2\2\34 \5\17\b\2\35\37\5\r\7\2\36\35\3") + buf.write(u"\2\2\2\37\"\3\2\2\2 \36\3\2\2\2 !\3\2\2\2!#\3\2\2\2\"") + buf.write(u" \3\2\2\2#$\b\6\2\2$\f\3\2\2\2%(\5\17\b\2&(\t\2\2\2\'") + buf.write(u"%\3\2\2\2\'&\3\2\2\2(\16\3\2\2\2)*\t\3\2\2*\20\3\2\2") + buf.write(u"\2+/\7)\2\2,.\13\2\2\2-,\3\2\2\2.\61\3\2\2\2/\60\3\2") + buf.write(u"\2\2/-\3\2\2\2\60\62\3\2\2\2\61/\3\2\2\2\62\63\7)\2\2") + buf.write(u"\63\22\3\2\2\2\6\2 \'/\3\3\6\2") + return buf.getvalue() + + +class XPathLexer(Lexer): + + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] + + + TOKEN_REF = 1 + RULE_REF = 2 + ANYWHERE = 3 + ROOT = 4 + WILDCARD = 5 + BANG = 6 + ID = 7 + STRING = 8 + + modeNames = [ u"DEFAULT_MODE" ] + + literalNames = [ u"", + u"'//'", u"'/'", u"'*'", u"'!'" ] + + symbolicNames = [ u"", + u"TOKEN_REF", u"RULE_REF", u"ANYWHERE", u"ROOT", u"WILDCARD", + u"BANG", u"ID", u"STRING" ] + + ruleNames = [ u"ANYWHERE", u"ROOT", u"WILDCARD", u"BANG", u"ID", u"NameChar", + u"NameStartChar", u"STRING" ] + + grammarFileName = u"XPathLexer.g4" + + def __init__(self, input=None): + super(XPathLexer, self).__init__(input) + self.checkVersion("4.5") + self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache()) + self._actions = None + self._predicates = None + + + def action(self, localctx, ruleIndex, actionIndex): + if self._actions is None: + actions = dict() + actions[4] = self.ID_action + self._actions = actions + action = self._actions.get(ruleIndex, None) + if action is not None: + action(localctx, actionIndex) + else: + raise Exception("No registered action for:" + str(ruleIndex)) + + def ID_action(self, localctx , actionIndex): + if actionIndex == 0: + char = self.text[0] + if char.isupper(): + self.type = XPathLexer.TOKEN_REF + else: + self.type = XPathLexer.RULE_REF + +class XPath(object): + + WILDCARD = "*" # word not operator/separator + NOT = "!" 
# word for invert operator + + def __init__(self, parser, path): + self.parser = parser + self.path = path + self.elements = self.split(path) + + def split(self, path): + input = InputStream(path) + lexer = XPathLexer(input) + def recover(self, e): + raise e + lexer.recover = recover + lexer.removeErrorListeners() + lexer.addErrorListener(ErrorListener()) # XPathErrorListener does no more + tokenStream = CommonTokenStream(lexer) + try: + tokenStream.fill() + except LexerNoViableAltException as e: + pos = lexer.getColumn() + msg = "Invalid tokens or characters at index " + str(pos) + " in path '" + path + "'" + raise Exception(msg, e) + + tokens = tokenStream.getTokens() + elements = list() + n = len(tokens) + i=0 + while i < n : + el = tokens[i] + next = None + if el.type in [XPathLexer.ROOT, XPathLexer.ANYWHERE]: + anywhere = el.type == XPathLexer.ANYWHERE + i += 1 + next = tokens[i] + invert = next.type==XPathLexer.BANG + if invert: + i += 1 + next = tokens[i] + pathElement = self.getXPathElement(next, anywhere) + pathElement.invert = invert + elements.append(pathElement) + i += 1 + + elif el.type in [XPathLexer.TOKEN_REF, XPathLexer.RULE_REF, XPathLexer.WILDCARD] : + elements.append( self.getXPathElement(el, False) ) + i += 1 + + elif el.type==Token.EOF : + break + + else: + raise Exception("Unknown path element " + str(el)) + + return elements + + # + # Convert word like {@code#} or {@code ID} or {@code expr} to a path + # element. {@code anywhere} is {@code true} if {@code //} precedes the + # word. + # + def getXPathElement(self, wordToken, anywhere): + if wordToken.type==Token.EOF: + raise Exception("Missing path element at end of path") + word = wordToken.text + ttype = self.parser.getTokenType(word) + ruleIndex = self.parser.getRuleIndex(word) + + if wordToken.type==XPathLexer.WILDCARD : + + return XPathWildcardAnywhereElement() if anywhere else XPathWildcardElement() + + elif wordToken.type in [XPathLexer.TOKEN_REF, XPathLexer.STRING]: + + if ttype==Token.INVALID_TYPE: + raise Exception( word + " at index " + str(wordToken.startIndex) + " isn't a valid token name") + return XPathTokenAnywhereElement(word, ttype) if anywhere else XPathTokenElement(word, ttype) + + else: + + if ruleIndex==-1: + raise Exception( word + " at index " + str(wordToken.getStartIndex()) + " isn't a valid rule name") + return XPathRuleAnywhereElement(word, ruleIndex) if anywhere else XPathRuleElement(word, ruleIndex) + + + def findAll(self, tree, xpath, parser): + p = XPath(parser, xpath) + return p.evaluate(tree) + + # + # Return a list of all nodes starting at {@code t} as root that satisfy the + # path. The root {@code /} is relative to the node passed to + # {@link #evaluate}. + # + def evaluate(self, t): + dummyRoot = ParserRuleContext() + dummyRoot.children = [t] # don't set t's parent. + + work = [dummyRoot] + + for i in range(0, len(self.elements)): + next = set() + for node in work: + if len( node.children) > 0 : + # only try to match next element if it has children + # e.g., //func/*/stat might have a token node for which + # we can't go looking for stat nodes. + matching = self.elements[i].evaluate(node) + next |= matching + i += 1 + work = next + + return work + + +class XPathElement(object): + + def __init__(self, nodeName): + self.nodeName = nodeName + self.invert = False + + def __str__(self): + return unicode(self) + + def __unicode__(self): + return type(self).__name__ + "[" + ("!" 
if self.invert else "") + self.nodeName + "]" + + + +# +# Either {@code ID} at start of path or {@code ...//ID} in middle of path. +# +class XPathRuleAnywhereElement(XPathElement): + + def __init__(self, ruleName, ruleIndex): + super(XPathRuleAnywhereElement, self).__init__(ruleName) + self.ruleIndex = ruleIndex + + def evaluate(self, t): + return Trees.findAllRuleNodes(t, self.ruleIndex) + + +class XPathRuleElement(XPathElement): + + def __init__(self, ruleName, ruleIndex): + super(XPathRuleElement, self).__init__(ruleName) + self.ruleIndex = ruleIndex + + def evaluate(self, t): + # return all children of t that match nodeName + nodes = [] + for c in Trees.getChildren(t): + if isinstance(c, ParserRuleContext ): + if (c.ruleIndex == self.ruleIndex ) == (not self.invert): + nodes.append(c) + return nodes + +class XPathTokenAnywhereElement(XPathElement): + + def __init__(self, ruleName, tokenType): + super(XPathTokenAnywhereElement, self).__init__(ruleName) + self.tokenType = tokenType + + def evaluate(self, t): + return Trees.findAllTokenNodes(t, self.tokenType) + + +class XPathTokenElement(XPathElement): + + def __init__(self, ruleName, tokenType): + super(XPathTokenElement, self).__init__(ruleName) + self.tokenType = tokenType + + def evaluate(self, t): + # return all children of t that match nodeName + nodes = [] + for c in Trees.getChildren(t): + if isinstance(c, TerminalNode): + if (c.symbol.type == self.tokenType ) == (not self.invert): + nodes.append(c) + return nodes + +class XPathWildcardAnywhereElement(XPathElement): + + def __init__(self): + super(XPathWildcardAnywhereElement, self).__init__(XPath.WILDCARD) + + def evaluate(self, t): + if self.invert: + return list() # !* is weird but valid (empty) + else: + return Trees.descendants(t) + + +class XPathWildcardElement(XPathElement): + + def __init__(self): + super(XPathWildcardElement, self).__init__(XPath.WILDCARD) + + + def evaluate(self, t): + if self.invert: + return list() # !* is weird but valid (empty) + else: + return Trees.getChildren(t) diff --git a/runtime/Python2/src/antlr4/xpath/__init__.py b/runtime/Python2/src/antlr4/xpath/__init__.py new file mode 100644 index 000000000..216c000dc --- /dev/null +++ b/runtime/Python2/src/antlr4/xpath/__init__.py @@ -0,0 +1 @@ +__author__ = 'ericvergnaud' diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg new file mode 100644 index 000000000..b4e66eb78 --- /dev/null +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg @@ -0,0 +1,805 @@ +/* + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * Copyright (c) 2014 Eric Vergnaud + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** ANTLR tool checks output templates are compatible with tool code generation. + * For now, a simple string match used on x.y of x.y.z scheme. + * Must match Tool.VERSION during load to templates. + * + * REQUIRED. + */ + +pythonTypeInitMap ::= [ + "bool":"False", + "int":"0", + "float":"0.0", + "str":"", + default:"None" // anything other than a primitive type is an object +] + +// args must be , + +ParserFile(file, parser, namedActions) ::= << + +# encoding: utf-8 +from __future__ import print_function +from antlr4 import * +from io import StringIO + + + + +>> + +ListenerFile(file, header) ::= << + +from antlr4 import * +
      + +# This class defines a complete listener for a parse tree produced by . +class Listener(ParseTreeListener): + + #. +def enter(self, ctx): + pass + +# Exit a parse tree produced by #. +def exit(self, ctx): + pass + +}; separator="\n"> + +>> + + +VisitorFile(file, header) ::= << + +from antlr4 import * +
      + +# This class defines a complete generic visitor for a parse tree produced by . + +class Visitor(ParseTreeVisitor): + + #. +def visit(self, ctx): + return self.visitChildren(ctx) + +}; separator="\n"> + +>> + + +fileHeader(grammarFileName, ANTLRVersion) ::= << +# Generated from by ANTLR +>> + +Parser(parser, funcs, atn, sempredFuncs, superClass) ::= << + +>> + +Parser_(parser, funcs, atn, sempredFuncs, ctor, superClass) ::= << + +from . import + + + + +class ( Parser ): + + grammarFileName = "" + + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] + + sharedContextCache = PredictionContextCache() + + literalNames = [ }; null="u\"\\"", separator=", ", wrap, anchor> ] + + symbolicNames = [ }; null="u\"\\"", separator=", ", wrap, anchor> ] + + = }; separator="\n", wrap, anchor> + + ruleNames = [ "}; separator=", ", wrap, anchor> ] + + EOF = .EOF + + =}; separator="\n", wrap, anchor> + + + + + + + + + + + def sempred(self, localctx, ruleIndex, predIndex): + if self._predicates == None: + self._predicates = dict() +] = self._sempred}; separator="\n "> + pred = self._predicates.get(ruleIndex, None) + if pred is None: + raise Exception("No predicate with index:" + str(ruleIndex)) + else: + return pred(localctx, predIndex) + + + + + + +>> + +dumpActions(recog, argFuncs, actionFuncs, sempredFuncs) ::= << + +def action(self, localctx, ruleIndex, actionIndex): + if self._actions is None: + actions = dict() +] = self._action }; separator="\n"> + self._actions = actions + action = self._actions.get(ruleIndex, None) + if action is not None: + action(localctx, actionIndex) + else: + raise Exception("No registered action for:" + str(ruleIndex)) + + + + + +def sempred(self, localctx, ruleIndex, predIndex): + if self._predicates is None: + preds = dict() +] = self._sempred}; separator="\n"> + self._predicates = preds + pred = self._predicates.get(ruleIndex, None) + if pred is not None: + return pred(localctx, predIndex) + else: + raise Exception("No registered predicate for:" + str(ruleIndex)) + + + +>> + +parser_ctor(p) ::= << +def __init__(self, input): + super(, self).__init__(input) + self.checkVersion("") + self._interp = ParserATNSimulator(self, self.atn, self.decisionsToDFA, self.sharedContextCache) + self._predicates = None + +>> + +/* This generates a private method since the actionIndex is generated, making an + * overriding implementation impossible to maintain. + */ +RuleActionFunction(r, actions) ::= << + +def _action(self, localctx , actionIndex): + + if actionIndex == : + + + elif actionIndex == : + + }; separator="\n"> +>> + +/* This generates a private method since the predIndex is generated, making an + * overriding implementation impossible to maintain. 
+ */ +RuleSempredFunction(r, actions) ::= << +def _sempred(self, localctx, predIndex): + + if predIndex == : + return + + elif predIndex == : + return + }; separator="\n"> + +>> + +RuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs,namedActions,finallyAction,postamble,exceptions) ::= << + + + +}; separator="\n"> + +def (self}>): + + localctx = .(self, self._ctx, self.state}>) + self.enterRule(localctx, , self.RULE_) + + + try: + + + + + + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + + finally: + + self.exitRule() + return localctx + +>> + +LeftRecursiveRuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs, + namedActions,finallyAction,postamble) ::= +<< + + +}; separator="\n"> + +def (self, _p=0, }>): + _parentctx = self._ctx + _parentState = self.state + localctx = .(self, self._ctx, _parentState}>) + _prevctx = localctx + _startState = + self.enterRecursionRule(localctx, , self.RULE_, _p) + + + try: + + + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + + self.unrollRecursionContexts(_parentctx) + return localctx + +>> + +CodeBlockForOuterMostAlt(currentOuterMostAltCodeBlock, locals, preamble, ops) ::= << +localctx = .Context(self, localctx) +self.enterOuterAlt(localctx, ) + +>> + +CodeBlockForAlt(currentAltCodeBlock, locals, preamble, ops) ::= << + + + +>> + +LL1AltBlock(choice, preamble, alts, error) ::= << +self.state = + + = _input.LT(1) + +token = self._input.LA(1) + + + }; separator="\nel"> +else: + + +>> + +LL1OptionalBlock(choice, alts, error) ::= << +self.state = + +token = self._input.LA(1) + + + pass}; separator="\nel"> +else: + +>> + +LL1OptionalBlockSingleAlt(choice, expr, alts, preamble, error, followExpr) ::= << +self.state = + + +if : + + +) ) !> +>> + + +LL1StarBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= << +self.state = +self._errHandler.sync(self) + +while : + + self.state = + self._errHandler.sync(self) + + +>> + +LL1PlusBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= << +self.state = +self._errHandler.sync(self) + +while True: + + self.state = + self._errHandler.sync(self) + + if not (): + break + +>> + +// LL(*) stuff + +AltBlock(choice, preamble, alts, error) ::= << +self.state = + + = _input.LT(1) + +la_ = self._interp.adaptivePredict(self._input,,self._ctx) +: + + pass +}; separator="\nel"> + +>> + +OptionalBlock(choice, alts, error) ::= << +self.state = + +la_ = self._interp.adaptivePredict(self._input,,self._ctx) ++1: + +}; separator="\nel"> + +>> + +StarBlock(choice, alts, sync, iteration) ::= << +self.state = +self._errHandler.sync(self) +_alt = self._interp.adaptivePredict(self._input,,self._ctx) +while _alt!= and _alt!=ATN.INVALID_ALT_NUMBER: + if _alt==1+1: + + + self.state = + self._errHandler.sync(self) + _alt = self._interp.adaptivePredict(self._input,,self._ctx) + +>> + +PlusBlock(choice, alts, error) ::= << +self.state = +self._errHandler.sync(self) +_alt = 1+1 +while _alt!= and _alt!=ATN.INVALID_ALT_NUMBER: + +1: + +}; separator="\nel"> + else: + + self.state = + self._errHandler.sync(self) + _alt = self._interp.adaptivePredict(self._input,,self._ctx) + +>> + +Sync(s) ::= "sync()" + +ThrowNoViableAlt(t) ::= "raise NoViableAltException(self)" + +TestSetInline(s) ::= << +}; separator=" or "> +>> + +// Java language spec 15.19 - shift operators mask operands rather than overflow to 0... 
need range test +testShiftInRange(shiftAmount) ::= << +(() & ~0x3f) == 0 +>> + +// produces smaller bytecode only when bits.ttypes contains more than two items +bitsetBitfieldComparison(s, bits) ::= <% +(})> and ((1 \<\< ) & ()}; separator=" | ">)) != 0) +%> + +isZero ::= [ +"0":true, +default:false +] + +offsetShiftVar(shiftAmount, offset) ::= <% +( - ) +%> + +offsetShiftType(shiftAmount, offset) ::= <% +(. - ). +%> + +// produces more efficient bytecode when bits.ttypes contains at most two items +bitsetInlineComparison(s, bits) ::= <% +==.}; separator=" or "> +%> + +cases(ttypes) ::= << +if token in [.}; separator=", ">]: +>> + +InvokeRule(r, argExprsChunks) ::= << +self.state = + = }>self.(,) +>> + +MatchToken(m) ::= << +self.state = + = }>self.match(.) +>> + +MatchSet(m, expr, capture) ::= "" + +MatchNotSet(m, expr, capture) ::= "" + +CommonSetStuff(m, expr, capture, invert) ::= << +self.state = + = }>self._input.LT(1) + +if \<= 0 or if not(): + = }> self._errHandler.recoverInline(self) +else: + self.consume() +>> + +Wildcard(w) ::= << +self.state = + = }>self.matchWildcard() +>> + +// ACTION STUFF + +Action(a, foo, chunks) ::= "" + +ArgAction(a, chunks) ::= "" + +SemPred(p, chunks, failChunks) ::= << +self.state = +if not : + from antlr4.error.Errors import FailedPredicateException + raise FailedPredicateException(self, , , ) +>> + +ExceptionClause(e, catchArg, catchAction) ::= << +catch () { + +} +>> + +// lexer actions are not associated with model objects + +LexerSkipCommand() ::= "skip()" +LexerMoreCommand() ::= "more()" +LexerPopModeCommand() ::= "popMode()" + +LexerTypeCommand(arg) ::= "_type = " +LexerChannelCommand(arg) ::= "_channel = " +LexerModeCommand(arg) ::= "_mode = " +LexerPushModeCommand(arg) ::= "pushMode()" + +ActionText(t) ::= "" +ActionTemplate(t) ::= "" +ArgRef(a) ::= "localctx." +LocalRef(a) ::= "localctx." +RetValueRef(a) ::= "localctx." +QRetValueRef(a) ::= ".." +/** How to translate $tokenLabel */ +TokenRef(t) ::= "." +LabelRef(t) ::= "." +ListLabelRef(t) ::= "." +SetAttr(s,rhsChunks) ::= ". = " + +TokenLabelType() ::= "" +InputSymbolType() ::= "" + +TokenPropertyRef_text(t) ::= "(None if . is None else ..text)" +TokenPropertyRef_type(t) ::= "(0 if . is None else ..type()" +TokenPropertyRef_line(t) ::= "(0 if . is None else ..line)" +TokenPropertyRef_pos(t) ::= "(0 if . is None else ..column)" +TokenPropertyRef_channel(t) ::= "(0 if (. is None else ..channel)" +TokenPropertyRef_index(t) ::= "(0 if . is None else ..tokenIndex)" +TokenPropertyRef_int(t) ::= "(0 if . is None else int(..text))" + +RulePropertyRef_start(r) ::= "(None if . is None else ..start)" +RulePropertyRef_stop(r) ::= "(None if . is None else ..stop)" +RulePropertyRef_text(r) ::= "(None if . is None else self._input.getText((..start,..stop)))" +RulePropertyRef_ctx(r) ::= "." +RulePropertyRef_parser(r) ::= "self" + +ThisRulePropertyRef_start(r) ::= "localctx.start" +ThisRulePropertyRef_stop(r) ::= "localctx.stop" +ThisRulePropertyRef_text(r) ::= "self._input.getText((localctx.start, self._input.LT(-1)))" +ThisRulePropertyRef_ctx(r) ::= "localctx" +ThisRulePropertyRef_parser(r) ::= "self" + +NonLocalAttrRef(s) ::= "getInvokingContext()." +SetNonLocalAttr(s, rhsChunks) ::= "getInvokingContext(). = " + +AddToLabelList(a) ::= "..append()" + +TokenDecl(t) ::= "self. = None # " +TokenTypeDecl(t) ::= "self. = 0 # type" +TokenListDecl(t) ::= "self. = list() # of s" +RuleContextDecl(r) ::= "self. = None # " +RuleContextListDecl(rdecl) ::= "self. 
= list() # of s" + +ContextTokenGetterDecl(t) ::= << +def (self): + return self.getToken(., 0) +>> + +// should never be called +ContextTokenListGetterDecl(t) ::= << +def _list(self): + return self.getTokens(.) +>> + +ContextTokenListIndexedGetterDecl(t) ::= << +def (self, i=None): + if i is None: + return self.getTokens(.) + else: + return self.getToken(., i) +>> + +ContextRuleGetterDecl(r) ::= << +def (self): + return self.getTypedRuleContext(.,0) + +>> + +// should never be called +ContextRuleListGetterDecl(r) ::= << +def _list(self): + return self.getTypedRuleContexts(.) + +>> + +ContextRuleListIndexedGetterDecl(r) ::= << +def (self, i=None): + if i is None: + return self.getTypedRuleContexts(.) + else: + return self.getTypedRuleContext(.,i) + +>> + +LexerRuleContext() ::= "RuleContext" + +/** The rule context name is the rule followed by a suffix; e.g., + * r becomes rContext. + */ +RuleContextNameSuffix() ::= "Context" + +ImplicitTokenLabel(tokenName) ::= "_" +ImplicitRuleLabel(ruleName) ::= "_" +ImplicitSetLabel(id) ::= "_tset" +ListLabelName(label) ::= "