forked from jasder/antlr
all Python2 runtime tests pass
parent d1f95085fe
commit 8e89af918f
@@ -20,7 +20,6 @@
	<sourceFolder url="file://$MODULE_DIR$/tool-testsuite/test" isTestSource="true" />
	<sourceFolder url="file://$MODULE_DIR$/runtime" type="java-resource" />
	<excludeFolder url="file://$MODULE_DIR$/runtime-testsuite/test/org/antlr/v4/test/runtime/javascript" />
	<excludeFolder url="file://$MODULE_DIR$/runtime-testsuite/test/org/antlr/v4/test/runtime/python2" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
@@ -0,0 +1,399 @@
IgnoredTests ::= [
	default: false
]

TestFile(file) ::= <<
/* This file is generated by TestGenerator, any edits will be overwritten by the next generation. */
package org.antlr.v4.test.runtime.python2;

import org.junit.Ignore;
import org.junit.Test;
import static org.junit.Assert.*;

<if(file.Options.("ImportErrorQueue"))>
import org.antlr.v4.test.runtime.java.ErrorQueue;
<endif>
<if(file.Options.("ImportGrammar"))>
import org.antlr.v4.tool.Grammar;
<endif>

@SuppressWarnings("unused")
public class Test<file.name> extends BasePython2Test {

	<file.tests:{test | <test>}; separator="\n", wrap, anchor>

}
>>

LexerTestMethod(test) ::= <<
/* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */
<testAnnotations(test)>
public void test<test.name>() throws Exception {
	mkdir(tmpdir);

<test.SlaveGrammars:{grammar |
	String slave_<grammar> =<writeStringLiteral(test.SlaveGrammars.(grammar))>;
	writeFile(tmpdir, "<grammar>.g4", slave_<grammar>);
}; separator="\n">
<test.Grammar:{grammar |
	<buildStringLiteral(test.Grammar.(grammar), "grammar")>

<if(test.AfterGrammar)>
	<test.AfterGrammar>
<endif>
	String input =<writeStringLiteral(test.Input)>;
	String found = execLexer("<grammar>.g4", grammar, "<grammar><if(test.Options.("CombinedGrammar"))>Lexer<endif>", input, <writeBoolean(test.Options.("ShowDFA"))>);
	assertEquals(<writeStringLiteral(test.Output)>, found);
<if(!isEmpty.(test.Errors))>
	assertEquals(<writeStringLiteral(test.Errors)>, this.stderrDuringParse);
<else>
	assertNull(this.stderrDuringParse);
<endif>
}>
}

>>

CompositeLexerTestMethod(test) ::= <<
<LexerTestMethod(test)>
>>

ParserTestMethod(test) ::= <<
/* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */
<testAnnotations(test)>
public void test<test.name>() throws Exception {
	mkdir(tmpdir);

<test.SlaveGrammars:{grammar |
	String slave_<grammar> =<writeStringLiteral(test.SlaveGrammars.(grammar))>;
<if(test.Options.("SlaveIsLexer"))>
	rawGenerateAndBuildRecognizer("<grammar>.g4", slave_<grammar>, null, "<grammar>");
<else>
	writeFile(tmpdir, "<grammar>.g4", slave_<grammar>);
<endif>
}; separator="\n">
<test.Grammar:{grammar |
	<buildStringLiteral(test.Grammar.(grammar), "grammar")>

	<test.AfterGrammar>

	String input =<writeStringLiteral(test.Input)>;
	String found = execParser("<grammar>.g4", grammar, "<grammar><if(!test.slaveIsLexer)>Parser<endif>", "<if(test.slaveIsLexer)><first(test.slaveGrammars).grammarName><else><grammar>Lexer<endif>", "<grammar>Listener", "<grammar>Visitor", "<test.Rule>", input, <writeBoolean(test.Options.("Debug"))>);

	assertEquals(<writeStringLiteral(test.Output)>, found);
<if(!isEmpty.(test.Errors))>
	assertEquals(<writeStringLiteral(test.Errors)>, this.stderrDuringParse);
<else>
	assertNull(this.stderrDuringParse);
<endif>
}>
}

>>

CompositeParserTestMethod(test) ::= <<
<ParserTestMethod(test)>
>>

AbstractParserTestMethod(test) ::= <<
/* this file and method are generated, any edit will be overwritten by the next generation */
String test<test.name>(String input) throws Exception {
	String grammar = <test.grammar.lines:{ line | "<line>};separator="\\n\" +\n", wrap, anchor>";
	return execParser("<test.grammar.grammarName>.g4", grammar, "<test.grammar.grammarName>Parser", "<test.grammar.grammarName>Lexer", "<test.startRule>", input, <test.debug>);
}

>>

ConcreteParserTestMethod(test) ::= <<
/* this file and method are generated, any edit will be overwritten by the next generation */
@Test
public void test<test.name>() throws Exception {
	String found = test<test.baseName>("<test.input>");
	assertEquals("<test.expectedOutput>", found);
<if(test.expectedErrors)>
	assertEquals("<test.expectedErrors>", this.stderrDuringParse);
<else>
	assertNull(this.stderrDuringParse);
<endif>
}

>>

testAnnotations(test) ::= <%
@Test
<if(test.Options.("Ignore"))>
<\n>@Ignore(<writeStringLiteral(test.Options.("Ignore"))>)
<elseif(IgnoredTests.(({<file.name>.<test.name>})))>
<\n>@Ignore(<writeStringLiteral(IgnoredTests.(({<file.name>.<test.name>})))>)
<endif>
%>

buildStringLiteral(text, variable) ::= <<
StringBuilder <variable>Builder = new StringBuilder(<strlen.(text)>);
<lines.(text):{line|<variable>Builder.append("<escape.(line)>");}; separator="\n">
String <variable> = <variable>Builder.toString();
>>

writeStringLiteral(text) ::= <%
<if(isEmpty.(text))>
""
<else>
<writeLines(lines.(text))>
<endif>
%>

writeLines(textLines) ::= <%
<if(rest(textLines))>
<textLines:{line|
<\n>	"<escape.(line)>}; separator="\" +">"
<else>
"<escape.(first(textLines))>"
<endif>
%>

string(text) ::= <<
"<escape.(text)>"
>>

writeBoolean(o) ::= "<if(o && !isEmpty.(o))>true<else>false<endif>"

writeln(s) ::= <<print(<s>)>>

write(s) ::= <<print(<s>,end='')>>

False() ::= "False"

True() ::= "True"

Not(v) ::= "not <v>"

Assert(s) ::= ""

Cast(t,v) ::= "<v>"

Append(a,b) ::= "<a> + str(<b>)"

Concat(a,b) ::= "<a><b>"

DeclareLocal(s,v) ::= "<s> = <v>"

AssertIsList(v) ::= "assert isinstance(v, (list, tuple))"

AssignLocal(s,v) ::= "<s> = <v>"

InitIntMember(n,v) ::= <%<n> = <v>%>

InitBooleanMember(n,v) ::= <%<n> = <v>%>

GetMember(n) ::= <%self.<n>%>

SetMember(n,v) ::= <%self.<n> = <v>%>

AddMember(n,v) ::= <%self.<n> += <v>%>

PlusMember(v,n) ::= <%<v> + str(self.<n>)%>

MemberEquals(n,v) ::= <%self.<n> == <v>%>

ModMemberEquals(n,m,v) ::= <%self.<n> % <m> == <v>%>

ModMemberNotEquals(n,m,v) ::= <%self.<n> % <m> != <v>%>

DumpDFA() ::= "self.dumpDFA()"

Pass() ::= "pass"

StringList() ::= ""

BuildParseTrees() ::= "self._buildParseTrees = True"

BailErrorStrategy() ::= <%self._errHandler = BailErrorStrategy()%>

ToStringTree(s) ::= <%<s>.toStringTree(recog=self)%>

Column() ::= "self.column"

Text() ::= "self.text"

ValEquals(a,b) ::= <%<a>==<b>%>

TextEquals(a) ::= <%self.text=="<a>"%>

PlusText(a) ::= <%"<a>" + self.text%>

InputText() ::= "self._input.getText()"

LTEquals(i, v) ::= <%self._input.LT(<i>).text==<v>%>

LANotEquals(i, v) ::= <%self._input.LA(<i>)!=<v>%>

TokenStartColumnEquals(i) ::= <%self._tokenStartColumn==<i>%>

ImportListener(X) ::= ""

GetExpectedTokenNames() ::= "self.getExpectedTokens().toString(self.literalNames, self.symbolicNames)"

RuleInvocationStack() ::= "str_list(self.getRuleInvocationStack())"

LL_EXACT_AMBIG_DETECTION() ::= <<self._interp.predictionMode = PredictionMode.LL_EXACT_AMBIG_DETECTION>>

ParserPropertyMember() ::= <<
@members {
def Property(self):
    return True

}
>>

PositionAdjustingLexer() ::= <<

def resetAcceptPosition(self, index, line, column):
    self._input.seek(index)
    self.line = line
    self.column = column
    self._interp.consume(self._input)

def nextToken(self):
    if self._interp.__dict__.get("resetAcceptPosition", None) is None:
        self._interp.__dict__["resetAcceptPosition"] = self.resetAcceptPosition
    return super(type(self),self).nextToken()

def emit(self):
    if self._type==PositionAdjustingLexer.TOKENS:
        self.handleAcceptPositionForKeyword("tokens")
    elif self._type==PositionAdjustingLexer.LABEL:
        self.handleAcceptPositionForIdentifier()
    return super(type(self),self).emit()

def handleAcceptPositionForIdentifier(self):
    tokenText = self.text
    identifierLength = 0
    while identifierLength \< len(tokenText) and self.isIdentifierChar(tokenText[identifierLength]):
        identifierLength += 1

    if self._input.index > self._tokenStartCharIndex + identifierLength:
        offset = identifierLength - 1
        self._interp.resetAcceptPosition(self._tokenStartCharIndex + offset,
                self._tokenStartLine, self._tokenStartColumn + offset)
        return True
    else:
        return False


def handleAcceptPositionForKeyword(self, keyword):
    if self._input.index > self._tokenStartCharIndex + len(keyword):
        offset = len(keyword) - 1
        self._interp.resetAcceptPosition(self._tokenStartCharIndex + offset,
                self._tokenStartLine, self._tokenStartColumn + offset)
        return True
    else:
        return False

@staticmethod
def isIdentifierChar(c):
    return c.isalnum() or c == '_'

>>

BasicListener(X) ::= <<
if __name__ is not None and "." in __name__:
    from .<X>Listener import <X>Listener
else:
    from <X>Listener import <X>Listener

class LeafListener(TListener):
    def visitTerminal(self, node):
        print(node.symbol.text)

>>

WalkListener(s) ::= <<
walker = ParseTreeWalker()
walker.walk(TParser.LeafListener(), <s>)
>>

TokenGetterListener(X) ::= <<
if __name__ is not None and "." in __name__:
    from .<X>Listener import <X>Listener
else:
    from <X>Listener import <X>Listener

class LeafListener(TListener):
    def exitA(self, ctx):
        if ctx.getChildCount()==2:
            print(ctx.INT(0).symbol.text + ' ' + ctx.INT(1).symbol.text + ' ' + str_list(ctx.INT()))
        else:
            print(str(ctx.ID().symbol))

>>

RuleGetterListener(X) ::= <<
if __name__ is not None and "." in __name__:
    from .<X>Listener import <X>Listener
else:
    from <X>Listener import <X>Listener

class LeafListener(TListener):
    def exitA(self, ctx):
        if ctx.getChildCount()==2:
            print(ctx.b(0).start.text + ' ' + ctx.b(1).start.text + ' ' + ctx.b()[0].start.text)
        else:
            print(ctx.b(0).start.text)

>>


LRListener(X) ::= <<
if __name__ is not None and "." in __name__:
    from .<X>Listener import <X>Listener
else:
    from <X>Listener import <X>Listener

class LeafListener(TListener):
    def exitE(self, ctx):
        if ctx.getChildCount()==3:
            print(ctx.e(0).start.text + ' ' + ctx.e(1).start.text + ' ' + ctx.e()[0].start.text)
        else:
            print(ctx.INT().symbol.text)

>>

LRWithLabelsListener(X) ::= <<
if __name__ is not None and "." in __name__:
    from .<X>Listener import <X>Listener
else:
    from <X>Listener import <X>Listener

class LeafListener(TListener):
    def exitCall(self, ctx):
        print(ctx.e().start.text + ' ' + str(ctx.eList()))
    def exitInt(self, ctx):
        print(ctx.INT().symbol.text)

>>

DeclareContextListGettersFunction() ::= <<
def foo():
    s = SContext()
    a = s.a()
    b = s.b()
>>

Declare_foo() ::= <<def foo(self):
    print('foo')
>>

Invoke_foo() ::= "self.foo()"

Declare_pred() ::= <<def pred(self, v):
    print('eval=' + str(v).lower())
    return v

>>

Invoke_pred(v) ::= <<self.pred(<v>)>>

isEmpty ::= [
	"": true,
	default: false
]
@@ -0,0 +1,94 @@
package org.antlr.v4.test.runtime.python2;

import org.antlr.v4.test.runtime.python.BasePythonTest;
import org.stringtemplate.v4.ST;

public abstract class BasePython2Test extends BasePythonTest {

	@Override
	protected String getLanguage() {
		return "Python2";
	}

	@Override
	protected String getPythonExecutable() {
		return "python2.7";
	}

	@Override
	protected void writeLexerTestFile(String lexerName, boolean showDFA) {
		ST outputFileST = new ST(
				"from __future__ import print_function\n"
				+ "import sys\n"
				+ "from antlr4 import *\n"
				+ "from <lexerName> import <lexerName>\n"
				+ "\n"
				+ "def main(argv):\n"
				+ "    input = FileStream(argv[1])\n"
				+ "    lexer = <lexerName>(input)\n"
				+ "    stream = CommonTokenStream(lexer)\n"
				+ "    stream.fill()\n"
				+ "    [ print(str(t)) for t in stream.tokens ]\n"
				+ (showDFA ? "    print(lexer._interp.decisionToDFA[Lexer.DEFAULT_MODE].toLexerString(), end='')\n"
						: "") + "\n" + "if __name__ == '__main__':\n"
				+ "    main(sys.argv)\n" + "\n");
		outputFileST.add("lexerName", lexerName);
		writeFile(tmpdir, "Test.py", outputFileST.render());
	}

	@Override
	protected void writeParserTestFile(String parserName, String lexerName,
			String listenerName, String visitorName,
			String parserStartRuleName, boolean debug, boolean trace) {
		if(!parserStartRuleName.endsWith(")"))
			parserStartRuleName += "()";
		ST outputFileST = new ST(
				"import sys\n"
				+ "from antlr4 import *\n"
				+ "from <lexerName> import <lexerName>\n"
				+ "from <parserName> import <parserName>\n"
				+ "from <listenerName> import <listenerName>\n"
				+ "from <visitorName> import <visitorName>\n"
				+ "\n"
				+ "class TreeShapeListener(ParseTreeListener):\n"
				+ "\n"
				+ "    def visitTerminal(self, node):\n"
				+ "        pass\n"
				+ "\n"
				+ "    def visitErrorNode(self, node):\n"
				+ "        pass\n"
				+ "\n"
				+ "    def exitEveryRule(self, ctx):\n"
				+ "        pass\n"
				+ "\n"
				+ "    def enterEveryRule(self, ctx):\n"
				+ "        for child in ctx.getChildren():\n"
				+ "            parent = child.parentCtx\n"
				+ "            if not isinstance(parent, RuleNode) or parent.getRuleContext() != ctx:\n"
				+ "                raise IllegalStateException(\"Invalid parse tree shape detected.\")\n"
				+ "\n"
				+ "def main(argv):\n"
				+ "    input = FileStream(argv[1])\n"
				+ "    lexer = <lexerName>(input)\n"
				+ "    stream = CommonTokenStream(lexer)\n"
				+ "<createParser>"
				+ "    parser.buildParseTrees = True\n"
				+ "    tree = parser.<parserStartRuleName>\n"
				+ "    ParseTreeWalker.DEFAULT.walk(TreeShapeListener(), tree)\n"
				+ "\n" + "if __name__ == '__main__':\n"
				+ "    main(sys.argv)\n" + "\n");
		String stSource = "    parser = <parserName>(stream)\n";
		if(debug)
			stSource += "    parser.addErrorListener(DiagnosticErrorListener())\n";
		if(trace)
			stSource += "    parser.setTrace(True)\n";
		ST createParserST = new ST(stSource);
		outputFileST.add("createParser", createParserST);
		outputFileST.add("parserName", parserName);
		outputFileST.add("lexerName", lexerName);
		outputFileST.add("listenerName", listenerName);
		outputFileST.add("visitorName", visitorName);
		outputFileST.add("parserStartRuleName", parserStartRuleName);
		writeFile(tmpdir, "Test.py", outputFileST.render());
	}
}
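For reference, writeLexerTestFile above renders a small Python driver script. A minimal sketch of what Test.py would look like for a hypothetical lexer named "T" (the name is illustrative, not from this commit; showDFA is false):

from __future__ import print_function
import sys
from antlr4 import *
from T import T  # hypothetical generated lexer

def main(argv):
    input = FileStream(argv[1])
    lexer = T(input)
    stream = CommonTokenStream(lexer)
    stream.fill()
    [ print(str(t)) for t in stream.tokens ]

if __name__ == '__main__':
    main(sys.argv)

The test harness then runs this script with python2.7 and compares its stdout/stderr against the expected output baked into the generated JUnit test.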
@@ -1,7 +1,7 @@
 /* This file is generated by TestGenerator, any edits will be overwritten by the next generation. */
 package org.antlr.v4.test.runtime.python2;

-import org.antlr.v4.test.tool.ErrorQueue;
+import org.antlr.v4.test.runtime.java.ErrorQueue;
 import org.antlr.v4.tool.Grammar;
 import org.junit.Test;

@@ -0,0 +1,26 @@
[The "BSD license"]
Copyright (c) 2015 Terence Parr, Sam Harwell, Eric Vergnaud
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
   derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1 @@
include *.txt
@@ -0,0 +1,4 @@
This is the Python 2.7 runtime for ANTLR.
Visit the ANTLR web sites for more information:
http://www.antlr.org
http://theantlrguy.atlassian.net/wiki/display/ANTLR4/Python+Target
@@ -0,0 +1,13 @@
What's in this release?

- fixed minor bugs due to mix of ascii/unicode encoding
- fixed bug where non-ascii input streams would fail
- added support for visitor pattern
- added support for wildcards in grammar

Breaking change:

In version 4.4, the parser/lexer had a tokenNames member.
This has been removed in favor of the following members:
- literalNames, containing the parsed text
- symbolicNames, corresponding to tokenNames
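The tokenNames removal is the one API break to watch for when upgrading. A minimal migration sketch, assuming a hypothetical generated recognizer bound to `recognizer` and a valid token type `ttype`:

# 4.4 code looked names up in a single list:
#     name = recognizer.tokenNames[ttype]
# 4.5: two lists; which one applies depends on whether the token
# has a fixed literal in the grammar
literal = recognizer.literalNames[ttype]   # e.g. "'-'" for a literal token
symbol = recognizer.symbolicNames[ttype]   # e.g. "ID" for a named token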
@@ -0,0 +1,13 @@
from distutils.core import setup

setup(
    name='antlr4-python2-runtime',
    version='4.5.2',
    packages=['antlr4', 'antlr4.atn', 'antlr4.dfa', 'antlr4.tree', 'antlr4.error', 'antlr4.xpath'],
    package_dir={'': 'src'},
    url='http://www.antlr.org',
    license='BSD',
    author='Eric Vergnaud, Terence Parr, Sam Harwell',
    author_email='eric.vergnaud@wanadoo.fr',
    description='ANTLR 4.5.2 runtime for Python 2.7.6'
)
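After installing (e.g. running `python setup.py install` from the runtime directory), a quick smoke test is simply importing one of the modules declared in the packages list above and exercising it:

from antlr4.InputStream import InputStream

stream = InputStream(u"abc")
print(stream.size)  # expect 3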
@@ -0,0 +1,328 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This implementation of {@link TokenStream} loads tokens from a
# {@link TokenSource} on-demand, and places the tokens in a buffer to provide
# access to any previous token by index.
#
# <p>
# This token stream ignores the value of {@link Token#getChannel}. If your
# parser requires the token stream to filter tokens to only those on a particular
# channel, such as {@link Token#DEFAULT_CHANNEL} or
# {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
# {@link CommonTokenStream}.</p>
from io import StringIO

from antlr4.Token import Token
from antlr4.error.Errors import IllegalStateException


# this is just to keep meaningful parameter types to Parser
class TokenStream(object):

    pass


class BufferedTokenStream(TokenStream):

    def __init__(self, tokenSource):
        # The {@link TokenSource} from which tokens for this stream are fetched.
        self.tokenSource = tokenSource

        # A collection of all tokens fetched from the token source. The list is
        # considered a complete view of the input once {@link #fetchedEOF} is set
        # to {@code true}.
        self.tokens = []

        # The index into {@link #tokens} of the current token (next token to
        # {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
        # {@link #LT LT(1)}.
        #
        # <p>This field is set to -1 when the stream is first constructed or when
        # {@link #setTokenSource} is called, indicating that the first token has
        # not yet been fetched from the token source. For additional information,
        # see the documentation of {@link IntStream} for a description of
        # Initializing Methods.</p>
        self.index = -1

        # Indicates whether the {@link Token#EOF} token has been fetched from
        # {@link #tokenSource} and added to {@link #tokens}. This field improves
        # performance for the following cases:
        #
        # <ul>
        # <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
        # consuming the EOF symbol is optimized by checking the values of
        # {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
        # <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
        # {@link #tokens} is trivial with this field.</li>
        # </ul>
        self.fetchedEOF = False

    def mark(self):
        return 0

    def release(self, marker):
        # no resources to release
        pass

    def reset(self):
        self.seek(0)

    def seek(self, index):
        self.lazyInit()
        self.index = self.adjustSeekIndex(index)

    def get(self, index):
        self.lazyInit()
        return self.tokens[index]

    def consume(self):
        skipEofCheck = False
        if self.index >= 0:
            if self.fetchedEOF:
                # the last token in tokens is EOF. skip check if p indexes any
                # fetched token except the last.
                skipEofCheck = self.index < len(self.tokens) - 1
            else:
                # no EOF token in tokens. skip check if p indexes a fetched token.
                skipEofCheck = self.index < len(self.tokens)
        else:
            # not yet initialized
            skipEofCheck = False

        if not skipEofCheck and self.LA(1) == Token.EOF:
            raise IllegalStateException("cannot consume EOF")

        if self.sync(self.index + 1):
            self.index = self.adjustSeekIndex(self.index + 1)

    # Make sure index {@code i} in tokens has a token.
    #
    # @return {@code true} if a token is located at index {@code i}, otherwise
    #    {@code false}.
    # @see #get(int i)
    #/
    def sync(self, i):
        assert i >= 0
        n = i - len(self.tokens) + 1 # how many more elements we need?
        if n > 0 :
            fetched = self.fetch(n)
            return fetched >= n
        return True

    # Add {@code n} elements to buffer.
    #
    # @return The actual number of elements added to the buffer.
    #/
    def fetch(self, n):
        if self.fetchedEOF:
            return 0
        for i in range(0, n):
            t = self.tokenSource.nextToken()
            t.tokenIndex = len(self.tokens)
            self.tokens.append(t)
            if t.type==Token.EOF:
                self.fetchedEOF = True
                return i + 1
        return n


    # Get all tokens from start..stop inclusively#/
    def getTokens(self, start, stop, types=None):
        if start<0 or stop<0:
            return None
        self.lazyInit()
        subset = []
        if stop >= len(self.tokens):
            stop = len(self.tokens)-1
        for i in range(start, stop+1): # stop is inclusive
            t = self.tokens[i]
            if t.type==Token.EOF:
                break
            if types is None or t.type in types:
                subset.append(t)
        return subset

    def LA(self, i):
        return self.LT(i).type

    def LB(self, k):
        if (self.index-k) < 0:
            return None
        return self.tokens[self.index-k]

    def LT(self, k):
        self.lazyInit()
        if k==0:
            return None
        if k < 0:
            return self.LB(-k)
        i = self.index + k - 1
        self.sync(i)
        if i >= len(self.tokens): # return EOF token
            # EOF must be last token
            return self.tokens[len(self.tokens)-1]
        return self.tokens[i]

    # Allowed derived classes to modify the behavior of operations which change
    # the current stream position by adjusting the target token index of a seek
    # operation. The default implementation simply returns {@code i}. If an
    # exception is thrown in this method, the current stream index should not be
    # changed.
    #
    # <p>For example, {@link CommonTokenStream} overrides this method to ensure that
    # the seek target is always an on-channel token.</p>
    #
    # @param i The target token index.
    # @return The adjusted target token index.

    def adjustSeekIndex(self, i):
        return i

    def lazyInit(self):
        if self.index == -1:
            self.setup()

    def setup(self):
        self.sync(0)
        self.index = self.adjustSeekIndex(0)

    # Reset this token stream by setting its token source.#/
    def setTokenSource(self, tokenSource):
        self.tokenSource = tokenSource
        self.tokens = []
        self.index = -1


    # Given a starting index, return the index of the next token on channel.
    # Return i if tokens[i] is on channel. Return -1 if there are no tokens
    # on channel between i and EOF.
    #/
    def nextTokenOnChannel(self, i, channel):
        self.sync(i)
        if i>=len(self.tokens):
            return -1
        token = self.tokens[i]
        while token.channel!=channel:
            if token.type==Token.EOF:
                return -1
            i += 1
            self.sync(i)
            token = self.tokens[i]
        return i

    # Given a starting index, return the index of the previous token on channel.
    # Return i if tokens[i] is on channel. Return -1 if there are no tokens
    # on channel between i and 0.
    def previousTokenOnChannel(self, i, channel):
        while i>=0 and self.tokens[i].channel!=channel:
            i -= 1
        return i

    # Collect all tokens on specified channel to the right of
    # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
    # EOF. If channel is -1, find any non default channel token.
    def getHiddenTokensToRight(self, tokenIndex, channel=-1):
        self.lazyInit()
        if tokenIndex<0 or tokenIndex>=len(self.tokens):
            raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1))
        from antlr4.Lexer import Lexer
        nextOnChannel = self.nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL)
        from_ = tokenIndex+1
        # if none onchannel to right, nextOnChannel=-1 so set to = last token
        to = (len(self.tokens)-1) if nextOnChannel==-1 else nextOnChannel
        return self.filterForChannel(from_, to, channel)


    # Collect all tokens on specified channel to the left of
    # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
    # If channel is -1, find any non default channel token.
    def getHiddenTokensToLeft(self, tokenIndex, channel=-1):
        self.lazyInit()
        if tokenIndex<0 or tokenIndex>=len(self.tokens):
            raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1))
        from antlr4.Lexer import Lexer
        prevOnChannel = self.previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL)
        if prevOnChannel == tokenIndex - 1:
            return None
        # if none on channel to left, prevOnChannel=-1 then from=0
        from_ = prevOnChannel+1
        to = tokenIndex-1
        return self.filterForChannel(from_, to, channel)


    def filterForChannel(self, left, right, channel):
        hidden = []
        for i in range(left, right+1):
            t = self.tokens[i]
            if channel==-1:
                from antlr4.Lexer import Lexer
                if t.channel!= Lexer.DEFAULT_TOKEN_CHANNEL:
                    hidden.append(t)
            elif t.channel==channel:
                hidden.append(t)
        if len(hidden)==0:
            return None
        return hidden

    def getSourceName(self):
        return self.tokenSource.getSourceName()

    # Get the text of all tokens in this buffer.#/
    def getText(self, interval=None):
        self.lazyInit()
        self.fill()
        if interval is None:
            interval = (0, len(self.tokens)-1)
        start = interval[0]
        if isinstance(start, Token):
            start = start.tokenIndex
        stop = interval[1]
        if isinstance(stop, Token):
            stop = stop.tokenIndex
        if start is None or stop is None or start<0 or stop<0:
            return ""
        if stop >= len(self.tokens):
            stop = len(self.tokens)-1
        with StringIO() as buf:
            for i in range(start, stop+1):
                t = self.tokens[i]
                if t.type==Token.EOF:
                    break
                buf.write(t.text)
            return buf.getvalue()


    # Get all tokens from lexer until EOF#/
    def fill(self):
        self.lazyInit()
        while self.fetch(1000)==1000:
            pass
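As the class comment says, tokens are fetched lazily and buffered for random access by index. A minimal usage sketch, assuming a hypothetical antlr4-generated lexer MyLexer:

from antlr4.InputStream import InputStream
from antlr4.BufferedTokenStream import BufferedTokenStream
from MyLexer import MyLexer  # hypothetical generated lexer

stream = BufferedTokenStream(MyLexer(InputStream(u"a b c")))
stream.fill()                 # force-fetch everything up to EOF
print(len(stream.tokens))     # every token on every channel, EOF included
print(stream.LT(1).text)      # lookahead never consumes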
@@ -0,0 +1,84 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# This default implementation of {@link TokenFactory} creates
# {@link CommonToken} objects.
#
from antlr4.Token import CommonToken

class TokenFactory(object):

    pass

class CommonTokenFactory(TokenFactory):
    #
    # The default {@link CommonTokenFactory} instance.
    #
    # <p>
    # This token factory does not explicitly copy token text when constructing
    # tokens.</p>
    #
    DEFAULT = None

    def __init__(self, copyText=False):
        # Indicates whether {@link CommonToken#setText} should be called after
        # constructing tokens to explicitly set the text. This is useful for cases
        # where the input stream might not be able to provide arbitrary substrings
        # of text from the input after the lexer creates a token (e.g. the
        # implementation of {@link CharStream#getText} in
        # {@link UnbufferedCharStream} throws an
        # {@link UnsupportedOperationException}). Explicitly setting the token text
        # allows {@link Token#getText} to be called at any time regardless of the
        # input stream implementation.
        #
        # <p>
        # The default value is {@code false} to avoid the performance and memory
        # overhead of copying text for every token unless explicitly requested.</p>
        #
        self.copyText = copyText

    def create(self, source, type, text, channel, start, stop, line, column):
        t = CommonToken(source, type, channel, start, stop)
        t.line = line
        t.column = column
        if text is not None:
            t.text = text
        elif self.copyText and source[1] is not None:
            t.text = source[1].getText(start,stop)
        return t

    def createThin(self, type, text):
        t = CommonToken(type=type)
        t.text = text
        return t

CommonTokenFactory.DEFAULT = CommonTokenFactory()
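The copyText flag only matters for input streams that cannot serve substrings after lexing; in the common case the shared DEFAULT instance is enough. A small sketch of direct factory use (the token type 1 is illustrative):

from antlr4.CommonTokenFactory import CommonTokenFactory

factory = CommonTokenFactory.DEFAULT
t = factory.createThin(1, u"hello")  # detached token with explicit text
print(t.text)  # hello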
@@ -0,0 +1,110 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/

#
# This class extends {@link BufferedTokenStream} with functionality to filter
# token streams to tokens on a particular channel (tokens where
# {@link Token#getChannel} returns a particular value).
#
# <p>
# This token stream provides access to all tokens by index or when calling
# methods like {@link #getText}. The channel filtering is only used for code
# accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and
# {@link #LB}.</p>
#
# <p>
# By default, tokens are placed on the default channel
# ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the
# {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to
# call {@link Lexer#setChannel}.
# </p>
#
# <p>
# Note: lexer rules which use the {@code ->skip} lexer command or call
# {@link Lexer#skip} do not produce tokens at all, so input text matched by
# such a rule will not be available as part of the token stream, regardless of
# channel.</p>
#/

from antlr4.BufferedTokenStream import BufferedTokenStream
from antlr4.Token import Token


class CommonTokenStream(BufferedTokenStream):

    def __init__(self, lexer, channel=Token.DEFAULT_CHANNEL):
        super(CommonTokenStream, self).__init__(lexer)
        self.channel = channel

    def adjustSeekIndex(self, i):
        return self.nextTokenOnChannel(i, self.channel)

    def LB(self, k):
        if k==0 or (self.index-k)<0:
            return None
        i = self.index
        n = 1
        # find k good tokens looking backwards
        while n <= k:
            # skip off-channel tokens
            i = self.previousTokenOnChannel(i - 1, self.channel)
            n += 1
        if i < 0:
            return None
        return self.tokens[i]

    def LT(self, k):
        self.lazyInit()
        if k == 0:
            return None
        if k < 0:
            return self.LB(-k)
        i = self.index
        n = 1 # we know tokens[pos] is a good one
        # find k good tokens
        while n < k:
            # skip off-channel tokens, but make sure to not look past EOF
            if self.sync(i + 1):
                i = self.nextTokenOnChannel(i + 1, self.channel)
            n += 1
        return self.tokens[i]

    # Count EOF just once.#/
    def getNumberOfOnChannelTokens(self):
        n = 0
        self.fill()
        for i in range(0, len(self.tokens)):
            t = self.tokens[i]
            if t.channel==self.channel:
                n += 1
            if t.type==Token.EOF:
                break
        return n
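Channel filtering in practice: LT/LB skip off-channel tokens while the buffer keeps everything. A sketch assuming a hypothetical generated lexer MyLexer whose whitespace rule uses the `-> channel(HIDDEN)` lexer command:

from antlr4.InputStream import InputStream
from antlr4.CommonTokenStream import CommonTokenStream
from MyLexer import MyLexer  # hypothetical generated lexer

tokens = CommonTokenStream(MyLexer(InputStream(u"a b")))
tokens.fill()
print(tokens.LT(1).text)  # 'a'
print(tokens.LT(2).text)  # 'b' - the hidden whitespace token is skipped
print(tokens.getHiddenTokensToRight(0))  # the whitespace token, still in the buffer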
@@ -0,0 +1,58 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# This is an InputStream that is loaded from a file all at once
# when you construct the object.
#

import codecs
import unittest

from antlr4.InputStream import InputStream


class FileStream(InputStream):

    def __init__(self, fileName, encoding='ascii'):
        self.fileName = fileName
        # read binary to avoid line ending conversion
        with open(fileName, 'rb') as file:
            bytes = file.read()
            data = codecs.decode(bytes, encoding)
            super(type(self), self).__init__(data)


class TestFileStream(unittest.TestCase):

    def testStream(self):
        stream = FileStream("FileStream.py")
        self.assertTrue(stream.size>0)
@@ -0,0 +1,133 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import unittest


#
# Vacuum all input from a string and then treat it like a buffer.
#
from antlr4.Token import Token


class InputStream (object):

    def __init__(self, data):
        self.name = "<empty>"
        self.strdata = unicode(data)
        self._loadString()

    def _loadString(self):
        self._index = 0
        self.data = [ord(c) for c in self.strdata]
        self._size = len(self.data)

    @property
    def index(self):
        return self._index

    @property
    def size(self):
        return self._size

    # Reset the stream so that it's in the same state it was
    # when the object was created *except* the data array is not
    # touched.
    #
    def reset(self):
        self._index = 0

    def consume(self):
        if self._index >= self._size:
            assert self.LA(1) == Token.EOF
            raise Exception("cannot consume EOF")
        self._index += 1

    def LA(self, offset):
        if offset==0:
            return 0 # undefined
        if offset<0:
            offset += 1 # e.g., translate LA(-1) to use offset=0
        pos = self._index + offset - 1
        if pos < 0 or pos >= self._size: # invalid
            return Token.EOF
        return self.data[pos]

    def LT(self, offset):
        return self.LA(offset)

    # mark/release do nothing; we have entire buffer
    def mark(self):
        return -1

    def release(self, marker):
        pass

    # consume() ahead until p==_index; can't just set p=_index as we must
    # update line and column. If we seek backwards, just set p
    #
    def seek(self, _index):
        if _index<=self._index:
            self._index = _index # just jump; don't update stream state (line, ...)
            return
        # seek forward
        self._index = min(_index, self._size)

    def getText(self, start, stop):
        if stop >= self._size:
            stop = self._size-1
        if start >= self._size:
            return ""
        else:
            return self.strdata[start:stop+1]

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        return self.strdata


class TestInputStream(unittest.TestCase):

    def testStream(self):
        stream = InputStream("abcde")
        self.assertEqual(0, stream.index)
        self.assertEqual(5, stream.size)
        self.assertEqual(ord("a"), stream.LA(1))
        stream.consume()
        self.assertEqual(1, stream.index)
        stream.seek(5)
        self.assertEqual(Token.EOF, stream.LA(1))
        self.assertEqual("bcd", stream.getText(1, 3))
        stream.reset()
        self.assertEqual(0, stream.index)
@@ -0,0 +1,297 @@
from io import StringIO
import unittest

from antlr4.Token import Token


class Interval(object):

    def __init__(self, start, stop):
        self.start = start
        self.stop = stop
        self.range = xrange(start, stop)

    def __contains__(self, item):
        return item in self.range

    def __len__(self):
        return self.stop - self.start

    def __iter__(self):
        return iter(self.range)

class IntervalSet(object):

    def __init__(self):
        self.intervals = None
        self.readOnly = False

    def __iter__(self):
        if self.intervals is not None:
            for i in self.intervals:
                for c in i:
                    yield c

    def __getitem__(self, item):
        i = 0
        for k in self:
            if i==item:
                return k
            else:
                i += 1
        return Token.INVALID_TYPE

    def addOne(self, v):
        self.addRange(Interval(v, v+1))

    def addRange(self, v):
        if self.intervals is None:
            self.intervals = list()
            self.intervals.append(v)
        else:
            # find insert pos
            k = 0
            for i in self.intervals:
                # distinct range -> insert
                if v.stop<i.start:
                    self.intervals.insert(k, v)
                    return
                # contiguous range -> adjust
                elif v.stop==i.start:
                    self.intervals[k] = Interval(v.start, i.stop)
                    return
                # overlapping range -> adjust and reduce
                elif v.start<=i.stop:
                    self.intervals[k] = Interval(min(i.start,v.start), max(i.stop,v.stop))
                    self.reduce(k)
                    return
                k += 1
            # greater than any existing
            self.intervals.append(v)

    def addSet(self, other):
        if other.intervals is not None:
            for i in other.intervals:
                self.addRange(i)
        return self

    def reduce(self, k):
        # only need to reduce if k is not the last
        if k<len(self.intervals)-1:
            l = self.intervals[k]
            r = self.intervals[k+1]
            # if r contained in l
            if l.stop >= r.stop:
                self.intervals.pop(k+1)
                self.reduce(k)
            elif l.stop >= r.start:
                self.intervals[k] = Interval(l.start, r.stop)
                self.intervals.pop(k+1)

    def complement(self, start, stop):
        result = IntervalSet()
        result.addRange(Interval(start,stop+1))
        for i in self.intervals:
            result.removeRange(i)
        return result

    def __contains__(self, item):
        if self.intervals is None:
            return False
        else:
            for i in self.intervals:
                if item in i:
                    return True
            return False

    def __len__(self):
        xlen = 0
        for i in self.intervals:
            xlen += len(i)
        return xlen

    def removeRange(self, v):
        if v.start==v.stop-1:
            self.removeOne(v.start)
        elif self.intervals is not None:
            k = 0
            for i in self.intervals:
                # intervals are ordered
                if v.stop<=i.start:
                    return
                # check for including range, split it
                elif v.start>i.start and v.stop<i.stop:
                    self.intervals[k] = Interval(i.start, v.start)
                    x = Interval(v.stop, i.stop)
                    self.intervals.insert(k, x)
                    return
                # check for included range, remove it
                elif v.start<=i.start and v.stop>=i.stop:
                    self.intervals.pop(k)
                    k = k - 1 # need another pass
                # check for lower boundary
                elif v.start<i.stop:
                    self.intervals[k] = Interval(i.start, v.start)
                # check for upper boundary
                elif v.stop<i.stop:
                    self.intervals[k] = Interval(v.stop, i.stop)
                k += 1

    def removeOne(self, v):
        if self.intervals is not None:
            k = 0
            for i in self.intervals:
                # intervals is ordered
                if v<i.start:
                    return
                # check for single value range
                elif v==i.start and v==i.stop-1:
                    self.intervals.pop(k)
                    return
                # check for lower boundary
                elif v==i.start:
                    self.intervals[k] = Interval(i.start+1, i.stop)
                    return
                # check for upper boundary
                elif v==i.stop-1:
                    self.intervals[k] = Interval(i.start, i.stop-1)
                    return
                # split existing range
                elif v<i.stop-1:
                    x = Interval(i.start, v)
                    i.start = v + 1
                    self.intervals.insert(k, x)
                    return
                k += 1


    def toString(self, literalNames, symbolicNames):
        if self.intervals is None:
            return u"{}"
        with StringIO() as buf:
            if len(self)>1:
                buf.write(u"{")
            first = True
            for i in self.intervals:
                for j in i:
                    if not first:
                        buf.write(u", ")
                    buf.write(self.elementName(literalNames, symbolicNames, j))
                    first = False
            if len(self)>1:
                buf.write(u"}")
            return buf.getvalue()

    def elementName(self, literalNames, symbolicNames, a):
        if a==Token.EOF:
            return u"<EOF>"
        elif a==Token.EPSILON:
            return u"<EPSILON>"
        else:
            if a<len(literalNames):
                return literalNames[a]
            if a<len(symbolicNames):
                return symbolicNames[a]
            return u"<UNKNOWN>"


class TestIntervalSet(unittest.TestCase):

    def testEmpty(self):
        s = IntervalSet()
        self.assertIsNone(s.intervals)
        self.assertFalse(30 in s)

    def testOne(self):
        s = IntervalSet()
        s.addOne(30)
        self.assertTrue(30 in s)
        self.assertFalse(29 in s)
        self.assertFalse(31 in s)

    def testTwo(self):
        s = IntervalSet()
        s.addOne(30)
        s.addOne(40)
        self.assertTrue(30 in s)
        self.assertTrue(40 in s)
        self.assertFalse(35 in s)

    def testRange(self):
        s = IntervalSet()
        s.addRange(Interval(30,41))
        self.assertTrue(30 in s)
        self.assertTrue(40 in s)
        self.assertTrue(35 in s)

    def testDistinct1(self):
        s = IntervalSet()
        s.addRange(Interval(30,32))
        s.addRange(Interval(40,42))
        self.assertEquals(2,len(s.intervals))
        self.assertTrue(30 in s)
        self.assertTrue(40 in s)
        self.assertFalse(35 in s)

    def testDistinct2(self):
        s = IntervalSet()
        s.addRange(Interval(40,42))
        s.addRange(Interval(30,32))
        self.assertEquals(2,len(s.intervals))
        self.assertTrue(30 in s)
        self.assertTrue(40 in s)
        self.assertFalse(35 in s)

    def testContiguous1(self):
        s = IntervalSet()
        s.addRange(Interval(30,36))
        s.addRange(Interval(36,41))
        self.assertEquals(1,len(s.intervals))
        self.assertTrue(30 in s)
        self.assertTrue(40 in s)
        self.assertTrue(35 in s)

    def testContiguous2(self):
        s = IntervalSet()
        s.addRange(Interval(36,41))
        s.addRange(Interval(30,36))
        self.assertEquals(1,len(s.intervals))
        self.assertTrue(30 in s)
        self.assertTrue(40 in s)

    def testOverlapping1(self):
        s = IntervalSet()
        s.addRange(Interval(30,40))
        s.addRange(Interval(35,45))
        self.assertEquals(1,len(s.intervals))
        self.assertTrue(30 in s)
        self.assertTrue(44 in s)

    def testOverlapping2(self):
        s = IntervalSet()
        s.addRange(Interval(35,45))
        s.addRange(Interval(30,40))
        self.assertEquals(1,len(s.intervals))
        self.assertTrue(30 in s)
        self.assertTrue(44 in s)

    def testOverlapping3(self):
        s = IntervalSet()
        s.addRange(Interval(30,32))
        s.addRange(Interval(40,42))
        s.addRange(Interval(50,52))
        s.addRange(Interval(20,61))
        self.assertEquals(1,len(s.intervals))
        self.assertTrue(20 in s)
        self.assertTrue(60 in s)

    def testComplement(self):
        s = IntervalSet()
        s.addRange(Interval(10,21))
        c = s.complement(1,100)
        self.assertTrue(1 in c)
        self.assertTrue(100 in c)
        self.assertTrue(10 not in c)
        self.assertTrue(20 not in c)
@@ -0,0 +1,195 @@
#
# [The "BSD license"]
#  Copyright (c) 2012 Terence Parr
#  Copyright (c) 2012 Sam Harwell
#  Copyright (c) 2014 Eric Vergnaud
#  All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#  3. The name of the author may not be used to endorse or promote products
#     derived from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
#  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
#  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
#  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
#  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
#  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
#  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from antlr4.IntervalSet import IntervalSet, Interval
from antlr4.Token import Token
from antlr4.PredictionContext import PredictionContext, SingletonPredictionContext, PredictionContextFromRuleContext
from antlr4.atn.ATNConfig import ATNConfig
from antlr4.atn.ATNState import RuleStopState
from antlr4.atn.Transition import WildcardTransition, NotSetTransition, AbstractPredicateTransition, RuleTransition


class LL1Analyzer (object):

    #* Special value added to the lookahead sets to indicate that we hit
    #  a predicate during analysis if {@code seeThruPreds==false}.
    #/
    HIT_PRED = Token.INVALID_TYPE

    def __init__(self, atn):
        self.atn = atn

    #*
    # Calculates the SLL(1) expected lookahead set for each outgoing transition
    # of an {@link ATNState}. The returned array has one element for each
    # outgoing transition in {@code s}. If the closure from transition
    # <em>i</em> leads to a semantic predicate before matching a symbol, the
    # element at index <em>i</em> of the result will be {@code null}.
    #
    # @param s the ATN state
    # @return the expected symbols for each outgoing transition of {@code s}.
    #/
    def getDecisionLookahead(self, s):
        if s is None:
            return None

        count = len(s.transitions)
        look = [None] * count   # was "[] * count", which is just an empty list
        for alt in range(0, count):
            look[alt] = set()
            lookBusy = set()
            seeThruPreds = False # fail to get lookahead upon pred
            self._LOOK(s.transition(alt).target, None, PredictionContext.EMPTY, \
                look[alt], lookBusy, set(), seeThruPreds, False)
            # Wipe out lookahead for this alternative if we found nothing
            # or we had a predicate when we !seeThruPreds
            if len(look[alt])==0 or self.HIT_PRED in look[alt]:
                look[alt] = None
        return look

    #*
    # Compute set of tokens that can follow {@code s} in the ATN in the
    # specified {@code ctx}.
    #
    # <p>If {@code ctx} is {@code null} and the end of the rule containing
    # {@code s} is reached, {@link Token#EPSILON} is added to the result set.
    # If {@code ctx} is not {@code null} and the end of the outermost rule is
    # reached, {@link Token#EOF} is added to the result set.</p>
    #
    # @param s the ATN state
    # @param stopState the ATN state to stop at. This can be a
    # {@link BlockEndState} to detect epsilon paths through a closure.
    # @param ctx the complete parser context, or {@code null} if the context
    # should be ignored
    #
    # @return The set of tokens that can follow {@code s} in the ATN in the
    # specified {@code ctx}.
    #/
    def LOOK(self, s, stopState=None, ctx=None):
        r = IntervalSet()
        seeThruPreds = True # ignore preds; get all lookahead
        lookContext = PredictionContextFromRuleContext(s.atn, ctx) if ctx is not None else None
        self._LOOK(s, stopState, lookContext, r, set(), set(), seeThruPreds, True)
        return r

    #*
    # Compute set of tokens that can follow {@code s} in the ATN in the
    # specified {@code ctx}.
    #
    # <p>If {@code ctx} is {@code null} and {@code stopState} or the end of the
    # rule containing {@code s} is reached, {@link Token#EPSILON} is added to
    # the result set. If {@code ctx} is not {@code null} and {@code addEOF} is
    # {@code true} and {@code stopState} or the end of the outermost rule is
    # reached, {@link Token#EOF} is added to the result set.</p>
    #
    # @param s the ATN state.
    # @param stopState the ATN state to stop at. This can be a
    # {@link BlockEndState} to detect epsilon paths through a closure.
    # @param ctx The outer context, or {@code null} if the outer context should
    # not be used.
    # @param look The result lookahead set.
    # @param lookBusy A set used for preventing epsilon closures in the ATN
    # from causing a stack overflow. Outside code should pass
    # {@code new HashSet<ATNConfig>} for this argument.
    # @param calledRuleStack A set used for preventing left recursion in the
    # ATN from causing a stack overflow. Outside code should pass
    # {@code new BitSet()} for this argument.
    # @param seeThruPreds {@code true} to treat semantic predicates as
    # implicitly {@code true} and "see through them", otherwise {@code false}
    # to treat semantic predicates as opaque and add {@link #HIT_PRED} to the
    # result if one is encountered.
    # @param addEOF Add {@link Token#EOF} to the result if the end of the
    # outermost context is reached. This parameter has no effect if {@code ctx}
    # is {@code null}.
    #/
    def _LOOK(self, s, stopState, ctx, look, lookBusy, \
            calledRuleStack, seeThruPreds, addEOF):
        c = ATNConfig(s, 0, ctx)

        if c in lookBusy:
            return
        lookBusy.add(c)

        if s == stopState:
            if ctx is None:
                look.addOne(Token.EPSILON)
                return
            elif ctx.isEmpty() and addEOF:
                look.addOne(Token.EOF)
                return

        if isinstance(s, RuleStopState):
            if ctx is None:
                look.addOne(Token.EPSILON)
                return
            elif ctx.isEmpty() and addEOF:
                look.addOne(Token.EOF)
                return

            if ctx != PredictionContext.EMPTY:
                # run thru all possible stack tops in ctx
                for i in range(0, len(ctx)):
                    returnState = self.atn.states[ctx.getReturnState(i)]
                    removed = returnState.ruleIndex in calledRuleStack
                    try:
                        calledRuleStack.discard(returnState.ruleIndex)
                        self._LOOK(returnState, stopState, ctx.getParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
                    finally:
                        if removed:
                            calledRuleStack.add(returnState.ruleIndex)
                return

        for t in s.transitions:
            if type(t) == RuleTransition:
                if t.target.ruleIndex in calledRuleStack:
                    continue

                newContext = SingletonPredictionContext.create(ctx, t.followState.stateNumber)

                try:
                    calledRuleStack.add(t.target.ruleIndex)
                    self._LOOK(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
                finally:
                    calledRuleStack.remove(t.target.ruleIndex)
            elif isinstance(t, AbstractPredicateTransition):
                if seeThruPreds:
                    self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
                else:
                    look.addOne(self.HIT_PRED)
            elif t.isEpsilon:
                self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
            elif type(t) == WildcardTransition:
                look.addRange( Interval(Token.MIN_USER_TOKEN_TYPE, self.atn.maxTokenType + 1) )
            else:
                set_ = t.label   # renamed from "set", which shadowed the builtin
                if set_ is not None:
                    if isinstance(t, NotSetTransition):
                        set_ = set_.complement(Token.MIN_USER_TOKEN_TYPE, self.atn.maxTokenType)
                    look.addSet(set_)
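# A minimal sketch of how LOOK is typically driven (an assumption-laden
# example: "atn" and "stateNumber" must come from a loaded recognizer;
# neither is defined in this commit):
from antlr4.LL1Analyzer import LL1Analyzer

analyzer = LL1Analyzer(atn)                        # atn: a deserialized ATN
following = analyzer.LOOK(atn.states[stateNumber]) # tokens that can follow this state
print(following)                                   # an IntervalSet of token types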
@@ -0,0 +1,343 @@
# [The "BSD license"]
#  Copyright (c) 2012 Terence Parr
#  Copyright (c) 2012 Sam Harwell
#  Copyright (c) 2014 Eric Vergnaud
#  All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#  3. The name of the author may not be used to endorse or promote products
#     derived from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
#  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
#  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
#  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
#  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
#  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
#  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/

# A lexer is a recognizer that draws input symbols from a character stream.
#  lexer grammars result in a subclass of this object. A Lexer object
#  uses simplified match() and error recovery mechanisms in the interest
#  of speed.
#/
from io import StringIO

from antlr4.CommonTokenFactory import CommonTokenFactory
from antlr4.Recognizer import Recognizer
from antlr4.Token import Token
from antlr4.error.Errors import IllegalStateException, LexerNoViableAltException


class TokenSource(object):

    pass


class Lexer(Recognizer, TokenSource):

    DEFAULT_MODE = 0
    MORE = -2
    SKIP = -3

    DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL
    HIDDEN = Token.HIDDEN_CHANNEL
    MIN_CHAR_VALUE = '\u0000'
    MAX_CHAR_VALUE = '\uFFFE'

    def __init__(self, input):
        super(Lexer, self).__init__()
        self._input = input
        self._factory = CommonTokenFactory.DEFAULT
        self._tokenFactorySourcePair = (self, input)

        self._interp = None # child classes must populate this

        # The goal of all lexer rules/methods is to create a token object.
        #  This is an instance variable as multiple rules may collaborate to
        #  create a single token.  nextToken will return this object after
        #  matching lexer rule(s).  If you subclass to allow multiple token
        #  emissions, then set this to the last token to be matched or
        #  something nonnull so that the auto token emit mechanism will not
        #  emit another token.
        self._token = None

        # What character index in the stream did the current token start at?
        #  Needed, for example, to get the text for current token.  Set at
        #  the start of nextToken.
        self._tokenStartCharIndex = -1

        # The line on which the first character of the token resides#/
        self._tokenStartLine = -1

        # The character position of first character within the line#/
        self._tokenStartColumn = -1

        # Once we see EOF on char stream, next token will be EOF.
        #  If you have DONE : EOF ; then you see DONE EOF.
        self._hitEOF = False

        # The channel number for the current token#/
        self._channel = Token.DEFAULT_CHANNEL

        # The token type for the current token#/
        self._type = Token.INVALID_TYPE

        self._modeStack = []
        self._mode = self.DEFAULT_MODE

        # You can set the text for the current token to override what is in
        #  the input char buffer.  Use setText() or can set this instance var.
        #/
        self._text = None

    def reset(self):
        # wack Lexer state variables
        if self._input is not None:
            self._input.seek(0) # rewind the input
        self._token = None
        self._type = Token.INVALID_TYPE
        self._channel = Token.DEFAULT_CHANNEL
        self._tokenStartCharIndex = -1
        self._tokenStartColumn = -1
        self._tokenStartLine = -1
        self._text = None

        self._hitEOF = False
        self._mode = Lexer.DEFAULT_MODE
        self._modeStack = []

        self._interp.reset()

    # Return a token from this source; i.e., match a token on the char
    #  stream.
    def nextToken(self):
        if self._input is None:
            raise IllegalStateException("nextToken requires a non-null input stream.")

        # Mark start location in char stream so unbuffered streams are
        # guaranteed at least have text of current token
        tokenStartMarker = self._input.mark()
        try:
            while True:
                if self._hitEOF:
                    self.emitEOF()
                    return self._token
                self._token = None
                self._channel = Token.DEFAULT_CHANNEL
                self._tokenStartCharIndex = self._input.index
                self._tokenStartColumn = self._interp.column
                self._tokenStartLine = self._interp.line
                self._text = None
                continueOuter = False
                while True:
                    self._type = Token.INVALID_TYPE
                    ttype = self.SKIP
                    try:
                        ttype = self._interp.match(self._input, self._mode)
                    except LexerNoViableAltException as e:
                        self.notifyListeners(e) # report error
                        self.recover(e)
                    if self._input.LA(1)==Token.EOF:
                        self._hitEOF = True
                    if self._type == Token.INVALID_TYPE:
                        self._type = ttype
                    if self._type == self.SKIP:
                        continueOuter = True
                        break
                    if self._type!=self.MORE:
                        break
                if continueOuter:
                    continue
                if self._token is None:
                    self.emit()
                return self._token
        finally:
            # make sure we release marker after match or
            # unbuffered char stream will keep buffering
            self._input.release(tokenStartMarker)

    # Instruct the lexer to skip creating a token for current lexer rule
    #  and look for another token.  nextToken() knows to keep looking when
    #  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
    #  if token==null at end of any token rule, it creates one for you
    #  and emits it.
    #/
    def skip(self):
        self._type = self.SKIP

    def more(self):
        self._type = self.MORE

    def mode(self, m):
        self._mode = m

    def pushMode(self, m):
        if self._interp.debug:
            print("pushMode " + str(m))
        self._modeStack.append(self._mode)
        self.mode(m)

    def popMode(self):
        if len(self._modeStack)==0:
            raise Exception("Empty Stack")
        if self._interp.debug:
            print("popMode back to " + str(self._modeStack[:-1]))   # was a str+list concat, which raises TypeError
        self.mode( self._modeStack.pop() )
        return self._mode

    # Set the char stream and reset the lexer#/
    @property
    def inputStream(self):
        return self._input

    @inputStream.setter
    def inputStream(self, input):
        self._input = None
        self._tokenFactorySourcePair = (self, self._input)
        self.reset()
        self._input = input
        self._tokenFactorySourcePair = (self, self._input)

    @property
    def sourceName(self):
        return self._input.sourceName

    # By default does not support multiple emits per nextToken invocation
    #  for efficiency reasons.  Subclass and override this method, nextToken,
    #  and getToken (to push tokens into a list and pull from that list
    #  rather than a single variable as this implementation does).
    #/
    def emitToken(self, token):
        self._token = token

    # The standard method called to automatically emit a token at the
    #  outermost lexical rule.  The token object should point into the
    #  char buffer start..stop.  If there is a text override in 'text',
    #  use that to set the token's text.  Override this method to emit
    #  custom Token objects or provide a new factory.
    #/
    def emit(self):
        t = self._factory.create(self._tokenFactorySourcePair, self._type, self._text, self._channel, self._tokenStartCharIndex,
                                 self.getCharIndex()-1, self._tokenStartLine, self._tokenStartColumn)
        self.emitToken(t)
        return t

    def emitEOF(self):
        cpos = self.column
        lpos = self.line
        eof = self._factory.create(self._tokenFactorySourcePair, Token.EOF, None, Token.DEFAULT_CHANNEL, self._input.index,
                                   self._input.index-1, lpos, cpos)
        self.emitToken(eof)
        return eof

    @property
    def type(self):
        return self._type

    @type.setter
    def type(self, type):
        self._type = type

    @property
    def line(self):
        return self._interp.line

    @line.setter
    def line(self, line):
        self._interp.line = line

    @property
    def column(self):
        return self._interp.column

    @column.setter
    def column(self, column):
        self._interp.column = column

    # What is the index of the current character of lookahead?#/
    def getCharIndex(self):
        return self._input.index

    # Return the text matched so far for the current token or any
    #  text override.
    @property
    def text(self):
        if self._text is not None:
            return self._text
        else:
            return self._interp.getText(self._input)

    # Set the complete text of this token; it wipes any previous
    #  changes to the text.
    @text.setter
    def text(self, txt):
        self._text = txt

    # Return a list of all Token objects in input char stream.
    #  Forces load of all tokens. Does not include EOF token.
    #/
    def getAllTokens(self):
        tokens = []
        t = self.nextToken()
        while t.type!=Token.EOF:
            tokens.append(t)
            t = self.nextToken()
        return tokens

    def notifyListeners(self, e):
        start = self._tokenStartCharIndex
        stop = self._input.index
        text = self._input.getText(start, stop)
        msg = "token recognition error at: '" + self.getErrorDisplay(text) + "'"
        listener = self.getErrorListenerDispatch()
        listener.syntaxError(self, None, self._tokenStartLine, self._tokenStartColumn, msg, e)

    def getErrorDisplay(self, s):
        with StringIO() as buf:
            for c in s:
                buf.write(unicode(self.getErrorDisplayForChar(c)))
            return buf.getvalue()

    def getErrorDisplayForChar(self, c):
        if ord(c[0])==Token.EOF:
            return "<EOF>"
        elif c=='\n':
            return "\\n"
        elif c=='\t':
            return "\\t"
        elif c=='\r':
            return "\\r"
        else:
            return str(c)

    def getCharErrorDisplay(self, c):
        return "'" + self.getErrorDisplayForChar(c) + "'"

    # Lexers can normally match any char in its vocabulary after matching
    #  a token, so do the easy thing and just kill a character and hope
    #  it all works out.  You can instead use the rule invocation stack
    #  to do sophisticated error recovery if you are in a fragment rule.
    #/
    def recover(self, re):
        if self._input.LA(1) != Token.EOF:
            if isinstance(re, LexerNoViableAltException):
                # skip a char and try again
                self._interp.consume(self._input)
            else:
                # TODO: Do we lose character or line position information?
                self._input.consume()
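# A minimal usage sketch for the Lexer API above (not from the commit;
# "MyLexer" is a hypothetical ANTLR-generated lexer, and the antlr4
# runtime package is assumed to be importable):
from __future__ import print_function
from antlr4.InputStream import InputStream
from MyLexer import MyLexer

lexer = MyLexer(InputStream(u"some input"))
for token in lexer.getAllTokens():    # drains nextToken() until EOF
    print(token.type, token.text)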
@@ -0,0 +1,139 @@
#
# Provides an implementation of {@link TokenSource} as a wrapper around a list
# of {@link Token} objects.
#
# <p>If the final token in the list is an {@link Token#EOF} token, it will be used
# as the EOF token for every call to {@link #nextToken} after the end of the
# list is reached. Otherwise, an EOF token will be created.</p>
#
from antlr4.CommonTokenFactory import CommonTokenFactory
from antlr4.Lexer import TokenSource
from antlr4.Token import Token


class ListTokenSource(TokenSource):

    # Constructs a new {@link ListTokenSource} instance from the specified
    # collection of {@link Token} objects and source name.
    #
    # @param tokens The collection of {@link Token} objects to provide as a
    # {@link TokenSource}.
    # @param sourceName The name of the {@link TokenSource}. If this value is
    # {@code null}, {@link #getSourceName} will attempt to infer the name from
    # the next {@link Token} (or the previous token if the end of the input has
    # been reached).
    #
    # @exception NullPointerException if {@code tokens} is {@code null}
    #
    def __init__(self, tokens, sourceName=None):
        if tokens is None:
            raise ReferenceError("tokens cannot be null")
        self.tokens = tokens
        self.sourceName = sourceName
        # The index into {@link #tokens} of token to return by the next call to
        # {@link #nextToken}. The end of the input is indicated by this value
        # being greater than or equal to the number of items in {@link #tokens}.
        self.pos = 0
        # This field caches the EOF token for the token source.
        self.eofToken = None
        # This is the backing field for {@link #getTokenFactory} and
        # {@link #setTokenFactory}.
        self._factory = CommonTokenFactory.DEFAULT

    #
    # {@inheritDoc}
    #
    @property
    def column(self):
        if self.pos < len(self.tokens):
            return self.tokens[self.pos].column
        elif self.eofToken is not None:
            return self.eofToken.column
        elif len(self.tokens) > 0:
            # have to calculate the result from the line/column of the previous
            # token, along with the text of the token.
            lastToken = self.tokens[len(self.tokens) - 1]
            tokenText = lastToken.text   # was getText(); the runtime Token exposes a text property
            if tokenText is not None:
                lastNewLine = tokenText.rfind('\n')
                if lastNewLine >= 0:
                    return len(tokenText) - lastNewLine - 1
            return lastToken.column + lastToken.stopIndex - lastToken.startIndex + 1

        # only reach this if tokens is empty, meaning EOF occurs at the first
        # position in the input
        return 0

    #
    # {@inheritDoc}
    #
    def nextToken(self):
        if self.pos >= len(self.tokens):
            if self.eofToken is None:
                start = -1
                if len(self.tokens) > 0:
                    previousStop = self.tokens[len(self.tokens) - 1].stopIndex
                    if previousStop != -1:
                        start = previousStop + 1
                stop = max(-1, start - 1)
                self.eofToken = self._factory.create((self, self.getInputStream()),
                        Token.EOF, "EOF", Token.DEFAULT_CHANNEL, start, stop, self.line, self.column)
            return self.eofToken
        t = self.tokens[self.pos]
        if self.pos == len(self.tokens) - 1 and t.type == Token.EOF:
            self.eofToken = t   # was a local assignment that never cached the EOF token
        self.pos += 1
        return t

    #
    # {@inheritDoc}
    #
    @property
    def line(self):
        if self.pos < len(self.tokens):
            return self.tokens[self.pos].line
        elif self.eofToken is not None:
            return self.eofToken.line
        elif len(self.tokens) > 0:
            # have to calculate the result from the line/column of the previous
            # token, along with the text of the token.
            lastToken = self.tokens[len(self.tokens) - 1]
            line = lastToken.line
            tokenText = lastToken.text
            if tokenText is not None:
                for c in tokenText:
                    if c == '\n':
                        line += 1

            # if no text is available, assume the token did not contain any newline characters.
            return line

        # only reach this if tokens is empty, meaning EOF occurs at the first
        # position in the input
        return 1

    #
    # {@inheritDoc}
    #
    def getInputStream(self):
        if self.pos < len(self.tokens):
            return self.tokens[self.pos].getInputStream()
        elif self.eofToken is not None:
            return self.eofToken.getInputStream()
        elif len(self.tokens) > 0:
            return self.tokens[len(self.tokens) - 1].getInputStream()
        else:
            # no input stream information is available
            return None

    #
    # {@inheritDoc}
    #
    def getSourceName(self):
        if self.sourceName is not None:
            return self.sourceName
        inputStream = self.getInputStream()
        if inputStream is not None:
            return inputStream.getSourceName()
        else:
            return "List"
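# A minimal replay sketch for ListTokenSource (assumes "tokens" is a list
# of Token objects captured earlier, e.g. via lexer.getAllTokens(); not
# part of the committed file):
from antlr4.ListTokenSource import ListTokenSource
from antlr4.CommonTokenStream import CommonTokenStream

source = ListTokenSource(tokens, sourceName="replay")
stream = CommonTokenStream(source)    # a parser can now consume the stream without re-lexing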
@@ -0,0 +1,575 @@
# [The "BSD license"]
#  Copyright (c) 2012 Terence Parr
#  Copyright (c) 2012 Sam Harwell
#  Copyright (c) 2014 Eric Vergnaud
#  All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#  3. The name of the author may not be used to endorse or promote products
#     derived from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
#  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
#  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
#  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
#  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
#  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
#  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import print_function
from antlr4.error.ErrorStrategy import DefaultErrorStrategy
from antlr4.Recognizer import Recognizer
from antlr4.Token import Token
from antlr4.Lexer import Lexer
from antlr4.atn.ATNDeserializer import ATNDeserializer
from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions
from antlr4.error.Errors import UnsupportedOperationException
from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher
from antlr4.tree.Tree import ParseTreeListener

class TraceListener(ParseTreeListener):

    def __init__(self, parser):
        self._parser = parser

    def enterEveryRule(self, ctx):
        print("enter " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text)

    def visitTerminal(self, node):
        print("consume " + str(node.symbol) + " rule " + self._parser.ruleNames[self._parser._ctx.getRuleIndex()])

    def visitErrorNode(self, node):
        pass

    def exitEveryRule(self, ctx):
        print("exit " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text)


# This is all the parsing support code essentially; most of it is error recovery stuff.#
class Parser (Recognizer):

    # This field maps from the serialized ATN string to the deserialized {@link ATN} with
    # bypass alternatives.
    #
    # @see ATNDeserializationOptions#isGenerateRuleBypassTransitions()
    #
    bypassAltsAtnCache = dict()

    def __init__(self, input):
        super(Parser, self).__init__()
        # The input stream.
        self._input = None
        # The error handling strategy for the parser. The default value is a new
        # instance of {@link DefaultErrorStrategy}.
        self._errHandler = DefaultErrorStrategy()
        self._precedenceStack = list()
        self._precedenceStack.append(0)
        # The {@link ParserRuleContext} object for the currently executing rule.
        # This is always non-null during the parsing process.
        self._ctx = None
        # Specifies whether or not the parser should construct a parse tree during
        # the parsing process. The default value is {@code true}.
        self.buildParseTrees = True
        # When {@link #setTrace}{@code (true)} is called, a reference to the
        # {@link TraceListener} is stored here so it can be easily removed in a
        # later call to {@link #setTrace}{@code (false)}. The listener itself is
        # implemented as a parser listener so this field is not directly used by
        # other parser methods.
        self._tracer = None
        # The list of {@link ParseTreeListener} listeners registered to receive
        # events during the parse.
        self._parseListeners = None
        # The number of syntax errors reported during parsing. This value is
        # incremented each time {@link #notifyErrorListeners} is called.
        self._syntaxErrors = 0
        self.setInputStream(input)

    # reset the parser's state#
    def reset(self):
        if self._input is not None:
            self._input.seek(0)
        self._errHandler.reset(self)
        self._ctx = None
        self._syntaxErrors = 0
        self.setTrace(False)
        self._precedenceStack = list()
        self._precedenceStack.append(0)
        if self._interp is not None:
            self._interp.reset()

    # Match current input symbol against {@code ttype}. If the symbol type
    # matches, {@link ANTLRErrorStrategy#reportMatch} and {@link #consume} are
    # called to complete the match process.
    #
    # <p>If the symbol type does not match,
    # {@link ANTLRErrorStrategy#recoverInline} is called on the current error
    # strategy to attempt recovery. If {@link #getBuildParseTree} is
    # {@code true} and the token index of the symbol returned by
    # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
    # the parse tree by calling {@link ParserRuleContext#addErrorNode}.</p>
    #
    # @param ttype the token type to match
    # @return the matched symbol
    # @throws RecognitionException if the current input symbol did not match
    # {@code ttype} and the error strategy could not recover from the
    # mismatched symbol
    def match(self, ttype):
        t = self.getCurrentToken()
        if t.type==ttype:
            self._errHandler.reportMatch(self)
            self.consume()
        else:
            t = self._errHandler.recoverInline(self)
            if self.buildParseTrees and t.tokenIndex==-1:
                # we must have conjured up a new token during single token insertion
                # if it's not the current symbol
                self._ctx.addErrorNode(t)
        return t

    # Match current input symbol as a wildcard. If the symbol type matches
    # (i.e. has a value greater than 0), {@link ANTLRErrorStrategy#reportMatch}
    # and {@link #consume} are called to complete the match process.
    #
    # <p>If the symbol type does not match,
    # {@link ANTLRErrorStrategy#recoverInline} is called on the current error
    # strategy to attempt recovery. If {@link #getBuildParseTree} is
    # {@code true} and the token index of the symbol returned by
    # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
    # the parse tree by calling {@link ParserRuleContext#addErrorNode}.</p>
    #
    # @return the matched symbol
    # @throws RecognitionException if the current input symbol did not match
    # a wildcard and the error strategy could not recover from the mismatched
    # symbol
    def matchWildcard(self):
        t = self.getCurrentToken()
        if t.type > 0:
            self._errHandler.reportMatch(self)
            self.consume()
        else:
            t = self._errHandler.recoverInline(self)
            if self.buildParseTrees and t.tokenIndex == -1:
                # we must have conjured up a new token during single token insertion
                # if it's not the current symbol
                self._ctx.addErrorNode(t)
        return t

    def getParseListeners(self):
        return list() if self._parseListeners is None else self._parseListeners

    # Registers {@code listener} to receive events during the parsing process.
    #
    # <p>To support output-preserving grammar transformations (including but not
    # limited to left-recursion removal, automated left-factoring, and
    # optimized code generation), calls to listener methods during the parse
    # may differ substantially from calls made by
    # {@link ParseTreeWalker#DEFAULT} used after the parse is complete. In
    # particular, rule entry and exit events may occur in a different order
    # during the parse than after the parser. In addition, calls to certain
    # rule entry methods may be omitted.</p>
    #
    # <p>With the following specific exceptions, calls to listener events are
    # <em>deterministic</em>, i.e. for identical input the calls to listener
    # methods will be the same.</p>
    #
    # <ul>
    # <li>Alterations to the grammar used to generate code may change the
    # behavior of the listener calls.</li>
    # <li>Alterations to the command line options passed to ANTLR 4 when
    # generating the parser may change the behavior of the listener calls.</li>
    # <li>Changing the version of the ANTLR Tool used to generate the parser
    # may change the behavior of the listener calls.</li>
    # </ul>
    #
    # @param listener the listener to add
    #
    # @throws NullPointerException if {@code} listener is {@code null}
    #
    def addParseListener(self, listener):
        if listener is None:
            raise ReferenceError("listener")
        if self._parseListeners is None:
            self._parseListeners = []
        self._parseListeners.append(listener)

    #
    # Remove {@code listener} from the list of parse listeners.
    #
    # <p>If {@code listener} is {@code null} or has not been added as a parse
    # listener, this method does nothing.</p>
    # @param listener the listener to remove
    #
    def removeParseListener(self, listener):
        if self._parseListeners is not None:
            if listener in self._parseListeners:   # guard makes the documented no-op contract hold
                self._parseListeners.remove(listener)
            if len(self._parseListeners)==0:
                self._parseListeners = None

    # Remove all parse listeners.
    def removeParseListeners(self):
        self._parseListeners = None

    # Notify any parse listeners of an enter rule event.
    def triggerEnterRuleEvent(self):
        if self._parseListeners is not None:
            for listener in self._parseListeners:
                listener.enterEveryRule(self._ctx)
                self._ctx.enterRule(listener)

    #
    # Notify any parse listeners of an exit rule event.
    #
    # @see #addParseListener
    #
    def triggerExitRuleEvent(self):
        if self._parseListeners is not None:
            # reverse order walk of listeners
            for listener in reversed(self._parseListeners):
                self._ctx.exitRule(listener)
                listener.exitEveryRule(self._ctx)

    def getTokenFactory(self):
        return self._input.tokenSource._factory

    # Tell our token source and error strategy about a new way to create tokens.#
    def setTokenFactory(self, factory):
        self._input.tokenSource._factory = factory

    # The ATN with bypass alternatives is expensive to create so we create it
    # lazily.
    #
    # @throws UnsupportedOperationException if the current parser does not
    # implement the {@link #getSerializedATN()} method.
    #
    def getATNWithBypassAlts(self):
        serializedAtn = self.getSerializedATN()
        if serializedAtn is None:
            raise UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives.")
        result = self.bypassAltsAtnCache.get(serializedAtn, None)
        if result is None:
            deserializationOptions = ATNDeserializationOptions()
            deserializationOptions.generateRuleBypassTransitions = True
            result = ATNDeserializer(deserializationOptions).deserialize(serializedAtn)
            self.bypassAltsAtnCache[serializedAtn] = result
        return result

    # The preferred method of getting a tree pattern. For example, here's a
    # sample use:
    #
    # <pre>
    # ParseTree t = parser.expr();
    # ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
    # ParseTreeMatch m = p.match(t);
    # String id = m.get("ID");
    # </pre>
    #
    def compileParseTreePattern(self, pattern, patternRuleIndex, lexer = None):
        if lexer is None:
            if self.getTokenStream() is not None:
                tokenSource = self.getTokenStream().getTokenSource()
                if isinstance( tokenSource, Lexer ):
                    lexer = tokenSource
        if lexer is None:
            raise UnsupportedOperationException("Parser can't discover a lexer to use")

        m = ParseTreePatternMatcher(lexer, self)
        return m.compile(pattern, patternRuleIndex)

    def getInputStream(self):
        return self.getTokenStream()

    def setInputStream(self, input):
        self.setTokenStream(input)

    def getTokenStream(self):
        return self._input

    # Set the token stream and reset the parser.#
    def setTokenStream(self, input):
        self._input = None
        self.reset()
        self._input = input

    # Match needs to return the current input symbol, which gets put
    # into the label for the associated token ref; e.g., x=ID.
    #
    def getCurrentToken(self):
        return self._input.LT(1)

    def notifyErrorListeners(self, msg, offendingToken = None, e = None):
        if offendingToken is None:
            offendingToken = self.getCurrentToken()
        self._syntaxErrors += 1
        line = offendingToken.line
        column = offendingToken.column
        listener = self.getErrorListenerDispatch()
        listener.syntaxError(self, offendingToken, line, column, msg, e)

    #
    # Consume and return the {@linkplain #getCurrentToken current symbol}.
    #
    # <p>E.g., given the following input with {@code A} being the current
    # lookahead symbol, this function moves the cursor to {@code B} and returns
    # {@code A}.</p>
    #
    # <pre>
    #  A B
    #  ^
    # </pre>
    #
    # If the parser is not in error recovery mode, the consumed symbol is added
    # to the parse tree using {@link ParserRuleContext#addChild(Token)}, and
    # {@link ParseTreeListener#visitTerminal} is called on any parse listeners.
    # If the parser <em>is</em> in error recovery mode, the consumed symbol is
    # added to the parse tree using
    # {@link ParserRuleContext#addErrorNode(Token)}, and
    # {@link ParseTreeListener#visitErrorNode} is called on any parse
    # listeners.
    #
    def consume(self):
        o = self.getCurrentToken()
        if o.type != Token.EOF:
            self.getInputStream().consume()
        hasListener = self._parseListeners is not None and len(self._parseListeners)>0
        if self.buildParseTrees or hasListener:
            if self._errHandler.inErrorRecoveryMode(self):
                node = self._ctx.addErrorNode(o)
            else:
                node = self._ctx.addTokenNode(o)
            if hasListener:
                for listener in self._parseListeners:
                    listener.visitTerminal(node)
        return o

    def addContextToParseTree(self):
        # add current context to parent if we have a parent
        if self._ctx.parentCtx is not None:
            self._ctx.parentCtx.addChild(self._ctx)

    # Always called by generated parsers upon entry to a rule. Access field
    # {@link #_ctx} to get the current context.
    #
    def enterRule(self, localctx, state, ruleIndex):
        self.state = state
        self._ctx = localctx
        self._ctx.start = self._input.LT(1)
        if self.buildParseTrees:
            self.addContextToParseTree()
        if self._parseListeners is not None:
            self.triggerEnterRuleEvent()

    def exitRule(self):
        self._ctx.stop = self._input.LT(-1)
        # trigger event on _ctx, before it reverts to parent
        if self._parseListeners is not None:
            self.triggerExitRuleEvent()
        self.state = self._ctx.invokingState
        self._ctx = self._ctx.parentCtx

    def enterOuterAlt(self, localctx, altNum):
        # if we have new localctx, make sure we replace existing ctx
        # that is previous child of parse tree
        if self.buildParseTrees and self._ctx != localctx:
            if self._ctx.parentCtx is not None:
                self._ctx.parentCtx.removeLastChild()
                self._ctx.parentCtx.addChild(localctx)
        self._ctx = localctx

    # Get the precedence level for the top-most precedence rule.
    #
    # @return The precedence level for the top-most precedence rule, or -1 if
    # the parser context is not nested within a precedence rule.
    #
    def getPrecedence(self):
        if len(self._precedenceStack)==0:
            return -1
        else:
            return self._precedenceStack[-1]

    def enterRecursionRule(self, localctx, state, ruleIndex, precedence):
        self.state = state
        self._precedenceStack.append(precedence)
        self._ctx = localctx
        self._ctx.start = self._input.LT(1)
        if self._parseListeners is not None:
            self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules

    #
    # Like {@link #enterRule} but for recursive rules.
    #
    def pushNewRecursionContext(self, localctx, state, ruleIndex):
        previous = self._ctx
        previous.parentCtx = localctx
        previous.invokingState = state
        previous.stop = self._input.LT(-1)

        self._ctx = localctx
        self._ctx.start = previous.start
        if self.buildParseTrees:
            self._ctx.addChild(previous)

        if self._parseListeners is not None:
            self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules

    def unrollRecursionContexts(self, parentCtx):
        self._precedenceStack.pop()
        self._ctx.stop = self._input.LT(-1)
        retCtx = self._ctx # save current ctx (return value)
        # unroll so _ctx is as it was before call to recursive method
        if self._parseListeners is not None:
            while self._ctx is not parentCtx:
                self.triggerExitRuleEvent()
                self._ctx = self._ctx.parentCtx
        else:
            self._ctx = parentCtx

        # hook into tree
        retCtx.parentCtx = parentCtx

        if self.buildParseTrees and parentCtx is not None:
            # add return ctx into invoking rule's tree
            parentCtx.addChild(retCtx)

    def getInvokingContext(self, ruleIndex):
        ctx = self._ctx
        while ctx is not None:
            if ctx.ruleIndex == ruleIndex:
                return ctx
            ctx = ctx.parentCtx
        return None

    def precpred(self, localctx, precedence):
        return precedence >= self._precedenceStack[-1]

    def inContext(self, context):
        # TODO: useful in parser?
        return False

    #
    # Checks whether or not {@code symbol} can follow the current state in the
    # ATN. The behavior of this method is equivalent to the following, but is
    # implemented such that the complete context-sensitive follow set does not
    # need to be explicitly constructed.
    #
    # <pre>
    # return getExpectedTokens().contains(symbol);
    # </pre>
    #
    # @param symbol the symbol type to check
    # @return {@code true} if {@code symbol} can follow the current state in
    # the ATN, otherwise {@code false}.
    #
    def isExpectedToken(self, symbol):
        atn = self._interp.atn
        ctx = self._ctx
        s = atn.states[self.state]
        following = atn.nextTokens(s)
        if symbol in following:
            return True
        if not Token.EPSILON in following:
            return False

        while ctx is not None and ctx.invokingState>=0 and Token.EPSILON in following:
            invokingState = atn.states[ctx.invokingState]
            rt = invokingState.transitions[0]
            following = atn.nextTokens(rt.followState)
            if symbol in following:
                return True
            ctx = ctx.parentCtx

        if Token.EPSILON in following and symbol == Token.EOF:
            return True
        else:
            return False

    # Computes the set of input symbols which could follow the current parser
    # state and context, as given by {@link #getState} and {@link #getContext},
    # respectively.
    #
    # @see ATN#getExpectedTokens(int, RuleContext)
    #
    def getExpectedTokens(self):
        return self._interp.atn.getExpectedTokens(self.state, self._ctx)

    def getExpectedTokensWithinCurrentRule(self):
        atn = self._interp.atn
        s = atn.states[self.state]
        return atn.nextTokens(s)

    # Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found.#
    def getRuleIndex(self, ruleName):
        ruleIndex = self.getRuleIndexMap().get(ruleName, None)
        if ruleIndex is not None:
            return ruleIndex
        else:
            return -1

    # Return List<String> of the rule names in your parser instance
    # leading up to a call to the current rule. You could override if
    # you want more details such as the file/line info of where
    # in the ATN a rule is invoked.
    #
    # This is very useful for error messages.
    #
    def getRuleInvocationStack(self, p=None):
        if p is None:
            p = self._ctx
        stack = list()
        while p is not None:
            # compute what follows who invoked us
            ruleIndex = p.getRuleIndex()
            if ruleIndex<0:
                stack.append("n/a")
            else:
                stack.append(self.ruleNames[ruleIndex])
            p = p.parentCtx
        return stack

    # For debugging and other purposes.#
    def getDFAStrings(self):
        return [ unicode(dfa) for dfa in self._interp.decisionToDFA]

    # For debugging and other purposes.#
    def dumpDFA(self):
        seenOne = False
        for i in range(0, len(self._interp.decisionToDFA)):
            dfa = self._interp.decisionToDFA[i]
            if len(dfa.states)>0:
                if seenOne:
                    print()
                print("Decision " + str(dfa.decision) + ":")
                print(dfa.toString(self.literalNames, self.symbolicNames), end='')
                seenOne = True

    def getSourceName(self):
        return self._input.sourceName

    # During a parse it is sometimes useful to listen in on the rule entry and exit
    # events as well as token matches. This is for quick and dirty debugging.
    #
    def setTrace(self, trace):
        if not trace:
            self.removeParseListener(self._tracer)
            self._tracer = None
        else:
            if self._tracer is not None:
                self.removeParseListener(self._tracer)
            self._tracer = TraceListener(self)
            self.addParseListener(self._tracer)
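# A minimal sketch of addParseListener(), which is what setTrace(True)
# wires up internally via TraceListener ("MyParser" and "tokens" are
# hypothetical stand-ins for a generated parser and its token stream,
# not part of this commit):
from antlr4.tree.Tree import ParseTreeListener

class RuleLogger(ParseTreeListener):
    def enterEveryRule(self, ctx):
        print("enter rule index " + str(ctx.getRuleIndex()))
    def exitEveryRule(self, ctx):
        print("exit rule index " + str(ctx.getRuleIndex()))
    def visitTerminal(self, node):
        pass
    def visitErrorNode(self, node):
        pass

parser = MyParser(tokens)             # hypothetical generated parser
parser.addParseListener(RuleLogger())
tree = parser.startRule()             # listener events fire during the parse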
@@ -0,0 +1,187 @@
#
# [The "BSD license"]
#  Copyright (c) 2013 Terence Parr
#  Copyright (c) 2013 Sam Harwell
#  Copyright (c) 2014 Eric Vergnaud
#  All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#  3. The name of the author may not be used to endorse or promote products
#     derived from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
#  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
#  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
#  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
#  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
#  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
#  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

# A parser simulator that mimics what ANTLR's generated
#  parser code does. A ParserATNSimulator is used to make
#  predictions via adaptivePredict but this class moves a pointer through the
#  ATN to simulate parsing. ParserATNSimulator just
#  makes us efficient rather than having to backtrack, for example.
#
#  This properly creates parse trees even for left recursive rules.
#
#  We rely on the left recursive rule invocation and special predicate
#  transitions to make left recursive rules work.
#
#  See TestParserInterpreter for examples.
#
from antlr4 import PredictionContextCache
from antlr4.dfa.DFA import DFA
from antlr4.Parser import Parser
from antlr4.ParserRuleContext import InterpreterRuleContext
from antlr4.Token import Token
from antlr4.atn.ATNState import StarLoopEntryState, ATNState, LoopEndState
from antlr4.atn.ParserATNSimulator import ParserATNSimulator
from antlr4.atn.Transition import Transition
from antlr4.error.Errors import RecognitionException, UnsupportedOperationException, FailedPredicateException


class ParserInterpreter(Parser):

    def __init__(self, grammarFileName, tokenNames, ruleNames, atn, input):
        super(ParserInterpreter, self).__init__(input)
        self.grammarFileName = grammarFileName
        self.atn = atn
        self.tokenNames = tokenNames
        self.ruleNames = ruleNames
        self.decisionToDFA = [ DFA(state) for state in atn.decisionToState ]
        self.sharedContextCache = PredictionContextCache()
        self._parentContextStack = list()
        # identify the ATN states where pushNewRecursionContext must be called
        self.pushRecursionContextStates = set()
        for state in atn.states:
            if not isinstance(state, StarLoopEntryState):
                continue
            if state.precedenceRuleDecision:
                self.pushRecursionContextStates.add(state.stateNumber)
        # get atn simulator that knows how to do predictions
        self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)

    # Begin parsing at startRuleIndex#
    def parse(self, startRuleIndex):
        startRuleStartState = self.atn.ruleToStartState[startRuleIndex]
        rootContext = InterpreterRuleContext(None, ATNState.INVALID_STATE_NUMBER, startRuleIndex)
        if startRuleStartState.isPrecedenceRule:
            self.enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0)
        else:
            self.enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex)
        while True:
            p = self.getATNState()
            if p.stateType==ATNState.RULE_STOP:
                # pop; return from rule
                if self._ctx.isEmpty():   # was len(self._ctx)==0; RuleContext exposes isEmpty(), not __len__
                    if startRuleStartState.isPrecedenceRule:
                        result = self._ctx
                        parentContext = self._parentContextStack.pop()
                        self.unrollRecursionContexts(parentContext[0])   # was parentContext.a, a Java Pair leftover
                        return result
                    else:
                        self.exitRule()
                        return rootContext
                self.visitRuleStopState(p)
            else:
                try:
                    self.visitState(p)
                except RecognitionException as e:
                    self.state = self.atn.ruleToStopState[p.ruleIndex].stateNumber
                    self._ctx.exception = e
                    self._errHandler.reportError(self, e)
                    self._errHandler.recover(self, e)

    def enterRecursionRule(self, localctx, state, ruleIndex, precedence):
        self._parentContextStack.append((self._ctx, localctx.invokingState))
        super(ParserInterpreter, self).enterRecursionRule(localctx, state, ruleIndex, precedence)

    def getATNState(self):
        return self.atn.states[self.state]

    def visitState(self, p):
        edge = 0
        if len(p.transitions) > 1:
            self._errHandler.sync(self)
            edge = self._interp.adaptivePredict(self._input, p.decision, self._ctx)
        else:
            edge = 1

        transition = p.transitions[edge - 1]
        tt = transition.serializationType
        if tt==Transition.EPSILON:
            # was "self.pushRecursionContextStates[p.stateNumber]", which indexes a set
            if p.stateNumber in self.pushRecursionContextStates and not isinstance(transition.target, LoopEndState):
                t = self._parentContextStack[-1]
                ctx = InterpreterRuleContext(t[0], t[1], self._ctx.ruleIndex)
                self.pushNewRecursionContext(ctx, self.atn.ruleToStartState[p.ruleIndex].stateNumber, self._ctx.ruleIndex)

        elif tt==Transition.ATOM:
            self.match(transition.label)

        elif tt in [ Transition.RANGE, Transition.SET, Transition.NOT_SET]:
            if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, 0xFFFF):
                self._errHandler.recoverInline(self)
            self.matchWildcard()

        elif tt==Transition.WILDCARD:
            self.matchWildcard()

        elif tt==Transition.RULE:
            ruleStartState = transition.target
            ruleIndex = ruleStartState.ruleIndex
            ctx = InterpreterRuleContext(self._ctx, p.stateNumber, ruleIndex)
            if ruleStartState.isPrecedenceRule:
                self.enterRecursionRule(ctx, ruleStartState.stateNumber, ruleIndex, transition.precedence)
            else:
                self.enterRule(ctx, transition.target.stateNumber, ruleIndex)

        elif tt==Transition.PREDICATE:
            if not self.sempred(self._ctx, transition.ruleIndex, transition.predIndex):
                raise FailedPredicateException(self)

        elif tt==Transition.ACTION:
            self.action(self._ctx, transition.ruleIndex, transition.actionIndex)

        elif tt==Transition.PRECEDENCE:
            if not self.precpred(self._ctx, transition.precedence):
                msg = "precpred(_ctx, " + str(transition.precedence) + ")"
                raise FailedPredicateException(self, msg)

        else:
            raise UnsupportedOperationException("Unrecognized ATN transition type.")

        self.state = transition.target.stateNumber

    def visitRuleStopState(self, p):
        ruleStartState = self.atn.ruleToStartState[p.ruleIndex]
        if ruleStartState.isPrecedenceRule:
            parentContext = self._parentContextStack.pop()
            self.unrollRecursionContexts(parentContext[0])   # was parentContext.a
            self.state = parentContext[1]
        else:
            self.exitRule()

        ruleTransition = self.atn.states[self.state].transitions[0]
        self.state = ruleTransition.followState.stateNumber
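# A minimal sketch of driving ParserInterpreter directly (hedged: "atn",
# "tokenNames", "ruleNames", "stream" and "startRuleIndex" normally come
# from the ANTLR tool and a generated recognizer; they are assumed to
# exist here and are not defined in this commit):
from antlr4.ParserInterpreter import ParserInterpreter

interp = ParserInterpreter("G.g4", tokenNames, ruleNames, atn, stream)
tree = interp.parse(startRuleIndex)   # returns the root interpreter rule context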
@@ -0,0 +1,188 @@
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#* A rule invocation record for parsing.
#
# Contains all of the information about the current rule not stored in the
# RuleContext. It handles the parse tree children list, any ATN state
# tracing, and the default values available for rule invocations:
# start, stop, rule index, current alt number, current
# ATN state.
#
# Subclasses made for each rule and grammar track the parameters,
# return values, locals, and labels specific to that rule. These
# are the objects that are returned from rules.
#
# Note text is not an actual field of a rule return value; it is computed
# from start and stop using the input stream's toString() method. I
# could add a ctor to this so that we can pass in and store the input
# stream, but I'm not sure we want to do that. It would seem to be undefined
# to get the .text property anyway if the rule matches tokens from multiple
# input streams.
#
# I do not use getters for fields of objects that are used simply to
# group values such as this aggregate. The getters/setters are there to
# satisfy the superclass interface.

from antlr4.RuleContext import RuleContext
from antlr4.tree.Tree import TerminalNodeImpl, ErrorNodeImpl, TerminalNode, INVALID_INTERVAL

class ParserRuleContext(RuleContext):

    def __init__(self, parent = None, invokingStateNumber = None ):
        super(ParserRuleContext, self).__init__(parent, invokingStateNumber)
        #* If we are debugging or building a parse tree for a visitor,
        # we need to track all of the tokens and rule invocations associated
        # with this rule's context. This is empty for parsing w/o tree constr.
        # operation because we don't need to track the details about
        # how we parse this rule.
        #/
        self.children = None
        self.start = None
        self.stop = None
        # The exception that forced this rule to return. If the rule successfully
        # completed, this is {@code null}.
        self.exception = None

    #* COPY a ctx (I'm deliberately not using copy constructor)#/
    def copyFrom(self, ctx):
        # from RuleContext
        self.parentCtx = ctx.parentCtx
        self.invokingState = ctx.invokingState
        self.children = None
        self.start = ctx.start
        self.stop = ctx.stop

    # Double dispatch methods for listeners
    def enterRule(self, listener):
        pass

    def exitRule(self, listener):
        pass

    #* Does not set parent link; other add methods do that#/
    def addChild(self, child):
        if self.children is None:
            self.children = []
        self.children.append(child)
        return child

    #* Used by enterOuterAlt to toss out a RuleContext previously added as
    # we entered a rule. If we have a label, we will need to remove the
    # generic ruleContext object.
    #/
    def removeLastChild(self):
        if self.children is not None:
            del self.children[len(self.children)-1]

    def addTokenNode(self, token):
        node = TerminalNodeImpl(token)
        self.addChild(node)
        node.parentCtx = self
        return node

    def addErrorNode(self, badToken):
        node = ErrorNodeImpl(badToken)
        self.addChild(node)
        node.parentCtx = self
        return node

    def getChild(self, i, ttype = None):
        if ttype is None:
            return self.children[i] if len(self.children)>i else None
        else:
            for child in self.getChildren():
                if not isinstance(child, ttype):
                    continue
                if i==0:
                    return child
                i -= 1
            return None

    def getChildren(self, predicate = None):
        if self.children is not None:
            for child in self.children:
                if predicate is not None and not predicate(child):
                    continue
                yield child

    def getToken(self, ttype, i):
        for child in self.getChildren():
            if not isinstance(child, TerminalNode):
                continue
            if child.symbol.type != ttype:
                continue
            if i==0:
                return child
            i -= 1
        return None

    def getTokens(self, ttype ):
        if self.children is None:
            return []
        tokens = []
        for child in self.getChildren():
            if not isinstance(child, TerminalNode):
                continue
            if child.symbol.type != ttype:
                continue
            tokens.append(child)
        return tokens

    def getTypedRuleContext(self, ctxType, i):
        return self.getChild(i, ctxType)

    def getTypedRuleContexts(self, ctxType):
        children = self.getChildren()
        if children is None:
            return []
        contexts = []
        for child in children:
            if not isinstance(child, ctxType):
                continue
            contexts.append(child)
        return contexts

    def getChildCount(self):
        return len(self.children) if self.children else 0

    def getSourceInterval(self):
        if self.start is None or self.stop is None:
            return INVALID_INTERVAL
        else:
            return (self.start.tokenIndex, self.stop.tokenIndex)


RuleContext.EMPTY = ParserRuleContext()

class InterpreterRuleContext(ParserRuleContext):

    def __init__(self, parent, invokingStateNumber, ruleIndex):
        super(InterpreterRuleContext, self).__init__(parent, invokingStateNumber)
        self.ruleIndex = ruleIndex

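A brief usage sketch (editor's illustration) of the child-tracking API above; the token type 5 is an arbitrary value chosen for the example:

    from antlr4.Token import CommonToken

    ctx = ParserRuleContext()
    tok = CommonToken(type=5)
    node = ctx.addTokenNode(tok)
    assert ctx.getChildCount() == 1
    assert ctx.getToken(5, 0) is node   # first terminal child of type 5
    assert ctx.getToken(5, 1) is None   # there is no second one
    assert ctx.getTokens(5) == [node]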
@@ -0,0 +1,660 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from io import StringIO

from antlr4.RuleContext import RuleContext
from antlr4.atn.ATNState import ATNState


class PredictionContext(object):

    # Represents {@code $} in local context prediction, which means wildcard.
    # {@code *+x = *}.
    #/
    EMPTY = None

    # Represents {@code $} in an array in full context mode, when {@code $}
    # doesn't mean wildcard: {@code $ + x = [$,x]}. Here,
    # {@code $} = {@link #EMPTY_RETURN_STATE}.
    #/
    EMPTY_RETURN_STATE = 0x7FFFFFFF

    globalNodeCount = 1
    id = globalNodeCount

    # Stores the computed hash code of this {@link PredictionContext}. The hash
    # code is computed in parts to match the following reference algorithm.
    #
    # <pre>
    # private int referenceHashCode() {
    #     int hash = {@link MurmurHash#initialize MurmurHash.initialize}({@link #INITIAL_HASH});
    #
    #     for (int i = 0; i < {@link #size()}; i++) {
    #         hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getParent getParent}(i));
    #     }
    #
    #     for (int i = 0; i < {@link #size()}; i++) {
    #         hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getReturnState getReturnState}(i));
    #     }
    #
    #     hash = {@link MurmurHash#finish MurmurHash.finish}(hash, 2 * {@link #size()});
    #     return hash;
    # }
    # </pre>
    #/

    def __init__(self, cachedHashCode):
        self.cachedHashCode = cachedHashCode

    # This means only the {@link #EMPTY} context is in set.
    def isEmpty(self):
        return self is self.EMPTY

    def hasEmptyPath(self):
        return self.getReturnState(len(self) - 1) == self.EMPTY_RETURN_STATE

    def __hash__(self):
        return self.cachedHashCode

    def __str__(self):
        return unicode(self)


def calculateHashCode(parent, returnState):
    return hash( str(parent) + str(returnState))

def calculateEmptyHashCode():
    return hash("")


# Used to cache {@link PredictionContext} objects. It is used for the shared
# context cache associated with contexts in DFA states. This cache
# can be used for both lexers and parsers.

class PredictionContextCache(object):

    def __init__(self):
        self.cache = dict()

    # Add a context to the cache and return it. If the context already exists,
    # return that one instead and do not add a new context to the cache.
    # Protect shared cache from unsafe thread access.
    #
    def add(self, ctx):
        if ctx==PredictionContext.EMPTY:
            return PredictionContext.EMPTY
        existing = self.cache.get(ctx, None)
        if existing is not None:
            return existing
        self.cache[ctx] = ctx
        return ctx

    def get(self, ctx):
        return self.cache.get(ctx, None)

    def __len__(self):
        return len(self.cache)

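An illustrative usage sketch of the cache contract (the ctx objects are hypothetical stand-ins):

    #   cache = PredictionContextCache()
    #   c1 = cache.add(ctx)       # first add stores ctx and returns it
    #   c2 = cache.add(equalCtx)  # a value-equal context returns the stored instance
    #   assert c1 is c2 and len(cache) == 1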
class SingletonPredictionContext(PredictionContext):

    @staticmethod
    def create(parent , returnState ):
        if returnState == PredictionContext.EMPTY_RETURN_STATE and parent is None:
            # someone can pass in the bits of an array ctx that mean $
            return SingletonPredictionContext.EMPTY
        else:
            return SingletonPredictionContext(parent, returnState)

    def __init__(self, parent, returnState):
        assert returnState!=ATNState.INVALID_STATE_NUMBER
        hashCode = calculateHashCode(parent, returnState) if parent is not None else calculateEmptyHashCode()
        super(SingletonPredictionContext, self).__init__(hashCode)
        self.parentCtx = parent
        self.returnState = returnState

    def __len__(self):
        return 1

    def getParent(self, index):
        assert index == 0
        return self.parentCtx

    def getReturnState(self, index):
        assert index == 0
        return self.returnState

    def __eq__(self, other):
        if self is other:
            return True
        elif other is None:
            return False
        elif not isinstance(other, SingletonPredictionContext):
            return False
        elif hash(self) != hash(other):
            return False # can't be same if hash is different
        else:
            return self.returnState == other.returnState and self.parentCtx==other.parentCtx

    def __hash__(self):
        return self.cachedHashCode

    def __unicode__(self):
        up = "" if self.parentCtx is None else unicode(self.parentCtx)
        if len(up)==0:
            if self.returnState == self.EMPTY_RETURN_STATE:
                return u"$"
            else:
                return unicode(self.returnState)
        else:
            return unicode(self.returnState) + u" " + up


class EmptyPredictionContext(SingletonPredictionContext):

    def __init__(self):
        super(EmptyPredictionContext, self).__init__(None, self.EMPTY_RETURN_STATE)

    def isEmpty(self):
        return True

    def getParent(self, index):
        return None

    def getReturnState(self, index):
        return self.returnState

    def __eq__(self, other):
        return self is other

    def __unicode__(self):
        return "$"


PredictionContext.EMPTY = EmptyPredictionContext()

class ArrayPredictionContext(PredictionContext):
    # Parent can be null only if full ctx mode and we make an array
    # from {@link #EMPTY} and non-empty. We merge {@link #EMPTY} by using null parent and
    # returnState == {@link #EMPTY_RETURN_STATE}.

    def __init__(self, parents, returnStates):
        super(ArrayPredictionContext, self).__init__(calculateListsHashCode(parents, returnStates))
        assert parents is not None and len(parents)>0
        assert returnStates is not None and len(returnStates)>0
        self.parents = parents
        self.returnStates = returnStates

    def isEmpty(self):
        # since EMPTY_RETURN_STATE can only appear in the last position, we
        # don't need to verify that size==1
        return self.returnStates[0]==PredictionContext.EMPTY_RETURN_STATE

    def __len__(self):
        return len(self.returnStates)

    def getParent(self, index):
        return self.parents[index]

    def getReturnState(self, index):
        return self.returnStates[index]

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, ArrayPredictionContext):
            return False
        elif hash(self) != hash(other):
            return False # can't be same if hash is different
        else:
            return self.returnStates==other.returnStates and self.parents==other.parents

    def __unicode__(self):
        if self.isEmpty():
            return "[]"
        with StringIO() as buf:
            buf.write(u"[")
            for i in range(0,len(self.returnStates)):
                if i>0:
                    buf.write(u", ")
                if self.returnStates[i]==PredictionContext.EMPTY_RETURN_STATE:
                    buf.write(u"$")
                    continue
                buf.write(unicode(self.returnStates[i]))
                if self.parents[i] is not None:
                    buf.write(u' ')
                    buf.write(unicode(self.parents[i]))
                else:
                    buf.write(u"null")
            buf.write(u"]")
            return buf.getvalue()

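A small sketch (editor's illustration) of the value-equality contract these classes implement; it is what lets PredictionContextCache deduplicate structurally identical graphs:

    p1 = SingletonPredictionContext.create(PredictionContext.EMPTY, 3)
    p2 = SingletonPredictionContext.create(PredictionContext.EMPTY, 3)
    assert p1 == p2 and hash(p1) == hash(p2)   # equal value implies equal hash
    cache = PredictionContextCache()
    assert cache.add(p1) is cache.add(p2)      # second add returns the stored first instance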
# Convert a {@link RuleContext} tree to a {@link PredictionContext} graph.
# Return {@link #EMPTY} if {@code outerContext} is empty or null.
#/
def PredictionContextFromRuleContext(atn, outerContext=None):
    if outerContext is None:
        outerContext = RuleContext.EMPTY

    # if we are in RuleContext of start rule, s, then PredictionContext
    # is EMPTY. Nobody called us. (if we are empty, return empty)
    if outerContext.parentCtx is None or outerContext is RuleContext.EMPTY:
        return PredictionContext.EMPTY

    # If we have a parent, convert it to a PredictionContext graph
    parent = PredictionContextFromRuleContext(atn, outerContext.parentCtx)
    state = atn.states[outerContext.invokingState]
    transition = state.transitions[0]
    return SingletonPredictionContext.create(parent, transition.followState.stateNumber)


def calculateListsHashCode(parents, returnStates ):
    with StringIO() as s:
        for parent in parents:
            s.write(unicode(parent))
        for returnState in returnStates:
            s.write(unicode(returnState))
        return hash(s.getvalue())

def merge(a, b, rootIsWildcard, mergeCache):
    assert a is not None and b is not None # must be empty context, never null

    # share same graph if both same
    if a==b:
        return a

    if isinstance(a, SingletonPredictionContext) and isinstance(b, SingletonPredictionContext):
        return mergeSingletons(a, b, rootIsWildcard, mergeCache)

    # At least one of a or b is array
    # If one is $ and rootIsWildcard, return $ as * wildcard
    if rootIsWildcard:
        if isinstance( a, EmptyPredictionContext ):
            return a
        if isinstance( b, EmptyPredictionContext ):
            return b

    # convert singleton so both are arrays to normalize
    if isinstance( a, SingletonPredictionContext ):
        a = ArrayPredictionContext([a.parentCtx], [a.returnState])
    if isinstance( b, SingletonPredictionContext):
        b = ArrayPredictionContext([b.parentCtx], [b.returnState])
    return mergeArrays(a, b, rootIsWildcard, mergeCache)

#
# Merge two {@link SingletonPredictionContext} instances.
#
# <p>Stack tops equal, parents merge is same; return left graph.<br>
# <embed src="images/SingletonMerge_SameRootSamePar.svg" type="image/svg+xml"/></p>
#
# <p>Same stack top, parents differ; merge parents giving array node, then
# remainders of those graphs. A new root node is created to point to the
# merged parents.<br>
# <embed src="images/SingletonMerge_SameRootDiffPar.svg" type="image/svg+xml"/></p>
#
# <p>Different stack tops pointing to same parent. Make array node for the
# root where both elements in the root point to the same (original)
# parent.<br>
# <embed src="images/SingletonMerge_DiffRootSamePar.svg" type="image/svg+xml"/></p>
#
# <p>Different stack tops pointing to different parents. Make array node for
# the root where each element points to the corresponding original
# parent.<br>
# <embed src="images/SingletonMerge_DiffRootDiffPar.svg" type="image/svg+xml"/></p>
#
# @param a the first {@link SingletonPredictionContext}
# @param b the second {@link SingletonPredictionContext}
# @param rootIsWildcard {@code true} if this is a local-context merge,
# otherwise false to indicate a full-context merge
# @param mergeCache
#/
def mergeSingletons(a, b, rootIsWildcard, mergeCache):
    if mergeCache is not None:
        previous = mergeCache.get(a,b)
        if previous is not None:
            return previous
        previous = mergeCache.get(b,a)
        if previous is not None:
            return previous

    rootMerge = mergeRoot(a, b, rootIsWildcard)
    if rootMerge is not None:
        if mergeCache is not None:
            mergeCache.put(a, b, rootMerge)
        return rootMerge

    if a.returnState==b.returnState:
        parent = merge(a.parentCtx, b.parentCtx, rootIsWildcard, mergeCache)
        # if parent is same as existing a or b parent or reduced to a parent, return it
        if parent == a.parentCtx:
            return a # ax + bx = ax, if a=b
        if parent == b.parentCtx:
            return b # ax + bx = bx, if a=b
        # else: ax + ay = a'[x,y]
        # merge parents x and y, giving array node with x,y then remainders
        # of those graphs. dup a, a' points at merged array
        # new joined parent so create new singleton pointing to it, a'
        a_ = SingletonPredictionContext.create(parent, a.returnState)
        if mergeCache is not None:
            mergeCache.put(a, b, a_)
        return a_
    else: # a != b payloads differ
        # see if we can collapse parents due to $+x parents if local ctx
        singleParent = None
        if a is b or (a.parentCtx is not None and a.parentCtx==b.parentCtx): # ax + bx = [a,b]x
            singleParent = a.parentCtx
        if singleParent is not None: # parents are same
            # sort payloads and use same parent
            payloads = [ a.returnState, b.returnState ]
            if a.returnState > b.returnState:
                payloads[0] = b.returnState
                payloads[1] = a.returnState
            parents = [singleParent, singleParent]
            a_ = ArrayPredictionContext(parents, payloads)
            if mergeCache is not None:
                mergeCache.put(a, b, a_)
            return a_
        # parents differ and can't merge them. Just pack together
        # into array; can't merge.
        # ax + by = [ax,by]
        payloads = [ a.returnState, b.returnState ]
        parents = [ a.parentCtx, b.parentCtx ]
        if a.returnState > b.returnState: # sort by payload
            payloads[0] = b.returnState
            payloads[1] = a.returnState
            parents = [ b.parentCtx, a.parentCtx ]
        a_ = ArrayPredictionContext(parents, payloads)
        if mergeCache is not None:
            mergeCache.put(a, b, a_)
        return a_

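A worked example (editor's illustration, mergeCache omitted) of the first two cases described above:

    root = PredictionContext.EMPTY
    a = SingletonPredictionContext.create(root, 7)
    b = SingletonPredictionContext.create(root, 7)
    assert merge(a, b, True, None) is a   # identical graphs: return the left one
    c = SingletonPredictionContext.create(root, 9)
    m = merge(a, c, True, None)           # differing tops, same parent -> array node
    assert m.returnStates == [7, 9]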
#
# Handle case where at least one of {@code a} or {@code b} is
# {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used
# to represent {@link #EMPTY}.
#
# <h2>Local-Context Merges</h2>
#
# <p>These local-context merge operations are used when {@code rootIsWildcard}
# is true.</p>
#
# <p>{@link #EMPTY} is superset of any graph; return {@link #EMPTY}.<br>
# <embed src="images/LocalMerge_EmptyRoot.svg" type="image/svg+xml"/></p>
#
# <p>{@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is
# {@code #EMPTY}; return left graph.<br>
# <embed src="images/LocalMerge_EmptyParent.svg" type="image/svg+xml"/></p>
#
# <p>Special case of last merge if local context.<br>
# <embed src="images/LocalMerge_DiffRoots.svg" type="image/svg+xml"/></p>
#
# <h2>Full-Context Merges</h2>
#
# <p>These full-context merge operations are used when {@code rootIsWildcard}
# is false.</p>
#
# <p><embed src="images/FullMerge_EmptyRoots.svg" type="image/svg+xml"/></p>
#
# <p>Must keep all contexts; {@link #EMPTY} in array is a special value (and
# null parent).<br>
# <embed src="images/FullMerge_EmptyRoot.svg" type="image/svg+xml"/></p>
#
# <p><embed src="images/FullMerge_SameRoot.svg" type="image/svg+xml"/></p>
#
# @param a the first {@link SingletonPredictionContext}
# @param b the second {@link SingletonPredictionContext}
# @param rootIsWildcard {@code true} if this is a local-context merge,
# otherwise false to indicate a full-context merge
#/
def mergeRoot(a, b, rootIsWildcard):
    if rootIsWildcard:
        if a == PredictionContext.EMPTY:
            return PredictionContext.EMPTY # * + b = *
        if b == PredictionContext.EMPTY:
            return PredictionContext.EMPTY # a + * = *
    else:
        if a == PredictionContext.EMPTY and b == PredictionContext.EMPTY:
            return PredictionContext.EMPTY # $ + $ = $
        elif a == PredictionContext.EMPTY: # $ + x = [$,x]
            payloads = [ b.returnState, PredictionContext.EMPTY_RETURN_STATE ]
            parents = [ b.parentCtx, None ]
            return ArrayPredictionContext(parents, payloads)
        elif b == PredictionContext.EMPTY: # x + $ = [$,x] ($ is always first if present)
            payloads = [ a.returnState, PredictionContext.EMPTY_RETURN_STATE ]
            parents = [ a.parentCtx, None ]
            return ArrayPredictionContext(parents, payloads)
    return None

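And a concrete instance of the full-context table above, $ + x = [$,x] (editor's illustration):

    x = SingletonPredictionContext.create(None, 9)
    m = mergeRoot(PredictionContext.EMPTY, x, False)
    assert isinstance(m, ArrayPredictionContext)
    assert m.returnStates == [9, PredictionContext.EMPTY_RETURN_STATE]  # EMPTY_RETURN_STATE is the largest payload, so $ lands in the last slot
    assert m.parents == [None, None]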
#
# Merge two {@link ArrayPredictionContext} instances.
#
# <p>Different tops, different parents.<br>
# <embed src="images/ArrayMerge_DiffTopDiffPar.svg" type="image/svg+xml"/></p>
#
# <p>Shared top, same parents.<br>
# <embed src="images/ArrayMerge_ShareTopSamePar.svg" type="image/svg+xml"/></p>
#
# <p>Shared top, different parents.<br>
# <embed src="images/ArrayMerge_ShareTopDiffPar.svg" type="image/svg+xml"/></p>
#
# <p>Shared top, all shared parents.<br>
# <embed src="images/ArrayMerge_ShareTopSharePar.svg" type="image/svg+xml"/></p>
#
# <p>Equal tops, merge parents and reduce top to
# {@link SingletonPredictionContext}.<br>
# <embed src="images/ArrayMerge_EqualTop.svg" type="image/svg+xml"/></p>
#/
def mergeArrays(a, b, rootIsWildcard, mergeCache):
    if mergeCache is not None:
        previous = mergeCache.get(a,b)
        if previous is not None:
            return previous
        previous = mergeCache.get(b,a)
        if previous is not None:
            return previous

    # merge sorted payloads a + b => M
    i = 0 # walks a
    j = 0 # walks b
    k = 0 # walks target M array

    mergedReturnStates = [None] * (len(a.returnStates) + len(b.returnStates))
    mergedParents = [None] * len(mergedReturnStates)
    # walk and merge to yield mergedParents, mergedReturnStates
    while i<len(a.returnStates) and j<len(b.returnStates):
        a_parent = a.parents[i]
        b_parent = b.parents[j]
        if a.returnStates[i]==b.returnStates[j]:
            # same payload (stack tops are equal), must yield merged singleton
            payload = a.returnStates[i]
            # $+$ = $
            bothDollars = payload == PredictionContext.EMPTY_RETURN_STATE and \
                            a_parent is None and b_parent is None
            ax_ax = (a_parent is not None and b_parent is not None) and a_parent==b_parent # ax+ax -> ax
            if bothDollars or ax_ax:
                mergedParents[k] = a_parent # choose left
                mergedReturnStates[k] = payload
            else: # ax+ay -> a'[x,y]
                mergedParent = merge(a_parent, b_parent, rootIsWildcard, mergeCache)
                mergedParents[k] = mergedParent
                mergedReturnStates[k] = payload
            i += 1 # hop over left one as usual
            j += 1 # but also skip one in right side since we merge
        elif a.returnStates[i]<b.returnStates[j]: # copy a[i] to M
            mergedParents[k] = a_parent
            mergedReturnStates[k] = a.returnStates[i]
            i += 1
        else: # b > a, copy b[j] to M
            mergedParents[k] = b_parent
            mergedReturnStates[k] = b.returnStates[j]
            j += 1
        k += 1

    # copy over any payloads remaining in either array
    if i < len(a.returnStates):
        for p in range(i, len(a.returnStates)):
            mergedParents[k] = a.parents[p]
            mergedReturnStates[k] = a.returnStates[p]
            k += 1
    else:
        for p in range(j, len(b.returnStates)):
            mergedParents[k] = b.parents[p]
            mergedReturnStates[k] = b.returnStates[p]
            k += 1

    # trim merged if we combined a few that had same stack tops
    if k < len(mergedParents): # write index < last position; trim
        if k == 1: # for just one merged element, return singleton top
            a_ = SingletonPredictionContext.create(mergedParents[0], mergedReturnStates[0])
            if mergeCache is not None:
                mergeCache.put(a,b,a_)
            return a_
        mergedParents = mergedParents[0:k]
        mergedReturnStates = mergedReturnStates[0:k]

    M = ArrayPredictionContext(mergedParents, mergedReturnStates)

    # if we created same array as a or b, return that instead
    # TODO: track whether this is possible above during merge sort for speed
    if M==a:
        if mergeCache is not None:
            mergeCache.put(a,b,a)
        return a
    if M==b:
        if mergeCache is not None:
            mergeCache.put(a,b,b)
        return b
    combineCommonParents(mergedParents)

    if mergeCache is not None:
        mergeCache.put(a,b,M)
    return M

#
# Make pass over all <em>M</em> {@code parents}; merge any {@code equals()}
# ones.
#/
def combineCommonParents(parents):
    uniqueParents = dict()

    for p in range(0, len(parents)):
        parent = parents[p]
        if uniqueParents.get(parent, None) is None:
            uniqueParents[parent] = parent

    for p in range(0, len(parents)):
        parents[p] = uniqueParents[parents[p]]

def getCachedPredictionContext(context, contextCache, visited):
    if context.isEmpty():
        return context
    existing = visited.get(context)
    if existing is not None:
        return existing
    existing = contextCache.get(context)
    if existing is not None:
        visited[context] = existing
        return existing
    changed = False
    parents = [None] * len(context)
    for i in range(0, len(parents)):
        parent = getCachedPredictionContext(context.getParent(i), contextCache, visited)
        if changed or parent is not context.getParent(i):
            if not changed:
                parents = [None] * len(context)
                for j in range(0, len(context)):
                    parents[j] = context.getParent(j)
                changed = True
            parents[i] = parent
    if not changed:
        contextCache.add(context)
        visited[context] = context
        return context
    updated = None
    if len(parents) == 0:
        updated = PredictionContext.EMPTY
    elif len(parents) == 1:
        updated = SingletonPredictionContext.create(parents[0], context.getReturnState(0))
    else:
        updated = ArrayPredictionContext(parents, context.returnStates)

    contextCache.add(updated)
    visited[updated] = updated
    visited[context] = updated

    return updated


# # extra structures, but cut/paste/morphed works, so leave it.
# # seems to do a breadth-first walk
# public static List<PredictionContext> getAllNodes(PredictionContext context) {
#    Map<PredictionContext, PredictionContext> visited =
#        new IdentityHashMap<PredictionContext, PredictionContext>();
#    Deque<PredictionContext> workList = new ArrayDeque<PredictionContext>();
#    workList.add(context);
#    visited.put(context, context);
#    List<PredictionContext> nodes = new ArrayList<PredictionContext>();
#    while (!workList.isEmpty()) {
#        PredictionContext current = workList.pop();
#        nodes.add(current);
#        for (int i = 0; i < current.size(); i++) {
#            PredictionContext parent = current.getParent(i);
#            if ( parent!=null && visited.put(parent, parent) == null) {
#                workList.push(parent);
#            }
#        }
#    }
#    return nodes;
# }

# ter's recursive version of Sam's getAllNodes()
def getAllContextNodes(context, nodes=None, visited=None):
    if nodes is None:
        nodes = list()
        return getAllContextNodes(context, nodes, visited)
    elif visited is None:
        visited = dict()
        return getAllContextNodes(context, nodes, visited)
    else:
        if context is None or visited.get(context, None) is not None:
            return nodes
        visited[context] = context
        nodes.append(context)
        for i in range(0, len(context)):
            getAllContextNodes(context.getParent(i), nodes, visited)
        return nodes

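A quick sketch (editor's illustration) of the recursive walker above:

    root = SingletonPredictionContext.create(PredictionContext.EMPTY, 5)
    nodes = getAllContextNodes(root)
    assert root in nodes and PredictionContext.EMPTY in nodes
    assert len(nodes) == 2   # each distinct node is collected exactly once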
@@ -0,0 +1,168 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
from __builtin__ import unicode

from antlr4.Token import Token
from antlr4.error.ErrorListener import ProxyErrorListener, ConsoleErrorListener


class Recognizer(object):

    tokenTypeMapCache = dict()
    ruleIndexMapCache = dict()

    def __init__(self):
        self._listeners = [ ConsoleErrorListener.INSTANCE ]
        self._interp = None
        self._stateNumber = -1

    def extractVersion(self, version):
        pos = version.find(".")
        major = version[0:pos]
        version = version[pos+1:]
        pos = version.find(".")
        if pos==-1:
            pos = version.find("-")
        if pos==-1:
            pos = len(version)
        minor = version[0:pos]
        return major, minor

    def checkVersion(self, toolVersion):
        runtimeVersion = "4.5.2"
        rvmajor, rvminor = self.extractVersion(runtimeVersion)
        tvmajor, tvminor = self.extractVersion(toolVersion)
        if rvmajor!=tvmajor or rvminor!=tvminor:
            print("ANTLR runtime and generated code versions disagree: "+runtimeVersion+"!="+toolVersion)

    def addErrorListener(self, listener):
        self._listeners.append(listener)

    def removeErrorListener(self, listener):
        self._listeners.remove(listener)

    def removeErrorListeners(self):
        self._listeners = []

    def getTokenTypeMap(self):
        tokenNames = self.getTokenNames()
        if tokenNames is None:
            from antlr4.error.Errors import UnsupportedOperationException
            raise UnsupportedOperationException("The current recognizer does not provide a list of token names.")
        result = self.tokenTypeMapCache.get(tokenNames, None)
        if result is None:
            result = dict( zip( tokenNames, range(0, len(tokenNames))))
            result["EOF"] = Token.EOF
            self.tokenTypeMapCache[tokenNames] = result
        return result

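For illustration (hypothetical token names), the shape of the returned map; note tokenNames must be hashable to serve as a cache key, hence a tuple in this sketch:

    #   a recognizer whose getTokenNames() returns ("<INVALID>", "ID", "INT") yields
    #   {"<INVALID>": 0, "ID": 1, "INT": 2, "EOF": -1}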
    # Get a map from rule names to rule indexes.
    #
    # <p>Used for XPath and tree pattern compilation.</p>
    #
    def getRuleIndexMap(self):
        ruleNames = self.getRuleNames()
        if ruleNames is None:
            from antlr4.error.Errors import UnsupportedOperationException
            raise UnsupportedOperationException("The current recognizer does not provide a list of rule names.")
        result = self.ruleIndexMapCache.get(ruleNames, None)
        if result is None:
            result = dict( zip( ruleNames, range(0, len(ruleNames))))
            self.ruleIndexMapCache[ruleNames] = result
        return result

    def getTokenType(self, tokenName):
        ttype = self.getTokenTypeMap().get(tokenName, None)
        if ttype is not None:
            return ttype
        else:
            return Token.INVALID_TYPE

    # What is the error header, normally line/character position information?
    def getErrorHeader(self, e):
        line = e.getOffendingToken().line
        column = e.getOffendingToken().column
        return u"line " + unicode(line) + u":" + unicode(column)

    # How should a token be displayed in an error message? The default
    # is to display just the text, but during development you might
    # want to have a lot of information spit out. Override in that case
    # to use t.toString() (which, for CommonToken, dumps everything about
    # the token). This is better than forcing you to override a method in
    # your token objects because you don't have to go modify your lexer
    # so that it creates a new Java type.
    #
    # @deprecated This method is not called by the ANTLR 4 Runtime. Specific
    # implementations of {@link ANTLRErrorStrategy} may provide a similar
    # feature when necessary. For example, see
    # {@link DefaultErrorStrategy#getTokenErrorDisplay}.
    #
    def getTokenErrorDisplay(self, t):
        if t is None:
            return u"<no token>"
        s = t.text
        if s is None:
            if t.type==Token.EOF:
                s = u"<EOF>"
            else:
                s = u"<" + unicode(t.type) + u">"
        s = s.replace(u"\n",u"\\n")
        s = s.replace(u"\r",u"\\r")
        s = s.replace(u"\t",u"\\t")
        return u"'" + s + u"'"

    def getErrorListenerDispatch(self):
        return ProxyErrorListener(self._listeners)

    # subclass needs to override these if there are sempreds or actions
    # that the ATN interp needs to execute
    def sempred(self, localctx, ruleIndex, actionIndex):
        return True

    def precpred(self, localctx , precedence):
        return True

    @property
    def state(self):
        return self._stateNumber

    # Indicate that the recognizer has changed internal state that is
    # consistent with the ATN state passed in. This way we always know
    # where we are in the ATN as the parser goes along. The rule
    # context objects form a stack that lets us see the stack of
    # invoking rules. Combine this and we have complete ATN
    # configuration information.

    @state.setter
    def state(self, atnState):
        self._stateNumber = atnState

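A short sketch (editor's illustration) of extractVersion on the two version formats it handles:

    r = Recognizer()
    assert r.extractVersion("4.5.2") == ("4", "5")
    assert r.extractVersion("4.5-SNAPSHOT") == ("4", "5")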
@@ -0,0 +1,234 @@
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/


# A rule context is a record of a single rule invocation. It knows
# which context invoked it, if any. If there is no parent context, then
# naturally the invoking state is not valid. The parent link
# provides a chain upwards from the current rule invocation to the root
# of the invocation tree, forming a stack. We actually carry no
# information about the rule associated with this context (except
# when parsing). We keep only the state number of the invoking state from
# the ATN submachine that invoked this. Contrast this with the s
# pointer inside ParserRuleContext that tracks the current state
# being "executed" for the current rule.
#
# The parent contexts are useful for computing lookahead sets and
# getting error information.
#
# These objects are used during parsing and prediction.
# For the special case of parsers, we use the subclass
# ParserRuleContext.
#
# @see ParserRuleContext
#/
from io import StringIO

from antlr4.tree.Tree import RuleNode, INVALID_INTERVAL
from antlr4.tree.Trees import Trees


class RuleContext(RuleNode):

    EMPTY = None

    def __init__(self, parent=None, invokingState=-1):
        super(RuleContext, self).__init__()
        # What context invoked this rule?
        self.parentCtx = parent
        # What state invoked the rule associated with this context?
        # The "return address" is the followState of invokingState
        # If parent is null, this should be -1.
        self.invokingState = invokingState


    def depth(self):
        n = 0
        p = self
        while p is not None:
            p = p.parentCtx
            n += 1
        return n

    # A context is empty if there is no invoking state; meaning nobody called
    # the current context.
    def isEmpty(self):
        return self.invokingState == -1

    # satisfy the ParseTree / SyntaxTree interface

    def getSourceInterval(self):
        return INVALID_INTERVAL

    def getRuleContext(self):
        return self

    def getPayload(self):
        return self

    # Return the combined text of all child nodes. This method only considers
    # tokens which have been added to the parse tree.
    # <p>
    # Since tokens on hidden channels (e.g. whitespace or comments) are not
    # added to the parse trees, they will not appear in the output of this
    # method.
    #/
    def getText(self):
        if self.getChildCount() == 0:
            return u""
        with StringIO() as builder:
            for child in self.getChildren():
                builder.write(child.getText())
            return builder.getvalue()

    def getRuleIndex(self):
        return -1

    def getChild(self, i):
        return None

    def getChildCount(self):
        return 0

    def getChildren(self):
        for c in []:
            yield c

    def accept(self, visitor):
        return visitor.visitChildren(self)

    # # Call this method to view a parse tree in a dialog box visually.#/
    # public Future<JDialog> inspect(@Nullable Parser parser) {
    #    List<String> ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null;
    #    return inspect(ruleNames);
    # }
    #
    # public Future<JDialog> inspect(@Nullable List<String> ruleNames) {
    #    TreeViewer viewer = new TreeViewer(ruleNames, this);
    #    return viewer.open();
    # }
    #
    # # Save this tree in a postscript file#/
    # public void save(@Nullable Parser parser, String fileName)
    #    throws IOException, PrintException
    # {
    #    List<String> ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null;
    #    save(ruleNames, fileName);
    # }
    #
    # # Save this tree in a postscript file using a particular font name and size#/
    # public void save(@Nullable Parser parser, String fileName,
    #                  String fontName, int fontSize)
    #    throws IOException
    # {
    #    List<String> ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null;
    #    save(ruleNames, fileName, fontName, fontSize);
    # }
    #
    # # Save this tree in a postscript file#/
    # public void save(@Nullable List<String> ruleNames, String fileName)
    #    throws IOException, PrintException
    # {
    #    Trees.writePS(this, ruleNames, fileName);
    # }
    #
    # # Save this tree in a postscript file using a particular font name and size#/
    # public void save(@Nullable List<String> ruleNames, String fileName,
    #                  String fontName, int fontSize)
    #    throws IOException
    # {
    #    Trees.writePS(this, ruleNames, fileName, fontName, fontSize);
    # }
    #
    # # Print out a whole tree, not just a node, in LISP format
    # # (root child1 .. childN). Print just a node if this is a leaf.
    # # We have to know the recognizer so we can get rule names.
    # #/
    # @Override
    # public String toStringTree(@Nullable Parser recog) {
    #    return Trees.toStringTree(this, recog);
    # }
    #
    # Print out a whole tree, not just a node, in LISP format
    # (root child1 .. childN). Print just a node if this is a leaf.
    #
    def toStringTree(self, ruleNames=None, recog=None):
        return Trees.toStringTree(self, ruleNames=ruleNames, recog=recog)
    # }
    #
    # @Override
    # public String toStringTree() {
    #    return toStringTree((List<String>)null);
    # }
    #
    def __unicode__(self):
        return self.toString(None, None)

    # @Override
    # public String toString() {
    #    return toString((List<String>)null, (RuleContext)null);
    # }
    #
    # public final String toString(@Nullable Recognizer<?,?> recog) {
    #    return toString(recog, ParserRuleContext.EMPTY);
    # }
    #
    # public final String toString(@Nullable List<String> ruleNames) {
    #    return toString(ruleNames, null);
    # }
    #
    # // recog null unless ParserRuleContext, in which case we use subclass toString(...)
    # public String toString(@Nullable Recognizer<?,?> recog, @Nullable RuleContext stop) {
    #    String[] ruleNames = recog != null ? recog.getRuleNames() : null;
    #    List<String> ruleNamesList = ruleNames != null ? Arrays.asList(ruleNames) : null;
    #    return toString(ruleNamesList, stop);
    # }

    def toString(self, ruleNames, stop):
        with StringIO() as buf:
            p = self
            buf.write(u"[")
            while p is not None and p is not stop:
                if ruleNames is None:
                    if not p.isEmpty():
                        buf.write(unicode(p.invokingState))
                else:
                    ri = p.getRuleIndex()
                    ruleName = ruleNames[ri] if ri >= 0 and ri < len(ruleNames) else unicode(ri)
                    buf.write(ruleName)

                if p.parentCtx is not None and (ruleNames is not None or not p.parentCtx.isEmpty()):
                    buf.write(u" ")

                p = p.parentCtx

            buf.write(u"]")
            return buf.getvalue()

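A small sketch (editor's illustration) of the bracketed invocation-stack format toString produces:

    inner = RuleContext(RuleContext(None, 42), 7)
    assert inner.toString(None, None) == u"[7 42]"   # inside-out chain of invoking states
    assert RuleContext().isEmpty()                   # default invokingState is -1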
@@ -0,0 +1,48 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# This is an InputStream that is loaded from stdin all at once
# when you construct the object.
#

import codecs
import sys

from antlr4.InputStream import InputStream


class StdinStream(InputStream):

    def __init__(self, encoding='ascii'):
        bytes = sys.stdin.read()
        data = codecs.decode(bytes, encoding)
        super(type(self), self).__init__(data)

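Typical use (a sketch; MyLexer stands in for any generated lexer class):

    #   stream = StdinStream(encoding='utf-8')   # reads all of stdin, decodes, wraps it
    #   lexer = MyLexer(stream)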
@ -0,0 +1,184 @@
|
|||
#[The "BSD license"]
|
||||
# Copyright (c) 2012 Terence Parr
|
||||
# Copyright (c) 2012 Sam Harwell
|
||||
# Copyright (c) 2014 Eric Vergnaud
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# 3. The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
|
||||
# A token has properties: text, type, line, character position in the line
|
||||
# (so we can ignore tabs), token channel, index, and source from which
|
||||
# we obtained this token.
|
||||
from io import StringIO
|
||||
|
||||
|
||||
class Token (object):
|
||||
|
||||
INVALID_TYPE = 0
|
||||
|
||||
# During lookahead operations, this "token" signifies we hit rule end ATN state
|
||||
# and did not follow it despite needing to.
|
||||
EPSILON = -2
|
||||
|
||||
MIN_USER_TOKEN_TYPE = 1
|
||||
|
||||
EOF = -1
|
||||
|
||||
# All tokens go to the parser (unless skip() is called in that rule)
|
||||
# on a particular "channel". The parser tunes to a particular channel
|
||||
# so that whitespace etc... can go to the parser on a "hidden" channel.
|
||||
|
||||
DEFAULT_CHANNEL = 0
|
||||
|
||||
    # Anything on a channel other than DEFAULT_CHANNEL is not parsed
    # by the parser.
    HIDDEN_CHANNEL = 1

    def __init__(self):
        self.source = None
        self.type = None # token type of the token
        self.channel = None # The parser ignores everything not on DEFAULT_CHANNEL
        self.start = None # optional; return -1 if not implemented.
        self.stop = None # optional; return -1 if not implemented.
        self.tokenIndex = None # from 0..n-1 of the token object in the input stream
        self.line = None # line=1..n of the 1st character
        self.column = None # beginning of the line at which it occurs, 0..n-1
        self._text = None # text of the token.

    @property
    def text(self):
        return self._text

    # Explicitly set the text for this token. If {@code text} is not
    # {@code None}, then {@link #text} will return this value rather than
    # extracting the text from the input.
    #
    # @param text The explicit text of the token, or {@code None} if the text
    # should be obtained from the input along with the start and stop indexes
    # of the token.

    @text.setter
    def text(self, text):
        self._text = text

    def getTokenSource(self):
        return self.source[0]

    def getInputStream(self):
        return self.source[1]

    def __str__(self):
        return unicode(self)


class CommonToken(Token):

    # An empty (source, stream) pair which is used as the default value of
    # {@link #source} for tokens that do not have a source.
    EMPTY_SOURCE = (None, None)

    def __init__(self, source = EMPTY_SOURCE, type = None, channel=Token.DEFAULT_CHANNEL, start=-1, stop=-1):
        super(CommonToken, self).__init__()
        self.source = source
        self.type = type
        self.channel = channel
        self.start = start
        self.stop = stop
        self.tokenIndex = -1
        if source[0] is not None:
            self.line = source[0].line
            self.column = source[0].column
        else:
            self.column = -1

    # Constructs a new {@link CommonToken} as a copy of this token.
    #
    # <p>The newly constructed token shares a reference to the {@link #text}
    # value and to the (source, stream) pair stored in {@link #source}.</p>
    #
    def clone(self):
        t = CommonToken(self.source, self.type, self.channel, self.start, self.stop)
        t.tokenIndex = self.tokenIndex
        t.line = self.line
        t.column = self.column
        t.text = self.text
        return t

    @property
    def text(self):
        if self._text is not None:
            return self._text
        input = self.getInputStream()
        if input is None:
            return None
        n = input.size
        if self.start < n and self.stop < n:
            return input.getText(self.start, self.stop)
        else:
            return u"<EOF>"

    @text.setter
    def text(self, text):
        self._text = text

    def __unicode__(self):
        with StringIO() as buf:
            buf.write(u"[@")
            buf.write(unicode(self.tokenIndex))
            buf.write(u",")
            buf.write(unicode(self.start))
            buf.write(u":")
            buf.write(unicode(self.stop))
            buf.write(u"='")
            txt = self.text
            if txt is not None:
                txt = txt.replace(u"\n",u"\\n")
                txt = txt.replace(u"\r",u"\\r")
                txt = txt.replace(u"\t",u"\\t")
            else:
                txt = u"<no text>"
            buf.write(txt)
            buf.write(u"',<")
            buf.write(unicode(self.type))
            buf.write(u">")
            if self.channel > 0:
                buf.write(u",channel=")
                buf.write(unicode(self.channel))
            buf.write(u",")
            buf.write(unicode(self.line))
            buf.write(u":")
            buf.write(unicode(self.column))
            buf.write(u"]")
            return buf.getvalue()
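# Illustrative usage sketch (comments only, not part of the runtime); the
# result shown follows directly from __unicode__ above for a source-less token
# (tokenIndex -1, no channel segment, line None, column -1).
#
#   tok = CommonToken(type=5, start=0, stop=4)
#   tok.text = u"hi\n"          # explicit text wins over input extraction
#   unicode(tok)                # -> u"[@-1,0:4='hi\\n',<5>,None:-1]"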
@ -0,0 +1,64 @@
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

from io import StringIO


def str_collection(val, begin, end):
    with StringIO() as buf:
        buf.write(begin)
        first = True
        for item in val:
            if not first:
                buf.write(u', ')
            buf.write(unicode(item))
            first = False
        buf.write(end)
        return buf.getvalue()


def str_list(val):
    return str_collection(val, u'[', u']')


def str_set(val):
    return str_collection(val, u'{', u'}')


def escapeWhitespace(s, escapeSpaces):
    with StringIO() as buf:
        for c in s:
            if c==' ' and escapeSpaces:
                buf.write(u'\u00B7')
            elif c=='\t':
                buf.write(u"\\t")
            elif c=='\n':
                buf.write(u"\\n")
            elif c=='\r':
                buf.write(u"\\r")
            else:
                buf.write(unicode(c))
        return buf.getvalue()
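# Quick behaviour sketch for the helpers above (comments only):
#
#   str_list([1, 2, 3])                 # -> u'[1, 2, 3]'
#   str_set([1, 2])                     # -> u'{1, 2}'
#   escapeWhitespace(u"a\tb", False)    # -> u'a\\tb' (backslash-t, two chars)
#   escapeWhitespace(u"a b", True)      # -> u'a\u00b7b' (middle dot marks the space)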
@ -0,0 +1 @@
@ -0,0 +1,147 @@
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from antlr4.IntervalSet import IntervalSet
from antlr4.Token import Token


class ATN(object):

    INVALID_ALT_NUMBER = 0

    # Used for runtime deserialization of ATNs from strings#/
    def __init__(self, grammarType, maxTokenType):
        # The type of the ATN.
        self.grammarType = grammarType
        # The maximum value for any symbol recognized by a transition in the ATN.
        self.maxTokenType = maxTokenType
        self.states = []
        # Each subrule/rule is a decision point and we must track them so we
        # can go back later and build DFA predictors for them.  This includes
        # all the rules, subrules, optional blocks, ()+, ()* etc...
        self.decisionToState = []
        # Maps from rule index to starting state number.
        self.ruleToStartState = []
        # Maps from rule index to stop state number.
        self.ruleToStopState = None
        self.modeNameToStartState = dict()
        # For lexer ATNs, this maps the rule index to the resulting token type.
        # For parser ATNs, this maps the rule index to the generated bypass token
        # type if the
        # {@link ATNDeserializationOptions#isGenerateRuleBypassTransitions}
        # deserialization option was specified; otherwise, this is {@code None}.
        self.ruleToTokenType = None
        # For lexer ATNs, this is an array of {@link LexerAction} objects which may
        # be referenced by action transitions in the ATN.
        self.lexerActions = None
        self.modeToStartState = []

    # Compute the set of valid tokens that can occur starting in state {@code s}.
    # If {@code ctx} is None, the set of tokens will not include what can follow
    # the rule surrounding {@code s}. In other words, the set will be
    # restricted to tokens reachable staying within {@code s}'s rule.
    def nextTokensInContext(self, s, ctx):
        from antlr4.LL1Analyzer import LL1Analyzer
        anal = LL1Analyzer(self)
        return anal.LOOK(s, ctx=ctx)

    # Compute the set of valid tokens that can occur starting in {@code s} and
    # staying in same rule. {@link Token#EPSILON} is in set if we reach end of
    # rule.
    def nextTokensNoContext(self, s):
        if s.nextTokenWithinRule is not None:
            return s.nextTokenWithinRule
        s.nextTokenWithinRule = self.nextTokensInContext(s, None)
        s.nextTokenWithinRule.readonly = True
        return s.nextTokenWithinRule

    def nextTokens(self, s, ctx = None):
        if ctx is None:
            return self.nextTokensNoContext(s)
        else:
            return self.nextTokensInContext(s, ctx)

    def addState(self, state):
        if state is not None:
            state.atn = self
            state.stateNumber = len(self.states)
        self.states.append(state)

    def removeState(self, state):
        self.states[state.stateNumber] = None # just free mem, don't shift states in list

    def defineDecisionState(self, s):
        self.decisionToState.append(s)
        s.decision = len(self.decisionToState)-1
        return s.decision

    def getDecisionState(self, decision):
        if len(self.decisionToState)==0:
            return None
        else:
            return self.decisionToState[decision]

    # Computes the set of input symbols which could follow ATN state number
    # {@code stateNumber} in the specified full {@code context}. This method
    # considers the complete parser context, but does not evaluate semantic
    # predicates (i.e. all predicates encountered during the calculation are
    # assumed true). If a path in the ATN exists from the starting state to the
    # {@link RuleStopState} of the outermost context without matching any
    # symbols, {@link Token#EOF} is added to the returned set.
    #
    # <p>If {@code context} is {@code None}, it is treated as
    # {@link ParserRuleContext#EMPTY}.</p>
    #
    # @param stateNumber the ATN state number
    # @param context the full parse context
    # @return The set of potentially valid input symbols which could follow the
    # specified state in the specified context.
    # @throws IllegalArgumentException if the ATN does not contain a state with
    # number {@code stateNumber}
    #/
    def getExpectedTokens(self, stateNumber, ctx):
        if stateNumber < 0 or stateNumber >= len(self.states):
            raise Exception("Invalid state number.")
        s = self.states[stateNumber]
        following = self.nextTokens(s)
        if Token.EPSILON not in following:
            return following
        expected = IntervalSet()
        expected.addSet(following)
        expected.removeOne(Token.EPSILON)
        while ctx is not None and ctx.invokingState >= 0 and Token.EPSILON in following:
            invokingState = self.states[ctx.invokingState]
            rt = invokingState.transitions[0]
            following = self.nextTokens(rt.followState)
            expected.addSet(following)
            expected.removeOne(Token.EPSILON)
            ctx = ctx.parentCtx
        if Token.EPSILON in following:
            expected.addOne(Token.EOF)
        return expected
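# Usage sketch (comments only; `recognizer` and `ctx` are hypothetical
# stand-ins for a parser and its current rule context):
#
#   expected = recognizer.atn.getExpectedTokens(recognizer.state, ctx)
#   # `expected` is an IntervalSet of token types; the loop above walks the
#   # invoking-state chain exactly because EPSILON in a follow set means
#   # "can fall out of this rule".
#   within_rule = recognizer.atn.nextTokens(s)   # ctx=None: stays inside s's rule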
@ -0,0 +1,154 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/

# A tuple: (ATN state, predicted alt, syntactic, semantic context).
# The syntactic context is a graph-structured stack node whose
# path(s) to the root is the rule invocation(s)
# chain used to arrive at the state.  The semantic context is
# the tree of semantic predicates encountered before reaching
# an ATN state.
#/
from io import StringIO

from antlr4.atn.ATNState import DecisionState
from antlr4.atn.SemanticContext import SemanticContext


class ATNConfig(object):

    def __init__(self, state=None, alt=None, context=None, semantic=None, config=None):
        if config is not None:
            if state is None:
                state = config.state
            if alt is None:
                alt = config.alt
            if context is None:
                context = config.context
            if semantic is None:
                semantic = config.semanticContext
        if semantic is None:
            semantic = SemanticContext.NONE

        # The ATN state associated with this configuration#/
        self.state = state
        # What alt (or lexer rule) is predicted by this configuration#/
        self.alt = alt
        # The stack of invoking states leading to the rule/states associated
        # with this config.  We track only those contexts pushed during
        # execution of the ATN simulator.
        self.context = context
        self.semanticContext = semantic
        # We cannot execute predicates dependent upon local context unless
        # we know for sure we are in the correct context. Because there is
        # no way to do this efficiently, we simply cannot evaluate
        # dependent predicates unless we are in the rule that initially
        # invokes the ATN simulator.
        #
        # closure() tracks the depth of how far we dip into the
        # outer context: depth > 0.  Note that it may not be totally
        # accurate depth since I don't ever decrement. TODO: make it a boolean then
        self.reachesIntoOuterContext = 0 if config is None else config.reachesIntoOuterContext
        self.precedenceFilterSuppressed = False if config is None else config.precedenceFilterSuppressed

    # An ATN configuration is equal to another if both have
    # the same state, they predict the same alternative, and
    # syntactic/semantic contexts are the same.
    #/
    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, ATNConfig):
            return False
        else:
            return self.state.stateNumber==other.state.stateNumber \
                and self.alt==other.alt \
                and ((self.context is other.context) or (self.context==other.context)) \
                and self.semanticContext==other.semanticContext \
                and self.precedenceFilterSuppressed==other.precedenceFilterSuppressed

    def __hash__(self):
        return hash( str(self.state.stateNumber) + "/" +
                     str(self.alt) + "/" +
                     str(self.context) + "/" +
                     str(self.semanticContext) )

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        with StringIO() as buf:
            buf.write(u"(")
            buf.write(unicode(self.state))
            buf.write(u",")
            buf.write(unicode(self.alt))
            if self.context is not None:
                buf.write(u",[")
                buf.write(unicode(self.context))
                buf.write(u"]")
            if self.semanticContext is not None and self.semanticContext is not SemanticContext.NONE:
                buf.write(u",")
                buf.write(unicode(self.semanticContext))
            if self.reachesIntoOuterContext>0:
                buf.write(u",up=")
                buf.write(unicode(self.reachesIntoOuterContext))
            buf.write(u')')
            return buf.getvalue()


class LexerATNConfig(ATNConfig):

    def __init__(self, state, alt=None, context=None, semantic=SemanticContext.NONE, lexerActionExecutor=None, config=None):
        super(LexerATNConfig, self).__init__(state=state, alt=alt, context=context, semantic=semantic, config=config)
        if config is not None:
            if lexerActionExecutor is None:
                lexerActionExecutor = config.lexerActionExecutor
        # This is the backing field for {@link #getLexerActionExecutor}.
        self.lexerActionExecutor = lexerActionExecutor
        self.passedThroughNonGreedyDecision = False if config is None else self.checkNonGreedyDecision(config, state)

    def __hash__(self):
        return hash(str(self.state.stateNumber) + str(self.alt) + str(self.context) \
                + str(self.semanticContext) + str(1 if self.passedThroughNonGreedyDecision else 0) \
                + str(self.lexerActionExecutor))

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, LexerATNConfig):
            return False
        if self.passedThroughNonGreedyDecision != other.passedThroughNonGreedyDecision:
            return False
        if self.lexerActionExecutor is not other.lexerActionExecutor:
            return False
        return super(LexerATNConfig, self).__eq__(other)

    def checkNonGreedyDecision(self, source, target):
        return source.passedThroughNonGreedyDecision \
            or isinstance(target, DecisionState) and target.nonGreedy
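# Equality sketch (comments only; `s` and `pc` are hypothetical ATNState and
# PredictionContext values): configs compare and hash on
# (state, alt, context, semanticContext), not on object identity.
#
#   c1 = ATNConfig(state=s, alt=1, context=pc)
#   c2 = ATNConfig(config=c1)            # the copy-constructor path above
#   assert c1 == c2 and hash(c1) == hash(c2)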
@ -0,0 +1,239 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#
# Specialized {@link Set}{@code <}{@link ATNConfig}{@code >} that can track
# info about the set, with support for combining similar configurations using a
# graph-structured stack.
#/
from io import StringIO

from antlr4.PredictionContext import merge
from antlr4.Utils import str_list
from antlr4.atn.ATN import ATN
from antlr4.atn.SemanticContext import SemanticContext
from antlr4.error.Errors import UnsupportedOperationException, IllegalStateException


class ATNConfigSet(object):
    #
    # The reason that we need this is because we don't want the hash map to use
    # the standard hash code and equals. We need all configurations with the same
    # {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively doubles
    # the number of objects associated with ATNConfigs. The other solution is to
    # use a hash table that lets us specify the equals/hashcode operation.

    def __init__(self, fullCtx=True):
        # All configs but hashed by (s, i, _, pi) not including context.  Wiped out
        # when we go readonly as this set becomes a DFA state.
        self.configLookup = set()
        # Indicates that this configuration set is part of a full context
        # LL prediction. It will be used to determine how to merge $. With SLL
        # it's a wildcard whereas it is not for LL context merge.
        self.fullCtx = fullCtx
        # Indicates that the set of configurations is read-only. Do not
        # allow any code to manipulate the set; DFA states will point at
        # the sets and they must not change. This does not protect the other
        # fields; in particular, conflictingAlts is set after
        # we've made this readonly.
        self.readonly = False
        # Track the elements as they are added to the set; supports get(i)#/
        self.configs = []

        # TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation
        # TODO: can we track conflicts as they are added to save scanning configs later?
        self.uniqueAlt = 0
        self.conflictingAlts = None

        # Used in parser and lexer. In lexer, it indicates we hit a pred
        # while computing a closure operation.  Don't make a DFA state from this.
        self.hasSemanticContext = False
        self.dipsIntoOuterContext = False

        self.cachedHashCode = -1

    def __iter__(self):
        return self.configs.__iter__()

    # Adding a new config means merging contexts with existing configs for
    # {@code (s, i, pi, _)}, where {@code s} is the
    # {@link ATNConfig#state}, {@code i} is the {@link ATNConfig#alt}, and
    # {@code pi} is the {@link ATNConfig#semanticContext}. We use
    # {@code (s,i,pi)} as key.
    #
    # <p>This method updates {@link #dipsIntoOuterContext} and
    # {@link #hasSemanticContext} when necessary.</p>
    #/
    def add(self, config, mergeCache=None):
        if self.readonly:
            raise Exception("This set is readonly")
        if config.semanticContext is not SemanticContext.NONE:
            self.hasSemanticContext = True
        if config.reachesIntoOuterContext > 0:
            self.dipsIntoOuterContext = True
        existing = self.getOrAdd(config)
        if existing is config:
            self.cachedHashCode = -1
            self.configs.append(config)  # track order here
            return True
        # a previous (s,i,pi,_), merge with it and save result
        rootIsWildcard = not self.fullCtx
        merged = merge(existing.context, config.context, rootIsWildcard, mergeCache)
        # no need to check for existing.context, config.context in cache
        # since only way to create new graphs is "call rule" and here. We
        # cache at both places.
        existing.reachesIntoOuterContext = max(existing.reachesIntoOuterContext, config.reachesIntoOuterContext)
        # make sure to preserve the precedence filter suppression during the merge
        if config.precedenceFilterSuppressed:
            existing.precedenceFilterSuppressed = True
        existing.context = merged  # replace context; no need to alt mapping
        return True

    def getOrAdd(self, config):
        for c in self.configLookup:
            if c==config:
                return c
        self.configLookup.add(config)
        return config

    def getStates(self):
        states = set()
        for c in self.configs:
            states.add(c.state)
        return states

    def getPredicates(self):
        preds = list()
        for c in self.configs:
            if c.semanticContext!=SemanticContext.NONE:
                preds.append(c.semanticContext)
        return preds

    def get(self, i):
        return self.configs[i]

    def optimizeConfigs(self, interpreter):
        if self.readonly:
            raise IllegalStateException("This set is readonly")
        if len(self.configLookup)==0:
            return
        for config in self.configs:
            config.context = interpreter.getCachedContext(config.context)

    def addAll(self, coll):
        for c in coll:
            self.add(c)
        return False

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, ATNConfigSet):
            return False

        same = self.configs is not None and \
            self.configs==other.configs and \
            self.fullCtx == other.fullCtx and \
            self.uniqueAlt == other.uniqueAlt and \
            self.conflictingAlts == other.conflictingAlts and \
            self.hasSemanticContext == other.hasSemanticContext and \
            self.dipsIntoOuterContext == other.dipsIntoOuterContext

        return same

    def __hash__(self):
        if self.readonly:
            if self.cachedHashCode == -1:
                self.cachedHashCode = self.hashConfigs()
            return self.cachedHashCode
        return self.hashConfigs()

    def hashConfigs(self):
        with StringIO() as buf:
            for cfg in self.configs:
                buf.write(unicode(cfg))
            return hash(buf.getvalue())

    def __len__(self):
        return len(self.configs)

    def isEmpty(self):
        return len(self.configs)==0

    def __contains__(self, item):
        if self.configLookup is None:
            raise UnsupportedOperationException("This method is not implemented for readonly sets.")
        return item in self.configLookup

    def containsFast(self, obj):
        if self.configLookup is None:
            raise UnsupportedOperationException("This method is not implemented for readonly sets.")
        # plain membership test; Python sets have no containsFast (Java remnant)
        return obj in self.configLookup

    def clear(self):
        if self.readonly:
            raise IllegalStateException("This set is readonly")
        del self.configs[:]  # Python 2 lists have no clear() method
        self.cachedHashCode = -1
        self.configLookup.clear()

    def setReadonly(self, readonly):
        self.readonly = readonly
        self.configLookup = None  # can't mod, no need for lookup cache

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        with StringIO() as buf:
            buf.write(str_list(self.configs))
            if self.hasSemanticContext:
                buf.write(u",hasSemanticContext=")
                buf.write(unicode(self.hasSemanticContext))
            if self.uniqueAlt!=ATN.INVALID_ALT_NUMBER:
                buf.write(u",uniqueAlt=")
                buf.write(unicode(self.uniqueAlt))
            if self.conflictingAlts is not None:
                buf.write(u",conflictingAlts=")
                buf.write(unicode(self.conflictingAlts))
            if self.dipsIntoOuterContext:
                buf.write(u",dipsIntoOuterContext")
            return buf.getvalue()


class OrderedATNConfigSet(ATNConfigSet):

    def __init__(self):
        super(OrderedATNConfigSet, self).__init__()
        # self.configLookup = set()
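# Behaviour sketch (comments only; `s`, `ctx1`, `ctx2` are hypothetical state
# and context values): adding two configs that share (state, alt,
# semanticContext) does not grow the set; their prediction contexts are merged
# into the existing entry instead.
#
#   configs = ATNConfigSet(fullCtx=False)
#   configs.add(ATNConfig(state=s, alt=1, context=ctx1))
#   configs.add(ATNConfig(state=s, alt=1, context=ctx2))   # merged, not appended
#   assert len(configs) == 1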
@ -0,0 +1,46 @@
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

class ATNDeserializationOptions(object):

    defaultOptions = None

    def __init__(self, copyFrom = None):
        self.readOnly = False
        self.verifyATN = True if copyFrom is None else copyFrom.verifyATN
        self.generateRuleBypassTransitions = False if copyFrom is None else copyFrom.generateRuleBypassTransitions

    def __setattr__(self, key, value):
        if key!="readOnly" and self.readOnly:
            raise Exception("The object is read only.")
        super(type(self), self).__setattr__(key,value)

ATNDeserializationOptions.defaultOptions = ATNDeserializationOptions()
ATNDeserializationOptions.defaultOptions.readOnly = True
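# Usage sketch (comments only): defaultOptions is frozen after module load, so
# callers wanting rule bypass transitions copy it first.
#
#   opts = ATNDeserializationOptions(ATNDeserializationOptions.defaultOptions)
#   opts.generateRuleBypassTransitions = True     # legal: the copy is not readOnly
#   ATNDeserializationOptions.defaultOptions.verifyATN = False   # would raise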
@ -0,0 +1,542 @@
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from uuid import UUID

# explicit imports for Token and IntervalSet, which are used directly below;
# the wildcard imports that follow may also re-export them
from antlr4.IntervalSet import IntervalSet
from antlr4.Token import Token
from antlr4.atn.ATN import ATN
from antlr4.atn.ATNType import ATNType
from antlr4.atn.ATNState import *
from antlr4.atn.Transition import *
from antlr4.atn.LexerAction import *
from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions


# This is the earliest supported serialized UUID.
BASE_SERIALIZED_UUID = UUID("AADB8D7E-AEEF-4415-AD2B-8204D6CF042E")

# This list contains all of the currently supported UUIDs, ordered by when
# the feature first appeared in this branch.
SUPPORTED_UUIDS = [ BASE_SERIALIZED_UUID ]

SERIALIZED_VERSION = 3

# This is the current serialized UUID.
SERIALIZED_UUID = BASE_SERIALIZED_UUID

class ATNDeserializer (object):

    def __init__(self, options = None):
        if options is None:
            options = ATNDeserializationOptions.defaultOptions
        self.deserializationOptions = options
        self.edgeFactories = None
        self.stateFactories = None
        self.actionFactories = None

    # Determines if a particular serialized representation of an ATN supports
    # a particular feature, identified by the {@link UUID} used for serializing
    # the ATN at the time the feature was first introduced.
    #
    # @param feature The {@link UUID} marking the first time the feature was
    # supported in the serialized ATN.
    # @param actualUuid The {@link UUID} of the actual serialized ATN which is
    # currently being deserialized.
    # @return {@code true} if the {@code actualUuid} value represents a
    # serialized ATN at or after the feature identified by {@code feature} was
    # introduced; otherwise, {@code false}.

    def isFeatureSupported(self, feature, actualUuid):
        # list.index raises ValueError for unknown entries (unlike Java's
        # indexOf), so guard with membership tests first
        if feature not in SUPPORTED_UUIDS or actualUuid not in SUPPORTED_UUIDS:
            return False
        return SUPPORTED_UUIDS.index(actualUuid) >= SUPPORTED_UUIDS.index(feature)

    def deserialize(self, data):
        self.reset(data)
        self.checkVersion()
        self.checkUUID()
        atn = self.readATN()
        self.readStates(atn)
        self.readRules(atn)
        self.readModes(atn)
        sets = self.readSets(atn)
        self.readEdges(atn, sets)
        self.readDecisions(atn)
        self.readLexerActions(atn)
        self.markPrecedenceDecisions(atn)
        self.verifyATN(atn)
        if self.deserializationOptions.generateRuleBypassTransitions \
                and atn.grammarType == ATNType.PARSER:
            self.generateRuleBypassTransitions(atn)
            # re-verify after modification
            self.verifyATN(atn)
        return atn
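    # Pipeline sketch (comments only; `serializedATN` stands for the string
    # literal a generated lexer/parser embeds):
    #
    #   atn = ATNDeserializer().deserialize(serializedATN)
    #   # states, rules, modes, sets, edges, decisions and lexer actions are
    #   # now populated, and verifyATN has checked the structural invariants.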
    def reset(self, data):
        def adjust(c):
            v = ord(c)
            return v-2 if v>1 else -1
        temp = [ adjust(c) for c in data ]
        # don't adjust the first value since that's the version number
        temp[0] = ord(data[0])
        self.data = temp
        self.pos = 0

    def checkVersion(self):
        version = self.readInt()
        if version != SERIALIZED_VERSION:
            raise Exception("Could not deserialize ATN with version " + str(version) + " (expected " + str(SERIALIZED_VERSION) + ").")

    def checkUUID(self):
        uuid = self.readUUID()
        if not uuid in SUPPORTED_UUIDS:
            raise Exception("Could not deserialize ATN with UUID: " + str(uuid) + \
                            " (expected " + str(SERIALIZED_UUID) + " or a legacy UUID).")
        self.uuid = uuid

    def readATN(self):
        grammarType = self.readInt()
        maxTokenType = self.readInt()
        return ATN(grammarType, maxTokenType)

    def readStates(self, atn):
        loopBackStateNumbers = []
        endStateNumbers = []
        nstates = self.readInt()
        for i in range(0, nstates):
            stype = self.readInt()
            # ignore bad type of states
            if stype==ATNState.INVALID_TYPE:
                atn.addState(None)
                continue
            ruleIndex = self.readInt()
            if ruleIndex == 0xFFFF:
                ruleIndex = -1

            s = self.stateFactory(stype, ruleIndex)
            if stype == ATNState.LOOP_END: # special case
                loopBackStateNumber = self.readInt()
                loopBackStateNumbers.append((s, loopBackStateNumber))
            elif isinstance(s, BlockStartState):
                endStateNumber = self.readInt()
                endStateNumbers.append((s, endStateNumber))

            atn.addState(s)

        # delay the assignment of loop back and end states until we know all the state instances have been initialized
        for pair in loopBackStateNumbers:
            pair[0].loopBackState = atn.states[pair[1]]

        for pair in endStateNumbers:
            pair[0].endState = atn.states[pair[1]]

        numNonGreedyStates = self.readInt()
        for i in range(0, numNonGreedyStates):
            stateNumber = self.readInt()
            atn.states[stateNumber].nonGreedy = True

        numPrecedenceStates = self.readInt()
        for i in range(0, numPrecedenceStates):
            stateNumber = self.readInt()
            atn.states[stateNumber].isPrecedenceRule = True

    def readRules(self, atn):
        nrules = self.readInt()
        if atn.grammarType == ATNType.LEXER:
            atn.ruleToTokenType = [0] * nrules

        atn.ruleToStartState = [0] * nrules
        for i in range(0, nrules):
            s = self.readInt()
            startState = atn.states[s]
            atn.ruleToStartState[i] = startState
            if atn.grammarType == ATNType.LEXER:
                tokenType = self.readInt()
                if tokenType == 0xFFFF:
                    tokenType = Token.EOF

                atn.ruleToTokenType[i] = tokenType

        atn.ruleToStopState = [0] * nrules
        for state in atn.states:
            if not isinstance(state, RuleStopState):
                continue
            atn.ruleToStopState[state.ruleIndex] = state
            atn.ruleToStartState[state.ruleIndex].stopState = state

    def readModes(self, atn):
        nmodes = self.readInt()
        for i in range(0, nmodes):
            s = self.readInt()
            atn.modeToStartState.append(atn.states[s])

    def readSets(self, atn):
        sets = []
        m = self.readInt()
        for i in range(0, m):
            iset = IntervalSet()
            sets.append(iset)
            n = self.readInt()
            containsEof = self.readInt()
            if containsEof!=0:
                iset.addOne(-1)
            for j in range(0, n):
                i1 = self.readInt()
                i2 = self.readInt()
                iset.addRange(Interval(i1, i2 + 1)) # range upper limit is exclusive
        return sets

    def readEdges(self, atn, sets):
        nedges = self.readInt()
        for i in range(0, nedges):
            src = self.readInt()
            trg = self.readInt()
            ttype = self.readInt()
            arg1 = self.readInt()
            arg2 = self.readInt()
            arg3 = self.readInt()
            trans = self.edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets)
            srcState = atn.states[src]
            srcState.addTransition(trans)

        # edges for rule stop states can be derived, so they aren't serialized
        for state in atn.states:
            for i in range(0, len(state.transitions)):
                t = state.transitions[i]
                if not isinstance(t, RuleTransition):
                    continue
                outermostPrecedenceReturn = -1
                if atn.ruleToStartState[t.target.ruleIndex].isPrecedenceRule:
                    if t.precedence == 0:
                        outermostPrecedenceReturn = t.target.ruleIndex
                trans = EpsilonTransition(t.followState, outermostPrecedenceReturn)
                atn.ruleToStopState[t.target.ruleIndex].addTransition(trans)

        for state in atn.states:
            if isinstance(state, BlockStartState):
                # we need to know the end state to set its start state
                if state.endState is None:
                    raise Exception("IllegalState")
                # block end states can only be associated to a single block start state
                if state.endState.startState is not None:
                    raise Exception("IllegalState")
                state.endState.startState = state

            elif isinstance(state, PlusLoopbackState):
                for i in range(0, len(state.transitions)):
                    target = state.transitions[i].target
                    if isinstance(target, PlusBlockStartState):
                        target.loopBackState = state
            elif isinstance(state, StarLoopbackState):
                for i in range(0, len(state.transitions)):
                    target = state.transitions[i].target
                    if isinstance(target, StarLoopEntryState):
                        target.loopBackState = state

    def readDecisions(self, atn):
        ndecisions = self.readInt()
        for i in range(0, ndecisions):
            s = self.readInt()
            decState = atn.states[s]
            atn.decisionToState.append(decState)
            decState.decision = i

    def readLexerActions(self, atn):
        if atn.grammarType == ATNType.LEXER:
            count = self.readInt()
            atn.lexerActions = [ None ] * count
            for i in range(0, count):
                actionType = self.readInt()
                data1 = self.readInt()
                if data1 == 0xFFFF:
                    data1 = -1
                data2 = self.readInt()
                if data2 == 0xFFFF:
                    data2 = -1
                lexerAction = self.lexerActionFactory(actionType, data1, data2)
                atn.lexerActions[i] = lexerAction

    def generateRuleBypassTransitions(self, atn):

        count = len(atn.ruleToStartState)
        atn.ruleToTokenType = [ 0 ] * count
        for i in range(0, count):
            atn.ruleToTokenType[i] = atn.maxTokenType + i + 1

        for i in range(0, count):
            self.generateRuleBypassTransition(atn, i)

    def generateRuleBypassTransition(self, atn, idx):

        bypassStart = BasicBlockStartState()
        bypassStart.ruleIndex = idx
        atn.addState(bypassStart)

        bypassStop = BlockEndState()
        bypassStop.ruleIndex = idx
        atn.addState(bypassStop)

        bypassStart.endState = bypassStop
        atn.defineDecisionState(bypassStart)

        bypassStop.startState = bypassStart

        excludeTransition = None

        if atn.ruleToStartState[idx].isPrecedenceRule:
            # wrap from the beginning of the rule to the StarLoopEntryState
            endState = None
            for state in atn.states:
                if self.stateIsEndStateFor(state, idx):
                    endState = state
                    excludeTransition = state.loopBackState.transitions[0]
                    break

            if excludeTransition is None:
                raise Exception("Couldn't identify final state of the precedence rule prefix section.")

        else:

            endState = atn.ruleToStopState[idx]

        # all non-excluded transitions that currently target end state need to target blockEnd instead
        for state in atn.states:
            for transition in state.transitions:
                if transition == excludeTransition:
                    continue
                if transition.target == endState:
                    transition.target = bypassStop

        # all transitions leaving the rule start state need to leave blockStart instead
        # (move from the end until none remain; the original loop kept a fixed
        # count and never decremented it, indexing past the shrinking list)
        ruleToStartState = atn.ruleToStartState[idx]
        while len(ruleToStartState.transitions) > 0:
            bypassStart.addTransition(ruleToStartState.transitions[-1])
            del ruleToStartState.transitions[-1]

        # link the new states
        atn.ruleToStartState[idx].addTransition(EpsilonTransition(bypassStart))
        bypassStop.addTransition(EpsilonTransition(endState))

        matchState = BasicState()
        atn.addState(matchState)
        matchState.addTransition(AtomTransition(bypassStop, atn.ruleToTokenType[idx]))
        bypassStart.addTransition(EpsilonTransition(matchState))

    def stateIsEndStateFor(self, state, idx):
        if state.ruleIndex != idx:
            return None
        if not isinstance(state, StarLoopEntryState):
            return None

        maybeLoopEndState = state.transitions[len(state.transitions) - 1].target
        if not isinstance(maybeLoopEndState, LoopEndState):
            return None

        if maybeLoopEndState.epsilonOnlyTransitions and \
                isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
            return state
        else:
            return None

    #
    # Analyze the {@link StarLoopEntryState} states in the specified ATN to set
    # the {@link StarLoopEntryState#precedenceRuleDecision} field to the
    # correct value.
    #
    # @param atn The ATN.
    #
    def markPrecedenceDecisions(self, atn):
        for state in atn.states:
            if not isinstance(state, StarLoopEntryState):
                continue

            # We analyze the ATN to determine if this ATN decision state is the
            # decision for the closure block that determines whether a
            # precedence rule should continue or complete.
            #
            if atn.ruleToStartState[state.ruleIndex].isPrecedenceRule:
                maybeLoopEndState = state.transitions[len(state.transitions) - 1].target
                if isinstance(maybeLoopEndState, LoopEndState):
                    if maybeLoopEndState.epsilonOnlyTransitions and \
                            isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
                        state.precedenceRuleDecision = True

    def verifyATN(self, atn):
        if not self.deserializationOptions.verifyATN:
            return
        # verify assumptions
        for state in atn.states:
            if state is None:
                continue

            self.checkCondition(state.epsilonOnlyTransitions or len(state.transitions) <= 1)

            if isinstance(state, PlusBlockStartState):
                self.checkCondition(state.loopBackState is not None)

            if isinstance(state, StarLoopEntryState):
                self.checkCondition(state.loopBackState is not None)
                self.checkCondition(len(state.transitions) == 2)

                if isinstance(state.transitions[0].target, StarBlockStartState):
                    self.checkCondition(isinstance(state.transitions[1].target, LoopEndState))
                    self.checkCondition(not state.nonGreedy)
                elif isinstance(state.transitions[0].target, LoopEndState):
                    self.checkCondition(isinstance(state.transitions[1].target, StarBlockStartState))
                    self.checkCondition(state.nonGreedy)
                else:
                    raise Exception("IllegalState")

            if isinstance(state, StarLoopbackState):
                self.checkCondition(len(state.transitions) == 1)
                self.checkCondition(isinstance(state.transitions[0].target, StarLoopEntryState))

            if isinstance(state, LoopEndState):
                self.checkCondition(state.loopBackState is not None)

            if isinstance(state, RuleStartState):
                self.checkCondition(state.stopState is not None)

            if isinstance(state, BlockStartState):
                self.checkCondition(state.endState is not None)

            if isinstance(state, BlockEndState):
                self.checkCondition(state.startState is not None)

            if isinstance(state, DecisionState):
                self.checkCondition(len(state.transitions) <= 1 or state.decision >= 0)
            else:
                self.checkCondition(len(state.transitions) <= 1 or isinstance(state, RuleStopState))

    def checkCondition(self, condition, message=None):
        if not condition:
            if message is None:
                message = "IllegalState"
            raise Exception(message)

    def readInt(self):
        i = self.data[self.pos]
        self.pos += 1
        return i

    def readInt32(self):
        low = self.readInt()
        high = self.readInt()
        return low | (high << 16)

    def readLong(self):
        low = self.readInt32()
        high = self.readInt32()
        return (low & 0x00000000FFFFFFFF) | (high << 32)

    def readUUID(self):
        low = self.readLong()
        high = self.readLong()
        allBits = (low & 0xFFFFFFFFFFFFFFFF) | (high << 64)
        return UUID(int=allBits)
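    # Worked example of the integer packing above (comments only): the stream
    # stores 16-bit units, and readInt32 combines two of them low-half first.
    #
    #   low, high = 0x5678, 0x1234
    #   low | (high << 16)    # == 0x12345678
    #
    # readLong chains two such 32-bit halves, and readUUID two 64-bit halves,
    # into the 128-bit integer handed to uuid.UUID(int=...).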
    def edgeFactory(self, atn, type, src, trg, arg1, arg2, arg3, sets):
        target = atn.states[trg]
        if self.edgeFactories is None:
            ef = [None] * 11
            ef[0] = lambda args : None
            ef[Transition.EPSILON] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
                EpsilonTransition(target)
            ef[Transition.RANGE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
                RangeTransition(target, Token.EOF, arg2) if arg3 != 0 else RangeTransition(target, arg1, arg2)
            ef[Transition.RULE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
                RuleTransition(atn.states[arg1], arg2, arg3, target)
            ef[Transition.PREDICATE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
                PredicateTransition(target, arg1, arg2, arg3 != 0)
            ef[Transition.PRECEDENCE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
                PrecedencePredicateTransition(target, arg1)
            ef[Transition.ATOM] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
                AtomTransition(target, Token.EOF) if arg3 != 0 else AtomTransition(target, arg1)
            ef[Transition.ACTION] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
                ActionTransition(target, arg1, arg2, arg3 != 0)
            ef[Transition.SET] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
                SetTransition(target, sets[arg1])
            ef[Transition.NOT_SET] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
                NotSetTransition(target, sets[arg1])
            ef[Transition.WILDCARD] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
                WildcardTransition(target)
            self.edgeFactories = ef

        # >= : with 11 entries the valid type codes are 0..10 (was "type >",
        # which let type == len(...) fall through to an IndexError)
        if type >= len(self.edgeFactories) or self.edgeFactories[type] is None:
            raise Exception("The specified transition type: " + str(type) + " is not valid.")
        else:
            return self.edgeFactories[type](atn, src, trg, arg1, arg2, arg3, sets, target)

    def stateFactory(self, type, ruleIndex):
        if self.stateFactories is None:
            sf = [None] * 13
            sf[ATNState.INVALID_TYPE] = lambda : None
            sf[ATNState.BASIC] = lambda : BasicState()
            sf[ATNState.RULE_START] = lambda : RuleStartState()
            sf[ATNState.BLOCK_START] = lambda : BasicBlockStartState()
            sf[ATNState.PLUS_BLOCK_START] = lambda : PlusBlockStartState()
            sf[ATNState.STAR_BLOCK_START] = lambda : StarBlockStartState()
            sf[ATNState.TOKEN_START] = lambda : TokensStartState()
            sf[ATNState.RULE_STOP] = lambda : RuleStopState()
            sf[ATNState.BLOCK_END] = lambda : BlockEndState()
            sf[ATNState.STAR_LOOP_BACK] = lambda : StarLoopbackState()
            sf[ATNState.STAR_LOOP_ENTRY] = lambda : StarLoopEntryState()
            sf[ATNState.PLUS_LOOP_BACK] = lambda : PlusLoopbackState()
            sf[ATNState.LOOP_END] = lambda : LoopEndState()
            self.stateFactories = sf

        # >= for the same off-by-one reason as edgeFactory
        if type >= len(self.stateFactories) or self.stateFactories[type] is None:
            raise Exception("The specified state type " + str(type) + " is not valid.")
        else:
            s = self.stateFactories[type]()
            if s is not None:
                s.ruleIndex = ruleIndex
            return s

    def lexerActionFactory(self, type, data1, data2):
        if self.actionFactories is None:
            af = [ None ] * 8
            af[LexerActionType.CHANNEL] = lambda data1, data2: LexerChannelAction(data1)
            af[LexerActionType.CUSTOM] = lambda data1, data2: LexerCustomAction(data1, data2)
            af[LexerActionType.MODE] = lambda data1, data2: LexerModeAction(data1)
            af[LexerActionType.MORE] = lambda data1, data2: LexerMoreAction.INSTANCE
            af[LexerActionType.POP_MODE] = lambda data1, data2: LexerPopModeAction.INSTANCE
            af[LexerActionType.PUSH_MODE] = lambda data1, data2: LexerPushModeAction(data1)
            af[LexerActionType.SKIP] = lambda data1, data2: LexerSkipAction.INSTANCE
            af[LexerActionType.TYPE] = lambda data1, data2: LexerTypeAction(data1)
            self.actionFactories = af

        # >= for the same off-by-one reason as edgeFactory
        if type >= len(self.actionFactories) or self.actionFactories[type] is None:
            raise Exception("The specified lexer action type " + str(type) + " is not valid.")
        else:
            return self.actionFactories[type](data1, data2)
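    # Design note: the three factories above replace the Java switch with a
    # lazily built list indexed by the serialized type code; unknown codes fall
    # through to the explicit "is not valid" error rather than an IndexError.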
@ -0,0 +1,70 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from antlr4.PredictionContext import getCachedPredictionContext
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.dfa.DFAState import DFAState


class ATNSimulator(object):

    # Must distinguish between missing edge and edge we know leads nowhere#/
    ERROR = DFAState(0x7FFFFFFF, ATNConfigSet())

    # The context cache maps all PredictionContext objects that are ==
    # to a single cached copy. This cache is shared across all contexts
    # in all ATNConfigs in all DFA states.  We rebuild each ATNConfigSet
    # to use only cached nodes/graphs in addDFAState(). We don't want to
    # fill this during closure() since there are lots of contexts that
    # pop up but are not used ever again. It also greatly slows down closure().
    #
    # <p>This cache makes a huge difference in memory and a little bit in speed.
    # For the Java grammar on java.*, it dropped the memory requirements
    # at the end from 25M to 16M. We don't store any of the full context
    # graphs in the DFA because they are limited to local context only,
    # but apparently there's a lot of repetition there as well. We optimize
    # the config contexts before storing the config set in the DFA states
    # by literally rebuilding them with cached subgraphs only.</p>
    #
    # <p>I tried a cache for use during closure operations, that was
    # whacked after each adaptivePredict(). It cost a little bit
    # more time I think and doesn't save on the overall footprint
    # so it's not worth the complexity.</p>
    #/
    def __init__(self, atn, sharedContextCache):
        self.atn = atn
        self.sharedContextCache = sharedContextCache

    def getCachedContext(self, context):
        if self.sharedContextCache is None:
            return context
        visited = dict()
        return getCachedPredictionContext(context, self.sharedContextCache, visited)
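# Usage sketch (comments only; `cache` stands for the shared prediction-context
# cache object, whose concrete type lives in PredictionContext and is assumed
# here):
#
#   sim = ATNSimulator(atn, cache)
#   ctx2 = sim.getCachedContext(ctx)   # same graph, rebuilt from cached subgraphs
#   sim_no_cache = ATNSimulator(atn, None)
#   assert sim_no_cache.getCachedContext(ctx) is ctx   # no cache -> passthrough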
@@ -0,0 +1,283 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

# The following images show the relation of states and
# {@link ATNState#transitions} for various grammar constructs.
#
# <ul>
#
# <li>Solid edges marked with an ε indicate a required
# {@link EpsilonTransition}.</li>
#
# <li>Dashed edges indicate locations where any transition derived from
# {@link Transition} might appear.</li>
#
# <li>Dashed nodes are place holders for either a sequence of linked
# {@link BasicState} states or the inclusion of a block representing a nested
# construct in one of the forms below.</li>
#
# <li>Nodes showing multiple outgoing alternatives with a {@code ...} support
# any number of alternatives (one or more). Nodes without the {@code ...} only
# support the exact number of alternatives shown in the diagram.</li>
#
# </ul>
#
# <h2>Basic Blocks</h2>
#
# <h3>Rule</h3>
#
# <embed src="images/Rule.svg" type="image/svg+xml"/>
#
# <h3>Block of 1 or more alternatives</h3>
#
# <embed src="images/Block.svg" type="image/svg+xml"/>
#
# <h2>Greedy Loops</h2>
#
# <h3>Greedy Closure: {@code (...)*}</h3>
#
# <embed src="images/ClosureGreedy.svg" type="image/svg+xml"/>
#
# <h3>Greedy Positive Closure: {@code (...)+}</h3>
#
# <embed src="images/PositiveClosureGreedy.svg" type="image/svg+xml"/>
#
# <h3>Greedy Optional: {@code (...)?}</h3>
#
# <embed src="images/OptionalGreedy.svg" type="image/svg+xml"/>
#
# <h2>Non-Greedy Loops</h2>
#
# <h3>Non-Greedy Closure: {@code (...)*?}</h3>
#
# <embed src="images/ClosureNonGreedy.svg" type="image/svg+xml"/>
#
# <h3>Non-Greedy Positive Closure: {@code (...)+?}</h3>
#
# <embed src="images/PositiveClosureNonGreedy.svg" type="image/svg+xml"/>
#
# <h3>Non-Greedy Optional: {@code (...)??}</h3>
#
# <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/>
#

INITIAL_NUM_TRANSITIONS = 4

class ATNState(object):

    # constants for serialization
    INVALID_TYPE = 0
    BASIC = 1
    RULE_START = 2
    BLOCK_START = 3
    PLUS_BLOCK_START = 4
    STAR_BLOCK_START = 5
    TOKEN_START = 6
    RULE_STOP = 7
    BLOCK_END = 8
    STAR_LOOP_BACK = 9
    STAR_LOOP_ENTRY = 10
    PLUS_LOOP_BACK = 11
    LOOP_END = 12

    serializationNames = [
        "INVALID",
        "BASIC",
        "RULE_START",
        "BLOCK_START",
        "PLUS_BLOCK_START",
        "STAR_BLOCK_START",
        "TOKEN_START",
        "RULE_STOP",
        "BLOCK_END",
        "STAR_LOOP_BACK",
        "STAR_LOOP_ENTRY",
        "PLUS_LOOP_BACK",
        "LOOP_END" ]

    INVALID_STATE_NUMBER = -1

    def __init__(self):
        # Which ATN are we in?
        self.atn = None
        self.stateNumber = ATNState.INVALID_STATE_NUMBER
        self.stateType = None
        self.ruleIndex = 0 # at runtime, we don't have Rule objects
        self.epsilonOnlyTransitions = False
        # Track the transitions emanating from this ATN state.
        self.transitions = []
        # Used to cache lookahead during parsing, not used during construction
        self.nextTokenWithinRule = None

    def __hash__(self):
        return self.stateNumber

    def __eq__(self, other):
        if isinstance(other, ATNState):
            return self.stateNumber==other.stateNumber
        else:
            return False

    def onlyHasEpsilonTransitions(self):
        return self.epsilonOnlyTransitions

    def isNonGreedyExitState(self):
        return False

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        return unicode(self.stateNumber)

    def addTransition(self, trans, index=-1):
        if len(self.transitions)==0:
            self.epsilonOnlyTransitions = trans.isEpsilon
        elif self.epsilonOnlyTransitions != trans.isEpsilon:
            self.epsilonOnlyTransitions = False
            # TODO System.err.format(Locale.getDefault(), "ATN state %d has both epsilon and non-epsilon transitions.\n", stateNumber);
        if index==-1:
            self.transitions.append(trans)
        else:
            self.transitions.insert(index, trans)


class BasicState(ATNState):

    def __init__(self):
        super(BasicState, self).__init__()
        self.stateType = self.BASIC


class DecisionState(ATNState):

    def __init__(self):
        super(DecisionState, self).__init__()
        self.decision = -1
        self.nonGreedy = False

# The start of a regular {@code (...)} block.
class BlockStartState(DecisionState):

    def __init__(self):
        super(BlockStartState, self).__init__()
        self.endState = None

class BasicBlockStartState(BlockStartState):

    def __init__(self):
        super(BasicBlockStartState, self).__init__()
        self.stateType = self.BLOCK_START

# Terminal node of a simple {@code (a|b|c)} block.
class BlockEndState(ATNState):

    def __init__(self):
        super(BlockEndState, self).__init__()
        self.stateType = self.BLOCK_END
        self.startState = None

# The last node in the ATN for a rule, unless that rule is the start symbol.
# In that case, there is one transition to EOF. Later, we might encode
# references to all calls to this rule to compute FOLLOW sets for
# error handling.
#
class RuleStopState(ATNState):

    def __init__(self):
        super(RuleStopState, self).__init__()
        self.stateType = self.RULE_STOP

class RuleStartState(ATNState):

    def __init__(self):
        super(RuleStartState, self).__init__()
        self.stateType = self.RULE_START
        self.stopState = None
        self.isPrecedenceRule = False

# Decision state for {@code A+} and {@code (A|B)+}. It has two transitions:
# one to the loop back to start of the block and one to exit.
#
class PlusLoopbackState(DecisionState):

    def __init__(self):
        super(PlusLoopbackState, self).__init__()
        self.stateType = self.PLUS_LOOP_BACK

# Start of {@code (A|B|...)+} loop. Technically a decision state, but
# we don't use it for code generation; somebody might need it, so I'm defining
# it for completeness. In reality, the {@link PlusLoopbackState} node is the
# real decision-making node for {@code A+}.
#
class PlusBlockStartState(BlockStartState):

    def __init__(self):
        super(PlusBlockStartState, self).__init__()
        self.stateType = self.PLUS_BLOCK_START
        self.loopBackState = None

# The block that begins a closure loop.
class StarBlockStartState(BlockStartState):

    def __init__(self):
        super(StarBlockStartState, self).__init__()
        self.stateType = self.STAR_BLOCK_START


class StarLoopbackState(ATNState):

    def __init__(self):
        super(StarLoopbackState, self).__init__()
        self.stateType = self.STAR_LOOP_BACK


class StarLoopEntryState(DecisionState):

    def __init__(self):
        super(StarLoopEntryState, self).__init__()
        self.stateType = self.STAR_LOOP_ENTRY
        self.loopBackState = None
        # Indicates whether this state can benefit from a precedence DFA during SLL decision making.
        self.precedenceRuleDecision = None

# Mark the end of a * or + loop.
class LoopEndState(ATNState):

    def __init__(self):
        super(LoopEndState, self).__init__()
        self.stateType = self.LOOP_END
        self.loopBackState = None

# The Tokens rule start state linking to each lexer rule start state
class TokensStartState(DecisionState):

    def __init__(self):
        super(TokensStartState, self).__init__()
        self.stateType = self.TOKEN_START

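As an illustrative aside, the epsilon bookkeeping in addTransition() above can be exercised with a stand-in transition object (FakeTransition is hypothetical; the runtime passes Transition subclasses, which all expose an isEpsilon flag):

    # Illustrative only: a state starts epsilon-only, and mixing
    # transition kinds clears the flag.
    class FakeTransition(object):
        def __init__(self, isEpsilon):
            self.isEpsilon = isEpsilon

    s = BasicState()
    s.addTransition(FakeTransition(True))
    assert s.onlyHasEpsilonTransitions()
    s.addTransition(FakeTransition(False))  # mixing kinds clears the flag
    assert not s.onlyHasEpsilonTransitions()
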
@@ -0,0 +1,37 @@
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/

# Represents the type of recognizer an ATN applies to.

class ATNType(object):

    LEXER = 0
    PARSER = 1

@@ -0,0 +1,588 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/

# When we hit an accept state in either the DFA or the ATN, we
# have to notify the character stream to start buffering characters
# via {@link IntStream#mark} and record the current state. The current sim state
# includes the current index into the input, the current line,
# and current character position in that line. Note that the Lexer is
# tracking the starting line and character position of the token. These
# variables track the "state" of the simulator when it hits an accept state.
#
# <p>We track these variables separately for the DFA and ATN simulation
# because the DFA simulation often has to fail over to the ATN
# simulation. If the ATN simulation fails, we need the DFA to fall
# back to its previously accepted state, if any. If the ATN succeeds,
# then the ATN does the accept and the DFA simulator that invoked it
# can simply return the predicted token type.</p>
#/
from antlr4 import Lexer
from antlr4.PredictionContext import SingletonPredictionContext, PredictionContext
from antlr4.Token import Token
from antlr4.atn.ATN import ATN
from antlr4.atn.ATNConfig import LexerATNConfig
from antlr4.atn.ATNSimulator import ATNSimulator
from antlr4.atn.ATNConfigSet import OrderedATNConfigSet
from antlr4.atn.ATNState import RuleStopState
from antlr4.atn.LexerActionExecutor import LexerActionExecutor
from antlr4.atn.Transition import Transition
from antlr4.dfa.DFAState import DFAState
from antlr4.error.Errors import LexerNoViableAltException, UnsupportedOperationException

class SimState(object):

    def __init__(self):
        self.reset()

    def reset(self):
        self.index = -1
        self.line = 0
        self.column = -1
        self.dfaState = None

class LexerATNSimulator(ATNSimulator):

    debug = False
    dfa_debug = False

    MIN_DFA_EDGE = 0
    MAX_DFA_EDGE = 127 # forces unicode to stay in ATN

    ERROR = None

    match_calls = 0

    def __init__(self, recog, atn, decisionToDFA, sharedContextCache):
        super(LexerATNSimulator, self).__init__(atn, sharedContextCache)
        self.decisionToDFA = decisionToDFA
        self.recog = recog
        # The current token's starting index into the character stream.
        # Shared across DFA to ATN simulation in case the ATN fails and the
        # DFA did not have a previous accept state. In this case, we use the
        # ATN-generated exception object.
        self.startIndex = -1
        # line number 1..n within the input#/
        self.line = 1
        # The index of the character relative to the beginning of the line 0..n-1#/
        self.column = 0
        from antlr4.Lexer import Lexer
        self.mode = Lexer.DEFAULT_MODE
        # Used during DFA/ATN exec to record the most recent accept configuration info
        self.prevAccept = SimState()

    def copyState(self, simulator):
        self.column = simulator.column
        self.line = simulator.line
        self.mode = simulator.mode
        self.startIndex = simulator.startIndex

    def match(self, input, mode):
        self.match_calls += 1
        self.mode = mode
        mark = input.mark()
        try:
            self.startIndex = input.index
            self.prevAccept.reset()
            dfa = self.decisionToDFA[mode]
            if dfa.s0 is None:
                return self.matchATN(input)
            else:
                return self.execATN(input, dfa.s0)
        finally:
            input.release(mark)

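The mark/release discipline used by match() above can be sketched against a minimal stand-in stream; ToyStream is hypothetical and only models the pinning contract (mark() keeps characters available for later seek(), release() lets the buffer drop them), not actual buffering:

    # Illustrative only: the try/finally mark-release shape match() uses.
    class ToyStream(object):
        def __init__(self, data):
            self.data, self.index, self._marks = data, 0, 0
        def mark(self):
            self._marks += 1      # pin the buffer
            return self._marks
        def release(self, marker):
            self._marks -= 1      # unpin; buffer may now be discarded

    stream = ToyStream(u"abc")
    m = stream.mark()
    try:
        pass  # simulate DFA/ATN work that may seek backwards
    finally:
        stream.release(m)
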
    def reset(self):
        self.prevAccept.reset()
        self.startIndex = -1
        self.line = 1
        self.column = 0
        self.mode = Lexer.DEFAULT_MODE

    def matchATN(self, input):
        startState = self.atn.modeToStartState[self.mode]

        if self.debug:
            print("matchATN mode " + str(self.mode) + " start: " + str(startState))

        old_mode = self.mode
        s0_closure = self.computeStartState(input, startState)
        suppressEdge = s0_closure.hasSemanticContext
        s0_closure.hasSemanticContext = False

        next = self.addDFAState(s0_closure)
        if not suppressEdge:
            self.decisionToDFA[self.mode].s0 = next

        predict = self.execATN(input, next)

        if self.debug:
            print("DFA after matchATN: " + str(self.decisionToDFA[old_mode].toLexerString()))

        return predict

    def execATN(self, input, ds0):
        if self.debug:
            print("start state closure=" + str(ds0.configs))

        if ds0.isAcceptState:
            # allow zero-length tokens
            self.captureSimState(self.prevAccept, input, ds0)

        t = input.LA(1)
        s = ds0 # s is current/from DFA state

        while True: # while more work
            if self.debug:
                print("execATN loop starting closure: %s" % s.configs)

            # As we move src->trg, src->trg, we keep track of the previous trg to
            # avoid looking up the DFA state again, which is expensive.
            # If the previous target was already part of the DFA, we might
            # be able to avoid doing a reach operation upon t. If s!=null,
            # it means that semantic predicates didn't prevent us from
            # creating a DFA state. Once we know s!=null, we check to see if
            # the DFA state has an edge already for t. If so, we can just reuse
            # its configuration set; there's no point in re-computing it.
            # This is kind of like doing DFA simulation within the ATN
            # simulation because DFA simulation is really just a way to avoid
            # computing reach/closure sets. Technically, once we know that
            # we have a previously added DFA state, we could jump over to
            # the DFA simulator. But, that would mean popping back and forth
            # a lot and making things more complicated algorithmically.
            # This optimization makes a lot of sense for loops within DFA.
            # A character will take us back to an existing DFA state
            # that already has lots of edges out of it. e.g., .* in comments.
            # print("Target for:" + str(s) + " and:" + str(t))
            target = self.getExistingTargetState(s, t)
            # print("Existing:" + str(target))
            if target is None:
                target = self.computeTargetState(input, s, t)
                # print("Computed:" + str(target))

            if target == self.ERROR:
                break

            # If this is a consumable input element, make sure to consume before
            # capturing the accept state so the input index, line, and char
            # position accurately reflect the state of the interpreter at the
            # end of the token.
            if t != Token.EOF:
                self.consume(input)

            if target.isAcceptState:
                self.captureSimState(self.prevAccept, input, target)
                if t == Token.EOF:
                    break

            t = input.LA(1)

            s = target # flip; current DFA target becomes new src/from state

        return self.failOrAccept(self.prevAccept, input, s.configs, t)

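The long comment inside execATN() boils down to memoizing (state, symbol) transitions so the expensive reach/closure computation runs at most once per edge. An illustrative reduction of the same loop, with a plain dict standing in for the DFA edge table and a sentinel playing the role of ERROR:

    # Illustrative only: look up a cached edge first, compute and
    # memoize it otherwise, stop on the error sentinel.
    ERROR = object()

    def run(start, compute_target, text, cache):
        state = start
        for ch in text:
            key = (state, ch)
            target = cache.get(key)                 # getExistingTargetState
            if target is None:
                target = compute_target(state, ch)  # reach/closure work
                cache[key] = target                 # addDFAEdge memoizes it
            if target is ERROR:                     # no move on ch: stop
                break
            state = target
        return state

    cache = {}
    table = {("s0", "a"): "s1", ("s1", "b"): "s1"}
    step = lambda s, ch: table.get((s, ch), ERROR)
    assert run("s0", step, "abb", cache) == "s1"
    assert ("s1", "b") in cache   # the second 'b' reused the cached edge
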
    # Get an existing target state for an edge in the DFA. If the target state
    # for the edge has not yet been computed or is otherwise not available,
    # this method returns {@code null}.
    #
    # @param s The current DFA state
    # @param t The next input symbol
    # @return The existing target DFA state for the given input symbol
    # {@code t}, or {@code null} if the target state for this edge is not
    # already cached
    def getExistingTargetState(self, s, t):
        if s.edges is None or t < self.MIN_DFA_EDGE or t > self.MAX_DFA_EDGE:
            return None

        target = s.edges[t - self.MIN_DFA_EDGE]
        if self.debug and target is not None:
            print("reuse state " + str(s.stateNumber) + " edge to " + str(target.stateNumber))

        return target

    # Compute a target state for an edge in the DFA, and attempt to add the
    # computed state and corresponding edge to the DFA.
    #
    # @param input The input stream
    # @param s The current DFA state
    # @param t The next input symbol
    #
    # @return The computed target DFA state for the given input symbol
    # {@code t}. If {@code t} does not lead to a valid DFA state, this method
    # returns {@link #ERROR}.
    def computeTargetState(self, input, s, t):
        reach = OrderedATNConfigSet()

        # if we don't find an existing DFA state
        # Fill reach starting from closure, following t transitions
        self.getReachableConfigSet(input, s.configs, reach, t)

        if len(reach)==0: # we got nowhere on t from s
            if not reach.hasSemanticContext:
                # we got nowhere on t, don't throw out this knowledge; it'd
                # cause a failover from DFA later.
                self.addDFAEdge(s, t, self.ERROR)

            # stop when we can't match any more char
            return self.ERROR

        # Add an edge from s to target DFA found/created for reach
        return self.addDFAEdge(s, t, cfgs=reach)

    def failOrAccept(self, prevAccept, input, reach, t):
        if self.prevAccept.dfaState is not None:
            lexerActionExecutor = prevAccept.dfaState.lexerActionExecutor
            self.accept(input, lexerActionExecutor, self.startIndex, prevAccept.index, prevAccept.line, prevAccept.column)
            return prevAccept.dfaState.prediction
        else:
            # if no accept and EOF is first char, return EOF
            if t==Token.EOF and input.index==self.startIndex:
                return Token.EOF
            raise LexerNoViableAltException(self.recog, input, self.startIndex, reach)

    # Given a starting configuration set, figure out all ATN configurations
    # we can reach upon input {@code t}. Parameter {@code reach} is a return
    # parameter.
    def getReachableConfigSet(self, input, closure, reach, t):
        # this is used to skip processing for configs which have a lower priority
        # than a config that already reached an accept state for the same rule
        skipAlt = ATN.INVALID_ALT_NUMBER
        for cfg in closure:
            currentAltReachedAcceptState = ( cfg.alt == skipAlt )
            if currentAltReachedAcceptState and cfg.passedThroughNonGreedyDecision:
                continue

            if self.debug:
                print("testing %s at %s" % (self.getTokenName(t), cfg.toString(self.recog, True)))

            for trans in cfg.state.transitions: # for each transition
                target = self.getReachableTarget(trans, t)
                if target is not None:
                    lexerActionExecutor = cfg.lexerActionExecutor
                    if lexerActionExecutor is not None:
                        lexerActionExecutor = lexerActionExecutor.fixOffsetBeforeMatch(input.index - self.startIndex)

                    treatEofAsEpsilon = (t == Token.EOF)
                    config = LexerATNConfig(state=target, lexerActionExecutor=lexerActionExecutor, config=cfg)
                    if self.closure(input, config, reach, currentAltReachedAcceptState, True, treatEofAsEpsilon):
                        # any remaining configs for this alt have a lower priority than
                        # the one that just reached an accept state.
                        skipAlt = cfg.alt

    def accept(self, input, lexerActionExecutor, startIndex, index, line, charPos):
        if self.debug:
            print("ACTION %s" % lexerActionExecutor)

        # seek to after last char in token
        input.seek(index)
        self.line = line
        self.column = charPos

        if lexerActionExecutor is not None and self.recog is not None:
            lexerActionExecutor.execute(self.recog, input, startIndex)

    def getReachableTarget(self, trans, t):
        if trans.matches(t, 0, 0xFFFE):
            return trans.target
        else:
            return None

    def computeStartState(self, input, p):
        initialContext = PredictionContext.EMPTY
        configs = OrderedATNConfigSet()
        for i in range(0,len(p.transitions)):
            target = p.transitions[i].target
            c = LexerATNConfig(state=target, alt=i+1, context=initialContext)
            self.closure(input, c, configs, False, False, False)
        return configs

    # Since the alternatives within any lexer decision are ordered by
    # preference, this method stops pursuing the closure as soon as an accept
    # state is reached. After the first accept state is reached by depth-first
    # search from {@code config}, all other (potentially reachable) states for
    # this rule would have a lower priority.
    #
    # @return {@code true} if an accept state is reached, otherwise
    # {@code false}.
    def closure(self, input, config, configs, currentAltReachedAcceptState,
                speculative, treatEofAsEpsilon):
        if self.debug:
            print("closure(" + config.toString(self.recog, True) + ")")

        if isinstance( config.state, RuleStopState ):
            if self.debug:
                if self.recog is not None:
                    print("closure at %s rule stop %s" % (self.recog.getRuleNames()[config.state.ruleIndex], config))
                else:
                    print("closure at rule stop %s" % config)

            if config.context is None or config.context.hasEmptyPath():
                if config.context is None or config.context.isEmpty():
                    configs.add(config)
                    return True
                else:
                    configs.add(LexerATNConfig(state=config.state, config=config, context=PredictionContext.EMPTY))
                    currentAltReachedAcceptState = True

            if config.context is not None and not config.context.isEmpty():
                for i in range(0,len(config.context)):
                    if config.context.getReturnState(i) != PredictionContext.EMPTY_RETURN_STATE:
                        newContext = config.context.getParent(i) # "pop" return state
                        returnState = self.atn.states[config.context.getReturnState(i)]
                        c = LexerATNConfig(state=returnState, config=config, context=newContext)
                        currentAltReachedAcceptState = self.closure(input, c, configs,
                                currentAltReachedAcceptState, speculative, treatEofAsEpsilon)

            return currentAltReachedAcceptState

        # optimization
        if not config.state.epsilonOnlyTransitions:
            if not currentAltReachedAcceptState or not config.passedThroughNonGreedyDecision:
                configs.add(config)

        for t in config.state.transitions:
            c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
            if c is not None:
                currentAltReachedAcceptState = self.closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)

        return currentAltReachedAcceptState

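As an illustrative aside, the skipAlt cutoff in getReachableConfigSet() above reduces to this priority filter over alternatives (the configs are assumed ordered by preference, as they are in a lexer decision; the non-greedy refinement is omitted):

    # Illustrative only: once some configuration of alt k reaches an
    # accept state, later configurations for that same alt are skipped.
    def filter_by_priority(configs):
        results, skip_alt = [], None
        for alt, reaches_accept in configs:   # ordered by priority
            if alt == skip_alt:
                continue
            results.append(alt)
            if reaches_accept:
                skip_alt = alt
        return results

    print(filter_by_priority([(1, True), (1, False), (2, False)]))  # [1, 2]
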
    # side-effect: can alter configs.hasSemanticContext
    def getEpsilonTarget(self, input, config, t, configs, speculative, treatEofAsEpsilon):
        c = None
        if t.serializationType==Transition.RULE:
            newContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber)
            c = LexerATNConfig(state=t.target, config=config, context=newContext)

        elif t.serializationType==Transition.PRECEDENCE:
            raise UnsupportedOperationException("Precedence predicates are not supported in lexers.")

        elif t.serializationType==Transition.PREDICATE:
            # Track traversing semantic predicates. If we traverse,
            # we cannot add a DFA state for this "reach" computation
            # because the DFA would not test the predicate again in the
            # future. Rather than creating collections of semantic predicates
            # like v3 and testing them on prediction, v4 will test them on the
            # fly all the time using the ATN not the DFA. This is slower but
            # semantically it's not used that often. One of the key elements to
            # this predicate mechanism is not adding DFA states that see
            # predicates immediately afterwards in the ATN. For example,

            # a : ID {p1}? | ID {p2}? ;

            # should create the start state for rule 'a' (to save start state
            # competition), but should not create target of ID state. The
            # collection of ATN states the following ID references includes
            # states reached by traversing predicates. Since this is when we
            # test them, we cannot cache the DFA state target of ID.

            if self.debug:
                print("EVAL rule " + str(t.ruleIndex) + ":" + str(t.predIndex))
            configs.hasSemanticContext = True
            if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative):
                c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType==Transition.ACTION:
            if config.context is None or config.context.hasEmptyPath():
                # execute actions anywhere in the start rule for a token.
                #
                # TODO: if the entry rule is invoked recursively, some
                # actions may be executed during the recursive call. The
                # problem can appear when hasEmptyPath() is true but
                # isEmpty() is false. In this case, the config needs to be
                # split into two contexts - one with just the empty path
                # and another with everything but the empty path.
                # Unfortunately, the current algorithm does not allow
                # getEpsilonTarget to return two configurations, so
                # additional modifications are needed before we can support
                # the split operation.
                lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor,
                                self.atn.lexerActions[t.actionIndex])
                c = LexerATNConfig(state=t.target, config=config, lexerActionExecutor=lexerActionExecutor)

            else:
                # ignore actions in referenced rules
                c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType==Transition.EPSILON:
            c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType in [ Transition.ATOM, Transition.RANGE, Transition.SET ]:
            if treatEofAsEpsilon:
                if t.matches(Token.EOF, 0, 0xFFFF):
                    c = LexerATNConfig(state=t.target, config=config)

        return c

    # Evaluate a predicate specified in the lexer.
    #
    # <p>If {@code speculative} is {@code true}, this method was called before
    # {@link #consume} for the matched character. This method should call
    # {@link #consume} before evaluating the predicate to ensure position
    # sensitive values, including {@link Lexer#getText}, {@link Lexer#getLine},
    # and {@link Lexer#getcolumn}, properly reflect the current
    # lexer state. This method should restore {@code input} and the simulator
    # to the original state before returning (i.e. undo the actions made by the
    # call to {@link #consume}.</p>
    #
    # @param input The input stream.
    # @param ruleIndex The rule containing the predicate.
    # @param predIndex The index of the predicate within the rule.
    # @param speculative {@code true} if the current index in {@code input} is
    # one character before the predicate's location.
    #
    # @return {@code true} if the specified predicate evaluates to
    # {@code true}.
    #/
    def evaluatePredicate(self, input, ruleIndex, predIndex, speculative):
        # assume true if no recognizer was provided
        if self.recog is None:
            return True

        if not speculative:
            return self.recog.sempred(None, ruleIndex, predIndex)

        savedcolumn = self.column
        savedLine = self.line
        index = input.index
        marker = input.mark()
        try:
            self.consume(input)
            return self.recog.sempred(None, ruleIndex, predIndex)
        finally:
            self.column = savedcolumn
            self.line = savedLine
            input.seek(index)
            input.release(marker)

    def captureSimState(self, settings, input, dfaState):
        settings.index = input.index
        settings.line = self.line
        settings.column = self.column
        settings.dfaState = dfaState

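The speculative branch of evaluatePredicate() above follows a save/act/restore shape: consume one character so position-sensitive values are right, run the check, then undo everything. A minimal sketch of just that shape, on a hypothetical cursor rather than a CharStream:

    # Illustrative only: speculation must leave no trace on the cursor.
    class Cursor(object):
        def __init__(self):
            self.pos = 0
        def consume(self):
            self.pos += 1

    def speculate(cursor, check):
        saved = cursor.pos
        try:
            cursor.consume()      # move to where the predicate expects to be
            return check(cursor)
        finally:
            cursor.pos = saved    # restore, whatever check() did

    c = Cursor()
    assert speculate(c, lambda cur: cur.pos == 1)
    assert c.pos == 0             # no trace of the speculation
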
    def addDFAEdge(self, from_, tk, to=None, cfgs=None):

        if to is None and cfgs is not None:
            # leading to this call, ATNConfigSet.hasSemanticContext is used as a
            # marker indicating dynamic predicate evaluation makes this edge
            # dependent on the specific input sequence, so the static edge in the
            # DFA should be omitted. The target DFAState is still created since
            # execATN has the ability to resynchronize with the DFA state cache
            # following the predicate evaluation step.
            #
            # TJP notes: next time through the DFA, we see a pred again and eval.
            # If that gets us to a previously created (but dangling) DFA
            # state, we can continue in pure DFA mode from there.
            #/
            suppressEdge = cfgs.hasSemanticContext
            cfgs.hasSemanticContext = False

            to = self.addDFAState(cfgs)

            if suppressEdge:
                return to

        # add the edge
        if tk < self.MIN_DFA_EDGE or tk > self.MAX_DFA_EDGE:
            # Only track edges within the DFA bounds
            return to

        if self.debug:
            print("EDGE " + str(from_) + " -> " + str(to) + " upon " + chr(tk))

        if from_.edges is None:
            # make room for tokens 1..n and -1 masquerading as index 0
            from_.edges = [ None ] * (self.MAX_DFA_EDGE - self.MIN_DFA_EDGE + 1)

        from_.edges[tk - self.MIN_DFA_EDGE] = to # connect

        return to


    # Add a new DFA state if there isn't one with this set of
    # configurations already. This method also detects the first
    # configuration containing an ATN rule stop state. Later, when
    # traversing the DFA, we will know which rule to accept.
    def addDFAState(self, configs):
        # the lexer evaluates predicates on-the-fly; by this point configs
        # should not contain any configurations with unevaluated predicates.
        assert not configs.hasSemanticContext

        proposed = DFAState(configs=configs)
        firstConfigWithRuleStopState = None
        for c in configs:
            if isinstance(c.state, RuleStopState):
                firstConfigWithRuleStopState = c
                break

        if firstConfigWithRuleStopState is not None:
            proposed.isAcceptState = True
            proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor
            proposed.prediction = self.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex]

        dfa = self.decisionToDFA[self.mode]
        existing = dfa.states.get(proposed, None)
        if existing is not None:
            return existing

        newState = proposed

        newState.stateNumber = len(dfa.states)
        configs.setReadonly(True)
        newState.configs = configs
        dfa.states[newState] = newState
        return newState

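addDFAState() above is an intern-or-create lookup keyed by configuration-set equality: propose a state, return the existing equal one if the DFA already has it, otherwise register the proposal. An illustrative reduction with strings standing in for config sets:

    # Illustrative only: one state object per distinct configuration set.
    states = {}

    def add_state(configs_key):
        existing = states.get(configs_key)
        if existing is not None:
            return existing                       # reuse the equal state
        state = {"id": len(states), "key": configs_key}
        states[configs_key] = state
        return state

    assert add_state("a,b") is add_state("a,b")   # interned, not duplicated
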
    def getDFA(self, mode):
        return self.decisionToDFA[mode]

    # Get the text matched so far for the current token.
    def getText(self, input):
        # index is first lookahead char, don't include.
        return input.getText(self.startIndex, input.index-1)

    def consume(self, input):
        curChar = input.LA(1)
        if curChar==ord('\n'):
            self.line += 1
            self.column = 0
        else:
            self.column += 1
        input.consume()

    def getTokenName(self, t):
        if t==-1:
            return "EOF"
        else:
            return "'" + chr(t) + "'"

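consume() above tracks line and column with a single newline check: a newline bumps the line and resets the column, anything else advances the column. The same bookkeeping in isolation, as a quick sanity check:

    # Illustrative only: newline-aware position tracking over a string.
    def track(text):
        line, column = 1, 0
        for ch in text:
            if ch == '\n':
                line += 1
                column = 0
            else:
                column += 1
        return line, column

    assert track("ab\ncd") == (2, 2)
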
@@ -0,0 +1,316 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

class LexerActionType(object):

    CHANNEL = 0     # The type of a {@link LexerChannelAction} action.
    CUSTOM = 1      # The type of a {@link LexerCustomAction} action.
    MODE = 2        # The type of a {@link LexerModeAction} action.
    MORE = 3        # The type of a {@link LexerMoreAction} action.
    POP_MODE = 4    # The type of a {@link LexerPopModeAction} action.
    PUSH_MODE = 5   # The type of a {@link LexerPushModeAction} action.
    SKIP = 6        # The type of a {@link LexerSkipAction} action.
    TYPE = 7        # The type of a {@link LexerTypeAction} action.

class LexerAction(object):

    def __init__(self, action):
        self.actionType = action
        self.isPositionDependent = False

    def __hash__(self):
        return hash(str(self.actionType))

    def __eq__(self, other):
        return self is other

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        return unicode(super(LexerAction, self))


#
# Implements the {@code skip} lexer action by calling {@link Lexer#skip}.
#
# <p>The {@code skip} command does not have any parameters, so this action is
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
class LexerSkipAction(LexerAction):

    # Provides a singleton instance of this parameterless lexer action.
    INSTANCE = None

    def __init__(self):
        super(LexerSkipAction, self).__init__(LexerActionType.SKIP)

    def execute(self, lexer):
        lexer.skip()

    def __unicode__(self):
        return u"skip"

LexerSkipAction.INSTANCE = LexerSkipAction()

# Implements the {@code type} lexer action by calling {@link Lexer#setType}
# with the assigned type.
class LexerTypeAction(LexerAction):

    def __init__(self, type):
        super(LexerTypeAction, self).__init__(LexerActionType.TYPE)
        self.type = type

    def execute(self, lexer):
        lexer.type = self.type

    def __hash__(self):
        return hash(str(self.actionType) + str(self.type))

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, LexerTypeAction):
            return False
        else:
            return self.type == other.type

    def __unicode__(self):
        return u"type(" + unicode(self.type) + u")"


# Implements the {@code pushMode} lexer action by calling
# {@link Lexer#pushMode} with the assigned mode.
class LexerPushModeAction(LexerAction):

    def __init__(self, mode):
        super(LexerPushModeAction, self).__init__(LexerActionType.PUSH_MODE)
        self.mode = mode

    # <p>This action is implemented by calling {@link Lexer#pushMode} with the
    # value provided by {@link #getMode}.</p>
    def execute(self, lexer):
        lexer.pushMode(self.mode)

    def __hash__(self):
        return hash(str(self.actionType) + str(self.mode))

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, LexerPushModeAction):
            return False
        else:
            return self.mode == other.mode

    def __unicode__(self):
        return u"pushMode(" + unicode(self.mode) + u")"


# Implements the {@code popMode} lexer action by calling {@link Lexer#popMode}.
#
# <p>The {@code popMode} command does not have any parameters, so this action is
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
class LexerPopModeAction(LexerAction):

    INSTANCE = None

    def __init__(self):
        super(LexerPopModeAction, self).__init__(LexerActionType.POP_MODE)

    # <p>This action is implemented by calling {@link Lexer#popMode}.</p>
    def execute(self, lexer):
        lexer.popMode()

    def __unicode__(self):
        return u"popMode"

LexerPopModeAction.INSTANCE = LexerPopModeAction()

# Implements the {@code more} lexer action by calling {@link Lexer#more}.
#
# <p>The {@code more} command does not have any parameters, so this action is
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
class LexerMoreAction(LexerAction):

    INSTANCE = None

    def __init__(self):
        super(LexerMoreAction, self).__init__(LexerActionType.MORE)

    # <p>This action is implemented by calling {@link Lexer#more}.</p>
    def execute(self, lexer):
        lexer.more()

    def __unicode__(self):
        return u"more"

LexerMoreAction.INSTANCE = LexerMoreAction()

# Implements the {@code mode} lexer action by calling {@link Lexer#mode} with
# the assigned mode.
class LexerModeAction(LexerAction):

    def __init__(self, mode):
        super(LexerModeAction, self).__init__(LexerActionType.MODE)
        self.mode = mode

    # <p>This action is implemented by calling {@link Lexer#mode} with the
    # value provided by {@link #getMode}.</p>
    def execute(self, lexer):
        lexer.mode(self.mode)

    def __hash__(self):
        return hash(str(self.actionType) + str(self.mode))

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, LexerModeAction):
            return False
        else:
            return self.mode == other.mode

    def __unicode__(self):
        return u"mode(" + unicode(self.mode) + u")"

# Executes a custom lexer action by calling {@link Recognizer#action} with the
# rule and action indexes assigned to the custom action. The implementation of
# a custom action is added to the generated code for the lexer in an override
# of {@link Recognizer#action} when the grammar is compiled.
#
# <p>This class may represent embedded actions created with the <code>{...}</code>
# syntax in ANTLR 4, as well as actions created for lexer commands where the
# command argument could not be evaluated when the grammar was compiled.</p>

class LexerCustomAction(LexerAction):

    # Constructs a custom lexer action with the specified rule and action
    # indexes.
    #
    # @param ruleIndex The rule index to use for calls to
    # {@link Recognizer#action}.
    # @param actionIndex The action index to use for calls to
    # {@link Recognizer#action}.
    #/
    def __init__(self, ruleIndex, actionIndex):
        super(LexerCustomAction, self).__init__(LexerActionType.CUSTOM)
        self.ruleIndex = ruleIndex
        self.actionIndex = actionIndex
        self.isPositionDependent = True

    # <p>Custom actions are implemented by calling {@link Lexer#action} with the
    # appropriate rule and action indexes.</p>
    def execute(self, lexer):
        lexer.action(None, self.ruleIndex, self.actionIndex)

    def __hash__(self):
        return hash(str(self.actionType) + str(self.ruleIndex) + str(self.actionIndex))

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, LexerCustomAction):
            return False
        else:
            return self.ruleIndex == other.ruleIndex and self.actionIndex == other.actionIndex

# Implements the {@code channel} lexer action by calling
# {@link Lexer#setChannel} with the assigned channel.
class LexerChannelAction(LexerAction):

    # Constructs a new {@code channel} action with the specified channel value.
    # @param channel The channel value to pass to {@link Lexer#setChannel}.
    def __init__(self, channel):
        super(LexerChannelAction, self).__init__(LexerActionType.CHANNEL)
        self.channel = channel

    # <p>This action is implemented by calling {@link Lexer#setChannel} with the
    # value provided by {@link #getChannel}.</p>
    def execute(self, lexer):
        lexer._channel = self.channel

    def __hash__(self):
        return hash(str(self.actionType) + str(self.channel))

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, LexerChannelAction):
            return False
        else:
            return self.channel == other.channel

    def __unicode__(self):
        return u"channel(" + unicode(self.channel) + u")"

# This implementation of {@link LexerAction} is used for tracking input offsets
# for position-dependent actions within a {@link LexerActionExecutor}.
#
# <p>This action is not serialized as part of the ATN, and is only required for
# position-dependent lexer actions which appear at a location other than the
# end of a rule. For more information about DFA optimizations employed for
# lexer actions, see {@link LexerActionExecutor#append} and
# {@link LexerActionExecutor#fixOffsetBeforeMatch}.</p>
class LexerIndexedCustomAction(LexerAction):

    # Constructs a new indexed custom action by associating a character offset
    # with a {@link LexerAction}.
    #
    # <p>Note: This class is only required for lexer actions for which
    # {@link LexerAction#isPositionDependent} returns {@code true}.</p>
    #
    # @param offset The offset into the input {@link CharStream}, relative to
    # the token start index, at which the specified lexer action should be
    # executed.
    # @param action The lexer action to execute at a particular offset in the
    # input {@link CharStream}.
    def __init__(self, offset, action):
        super(LexerIndexedCustomAction, self).__init__(action.actionType)
        self.offset = offset
        self.action = action
        self.isPositionDependent = True

    # <p>This method calls {@link #execute} on the result of {@link #getAction}
    # using the provided {@code lexer}.</p>
    def execute(self, lexer):
        # assume the input stream position was properly set by the calling code
        self.action.execute(lexer)

    def __hash__(self):
        return hash(str(self.actionType) + str(self.offset) + str(self.action))

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, LexerIndexedCustomAction):
            return False
        else:
            return self.offset == other.offset and self.action == other.action

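As an illustrative aside, the actions above only touch a small lexer surface; a hypothetical stand-in object is enough to drive two of them (the real argument is a generated Lexer subclass):

    # Illustrative only: execute() just assigns to the attributes shown.
    class DummyLexer(object):
        def __init__(self):
            self.type = None
            self._channel = None

    lexer = DummyLexer()
    LexerTypeAction(42).execute(lexer)     # behaves like `-> type(42)`
    LexerChannelAction(1).execute(lexer)   # behaves like `-> channel(1)`
    assert lexer.type == 42 and lexer._channel == 1
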
@@ -0,0 +1,160 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/

# Represents an executor for a sequence of lexer actions which are traversed during
# the matching operation of a lexer rule (token).
#
# <p>The executor tracks position information for position-dependent lexer actions
# efficiently, ensuring that actions appearing only at the end of the rule do
# not cause bloating of the {@link DFA} created for the lexer.</p>


from antlr4.atn.LexerAction import LexerIndexedCustomAction

class LexerActionExecutor(object):

    def __init__(self, lexerActions=list()):
        self.lexerActions = lexerActions
        # Caches the result of {@link #hashCode} since the hash code is an element
        # of the performance-critical {@link LexerATNConfig#hashCode} operation.
        self.hashCode = hash("".join([str(la) for la in lexerActions]))


    # Creates a {@link LexerActionExecutor} which executes the actions for
    # the input {@code lexerActionExecutor} followed by a specified
    # {@code lexerAction}.
    #
    # @param lexerActionExecutor The executor for actions already traversed by
    # the lexer while matching a token within a particular
    # {@link LexerATNConfig}. If this is {@code null}, the method behaves as
    # though it were an empty executor.
    # @param lexerAction The lexer action to execute after the actions
    # specified in {@code lexerActionExecutor}.
    #
    # @return A {@link LexerActionExecutor} for executing the combined actions
    # of {@code lexerActionExecutor} and {@code lexerAction}.
    @staticmethod
    def append(lexerActionExecutor, lexerAction):
        if lexerActionExecutor is None:
            return LexerActionExecutor([ lexerAction ])

        lexerActions = lexerActionExecutor.lexerActions + [ lexerAction ]
        return LexerActionExecutor(lexerActions)

    # Creates a {@link LexerActionExecutor} which encodes the current offset
    # for position-dependent lexer actions.
    #
    # <p>Normally, when the executor encounters lexer actions where
    # {@link LexerAction#isPositionDependent} returns {@code true}, it calls
    # {@link IntStream#seek} on the input {@link CharStream} to set the input
    # position to the <em>end</em> of the current token. This behavior provides
    # for efficient DFA representation of lexer actions which appear at the end
    # of a lexer rule, even when the lexer rule matches a variable number of
    # characters.</p>
    #
    # <p>Prior to traversing a match transition in the ATN, the current offset
    # from the token start index is assigned to all position-dependent lexer
    # actions which have not already been assigned a fixed offset. By storing
    # the offsets relative to the token start index, the DFA representation of
    # lexer actions which appear in the middle of tokens remains efficient due
    # to sharing among tokens of the same length, regardless of their absolute
    # position in the input stream.</p>
    #
    # <p>If the current executor already has offsets assigned to all
    # position-dependent lexer actions, the method returns {@code this}.</p>
    #
    # @param offset The current offset to assign to all position-dependent
    # lexer actions which do not already have offsets assigned.
    #
    # @return A {@link LexerActionExecutor} which stores input stream offsets
    # for all position-dependent lexer actions.
    #/
    def fixOffsetBeforeMatch(self, offset):
        updatedLexerActions = None
        for i in range(0, len(self.lexerActions)):
            if self.lexerActions[i].isPositionDependent and not isinstance(self.lexerActions[i], LexerIndexedCustomAction):
                if updatedLexerActions is None:
                    updatedLexerActions = [ la for la in self.lexerActions ]
                updatedLexerActions[i] = LexerIndexedCustomAction(offset, self.lexerActions[i])

        if updatedLexerActions is None:
            return self
        else:
            return LexerActionExecutor(updatedLexerActions)


    # Execute the actions encapsulated by this executor within the context of a
    # particular {@link Lexer}.
    #
    # <p>This method calls {@link IntStream#seek} to set the position of the
    # {@code input} {@link CharStream} prior to calling
    # {@link LexerAction#execute} on a position-dependent action. Before the
    # method returns, the input position will be restored to the same position
    # it was in when the method was invoked.</p>
    #
    # @param lexer The lexer instance.
    # @param input The input stream which is the source for the current token.
    # When this method is called, the current {@link IntStream#index} for
    # {@code input} should be the start of the following token, i.e. 1
    # character past the end of the current token.
    # @param startIndex The token start index. This value may be passed to
    # {@link IntStream#seek} to set the {@code input} position to the beginning
    # of the token.
    #/
    def execute(self, lexer, input, startIndex):
        requiresSeek = False
        stopIndex = input.index
        try:
            for lexerAction in self.lexerActions:
                if isinstance(lexerAction, LexerIndexedCustomAction):
                    offset = lexerAction.offset
                    input.seek(startIndex + offset)
                    lexerAction = lexerAction.action
                    requiresSeek = (startIndex + offset) != stopIndex
                elif lexerAction.isPositionDependent:
                    input.seek(stopIndex)
                    requiresSeek = False
                lexerAction.execute(lexer)
        finally:
            if requiresSeek:
                input.seek(stopIndex)

    def __hash__(self):
        return self.hashCode

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, LexerActionExecutor):
            return False
        else:
            return self.hashCode == other.hashCode \
                and self.lexerActions == other.lexerActions

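A short usage sketch of append() and fixOffsetBeforeMatch(), assuming the antlr4 Python2 runtime is importable as a package (the indexes passed to LexerCustomAction are arbitrary placeholders):

    from antlr4.atn.LexerAction import LexerCustomAction, LexerIndexedCustomAction

    custom = LexerCustomAction(0, 0)           # position-dependent by definition
    ex = LexerActionExecutor.append(None, custom)
    pinned = ex.fixOffsetBeforeMatch(3)        # pin at offset 3 from token start
    assert isinstance(pinned.lexerActions[0], LexerIndexedCustomAction)
    assert pinned.fixOffsetBeforeMatch(5) is pinned   # already pinned: no-op
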
File diff suppressed because it is too large
@@ -0,0 +1,544 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# This enumeration defines the prediction modes available in ANTLR 4 along with
# utility methods for analyzing configuration sets for conflicts and/or
# ambiguities.


from antlr4.atn.ATN import ATN
from antlr4.atn.ATNConfig import ATNConfig
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.ATNState import RuleStopState
from antlr4.atn.SemanticContext import SemanticContext

class PredictionMode(object):
    #
    # The SLL(*) prediction mode. This prediction mode ignores the current
    # parser context when making predictions. This is the fastest prediction
    # mode, and provides correct results for many grammars. This prediction
    # mode is more powerful than the prediction mode provided by ANTLR 3, but
    # may result in syntax errors for grammar and input combinations which are
    # not SLL.
    #
    # <p>
    # When using this prediction mode, the parser will either return a correct
    # parse tree (i.e. the same parse tree that would be returned with the
    # {@link #LL} prediction mode), or it will report a syntax error. If a
    # syntax error is encountered when using the {@link #SLL} prediction mode,
    # it may be due to either an actual syntax error in the input or indicate
    # that the particular combination of grammar and input requires the more
    # powerful {@link #LL} prediction abilities to complete successfully.</p>
    #
    # <p>
    # This prediction mode does not provide any guarantees for prediction
    # behavior for syntactically-incorrect inputs.</p>
    #
    SLL = 0
    #
    # The LL(*) prediction mode. This prediction mode allows the current parser
    # context to be used for resolving SLL conflicts that occur during
    # prediction. This is the fastest prediction mode that guarantees correct
    # parse results for all combinations of grammars with syntactically correct
    # inputs.
    #
    # <p>
    # When using this prediction mode, the parser will make correct decisions
    # for all syntactically-correct grammar and input combinations. However, in
    # cases where the grammar is truly ambiguous this prediction mode might not
    # report a precise answer for <em>exactly which</em> alternatives are
    # ambiguous.</p>
    #
    # <p>
    # This prediction mode does not provide any guarantees for prediction
    # behavior for syntactically-incorrect inputs.</p>
    #
    LL = 1
    #
    # The LL(*) prediction mode with exact ambiguity detection. In addition to
    # the correctness guarantees provided by the {@link #LL} prediction mode,
    # this prediction mode instructs the prediction algorithm to determine the
    # complete and exact set of ambiguous alternatives for every ambiguous
    # decision encountered while parsing.
    #
    # <p>
    # This prediction mode may be used for diagnosing ambiguities during
    # grammar development. Due to the performance overhead of calculating sets
    # of ambiguous alternatives, this prediction mode should be avoided when
    # the exact results are not necessary.</p>
    #
    # <p>
    # This prediction mode does not provide any guarantees for prediction
    # behavior for syntactically-incorrect inputs.</p>
    #
    LL_EXACT_AMBIG_DETECTION = 2
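
    # Example (a sketch, not part of the runtime): the mode is normally selected
    # on the parser's ATN simulator; MyParser is a hypothetical generated class.
    #
    #   parser = MyParser(tokenStream)
    #   parser._interp.predictionMode = PredictionMode.SLL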


    #
    # Computes the SLL prediction termination condition.
    #
    # <p>
    # This method computes the SLL prediction termination condition for both of
    # the following cases.</p>
    #
    # <ul>
    # <li>The usual SLL+LL fallback upon SLL conflict</li>
    # <li>Pure SLL without LL fallback</li>
    # </ul>
    #
    # <p><strong>COMBINED SLL+LL PARSING</strong></p>
    #
    # <p>When LL-fallback is enabled upon SLL conflict, correct predictions are
    # ensured regardless of how the termination condition is computed by this
    # method. Due to the substantially higher cost of LL prediction, the
    # prediction should only fall back to LL when the additional lookahead
    # cannot lead to a unique SLL prediction.</p>
    #
    # <p>Assuming combined SLL+LL parsing, an SLL configuration set with only
    # conflicting subsets should fall back to full LL, even if the
    # configuration sets don't resolve to the same alternative (e.g.
    # {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting
    # configuration, SLL could continue with the hopes that more lookahead will
    # resolve via one of those non-conflicting configurations.</p>
    #
    # <p>Here's the prediction termination rule, then: SLL (for SLL+LL parsing)
    # stops when it sees only conflicting configuration subsets. In contrast,
    # full LL keeps going when there is uncertainty.</p>
    #
    # <p><strong>HEURISTIC</strong></p>
    #
    # <p>As a heuristic, we stop prediction when we see any conflicting subset
    # unless we see a state that only has one alternative associated with it.
    # The single-alt-state thing lets prediction continue upon rules like
    # (otherwise, it would admit defeat too soon):</p>
    #
    # <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p>
    #
    # <p>When the ATN simulation reaches the state before {@code ';'}, it has a
    # DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
    # {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
    # processing this node because alternative two has another way to continue,
    # via {@code [6|2|[]]}.</p>
    #
    # <p>It also lets us continue for this rule:</p>
    #
    # <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p>
    #
    # <p>After matching input A, we reach the stop state for rule A, state 1.
    # State 8 is the state right before B. Clearly alternatives 1 and 2
    # conflict and no amount of further lookahead will separate the two.
    # However, alternative 3 will be able to continue and so we do not stop
    # working on this state. In the previous example, we're concerned with
    # states associated with the conflicting alternatives. Here alt 3 is not
    # associated with the conflicting configs, but since we can continue
    # looking for input reasonably, don't declare the state done.</p>
    #
    # <p><strong>PURE SLL PARSING</strong></p>
    #
    # <p>To handle pure SLL parsing, all we have to do is make sure that we
    # combine stack contexts for configurations that differ only by semantic
    # predicate. From there, we can do the usual SLL termination heuristic.</p>
    #
    # <p><strong>PREDICATES IN SLL+LL PARSING</strong></p>
    #
    # <p>SLL decisions don't evaluate predicates until after they reach DFA stop
    # states because they need to create the DFA cache that works in all
    # semantic situations. In contrast, full LL evaluates predicates collected
    # during start state computation so it can ignore predicates thereafter.
    # This means that SLL termination detection can totally ignore semantic
    # predicates.</p>
    #
    # <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
    # semantic predicate contexts so we might see two configurations like the
    # following.</p>
    #
    # <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p>
    #
    # <p>Before testing these configurations against others, we have to merge
    # {@code x} and {@code x'} (without modifying the existing configurations).
    # For example, we test {@code (x+x')==x''} when looking for conflicts in
    # the following configurations.</p>
    #
    # <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p>
    #
    # <p>If the configuration set has predicates (as indicated by
    # {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
    # the configurations to strip out all of the predicates so that a standard
    # {@link ATNConfigSet} will merge everything ignoring predicates.</p>
    #
    @classmethod
    def hasSLLConflictTerminatingPrediction(cls, mode, configs):
        # Configs in rule stop states indicate reaching the end of the decision
        # rule (local context) or end of start rule (full context). If all
        # configs meet this condition, then none of the configurations is able
        # to match additional input so we terminate prediction.
        #
        if cls.allConfigsInRuleStopStates(configs):
            return True

        # pure SLL mode parsing
        if mode == PredictionMode.SLL:
            # Don't bother with combining configs from different semantic
            # contexts if we can fail over to full LL; costs more time
            # since we'll often fail over anyway.
            if configs.hasSemanticContext:
                # dup configs, tossing out semantic predicates
                dup = ATNConfigSet()
                for c in configs:
                    c = ATNConfig(c, SemanticContext.NONE)
                    dup.add(c)
                configs = dup
            # now we have combined contexts for configs with dissimilar preds

        # pure SLL or combined SLL+LL mode parsing
        altsets = cls.getConflictingAltSubsets(configs)
        return cls.hasConflictingAltSet(altsets) and not cls.hasStateAssociatedWithOneAlt(configs)

    # Checks if any configuration in {@code configs} is in a
    # {@link RuleStopState}. Configurations meeting this condition have reached
    # the end of the decision rule (local context) or end of start rule (full
    # context).
    #
    # @param configs the configuration set to test
    # @return {@code true} if any configuration in {@code configs} is in a
    # {@link RuleStopState}, otherwise {@code false}
    @classmethod
    def hasConfigInRuleStopState(cls, configs):
        for c in configs:
            if isinstance(c.state, RuleStopState):
                return True
        return False

    # Checks if all configurations in {@code configs} are in a
    # {@link RuleStopState}. Configurations meeting this condition have reached
    # the end of the decision rule (local context) or end of start rule (full
    # context).
    #
    # @param configs the configuration set to test
    # @return {@code true} if all configurations in {@code configs} are in a
    # {@link RuleStopState}, otherwise {@code false}
    @classmethod
    def allConfigsInRuleStopStates(cls, configs):
        for config in configs:
            if not isinstance(config.state, RuleStopState):
                return False
        return True

    #
    # Full LL prediction termination.
    #
    # <p>Can we stop looking ahead during ATN simulation or is there some
    # uncertainty as to which alternative we will ultimately pick, after
    # consuming more input? Even if there are partial conflicts, we might know
    # that everything is going to resolve to the same minimum alternative. That
    # means we can stop since no more lookahead will change that fact. On the
    # other hand, there might be multiple conflicts that resolve to different
    # minimums. That means we need more look ahead to decide which of those
    # alternatives we should predict.</p>
    #
    # <p>The basic idea is to split the set of configurations {@code C}, into
    # conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
    # non-conflicting configurations. Two configurations conflict if they have
    # identical {@link ATNConfig#state} and {@link ATNConfig#context} values
    # but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
    # and {@code (s, j, ctx, _)} for {@code i!=j}.</p>
    #
    # <p>Reduce these configuration subsets to the set of possible alternatives.
    # You can compute the alternative subsets in one pass as follows:</p>
    #
    # <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
    # {@code C} holding {@code s} and {@code ctx} fixed.</p>
    #
    # <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p>
    #
    # <pre>
    # map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
    # alt and not pred
    # </pre>
    #
    # <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p>
    #
    # <p>If {@code |A_s,ctx|=1} then there is no conflict associated with
    # {@code s} and {@code ctx}.</p>
    #
    # <p>Reduce the subsets to singletons by choosing a minimum of each subset. If
    # the union of these alternative subsets is a singleton, then no amount of
    # more lookahead will help us. We will always pick that alternative. If,
    # however, there is more than one alternative, then we are uncertain which
    # alternative to predict and must continue looking for resolution. We may
    # or may not discover an ambiguity in the future, even if there are no
    # conflicting subsets this round.</p>
    #
    # <p>The biggest sin is to terminate early because it means we've made a
    # decision but were uncertain as to the eventual outcome. We haven't used
    # enough lookahead. On the other hand, announcing a conflict too late is no
    # big deal; you will still have the conflict. It's just inefficient. It
    # might even look until the end of file.</p>
    #
    # <p>No special consideration for semantic predicates is required because
    # predicates are evaluated on-the-fly for full LL prediction, ensuring that
    # no configuration contains a semantic context during the termination
    # check.</p>
    #
    # <p><strong>CONFLICTING CONFIGS</strong></p>
    #
    # <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
    # when {@code i!=j} but {@code x=x'}. Because we merge all
    # {@code (s, i, _)} configurations together, that means that there are at
    # most {@code n} configurations associated with state {@code s} for
    # {@code n} possible alternatives in the decision. The merged stacks
    # complicate the comparison of configuration contexts {@code x} and
    # {@code x'}. Sam checks to see if one is a subset of the other by calling
    # merge and checking to see if the merged result is either {@code x} or
    # {@code x'}. If the {@code x} associated with lowest alternative {@code i}
    # is the superset, then {@code i} is the only possible prediction since the
    # others resolve to {@code min(i)} as well. However, if {@code x} is
    # associated with {@code j>i} then at least one stack configuration for
    # {@code j} is not in conflict with alternative {@code i}. The algorithm
    # should keep going, looking for more lookahead due to the uncertainty.</p>
    #
    # <p>For simplicity, I'm doing an equality check between {@code x} and
    # {@code x'} that lets the algorithm continue to consume lookahead longer
    # than necessary. The reason I like the equality is of course the
    # simplicity but also because that is the test you need to detect the
    # alternatives that are actually in conflict.</p>
    #
    # <p><strong>CONTINUE/STOP RULE</strong></p>
    #
    # <p>Continue if union of resolved alternative sets from non-conflicting and
    # conflicting alternative subsets has more than one alternative. We are
    # uncertain about which alternative to predict.</p>
    #
    # <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
    # alternatives are still in the running for the amount of input we've
    # consumed at this point. The conflicting sets let us strip away
    # configurations that won't lead to more states because we resolve
    # conflicts to the configuration with a minimum alternate for the
    # conflicting set.</p>
    #
    # <p><strong>CASES</strong></p>
    #
    # <ul>
    #
    # <li>no conflicts and more than 1 alternative in set => continue</li>
    #
    # <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
    # {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
    # {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
    # {@code {1,3}} => continue
    # </li>
    #
    # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
    # {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
    # {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
    # {@code {1}} => stop and predict 1</li>
    #
    # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
    # {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
    # {@code {1}} = {@code {1}} => stop and predict 1, can announce
    # ambiguity {@code {1,2}}</li>
    #
    # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
    # {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
    # {@code {2}} = {@code {1,2}} => continue</li>
    #
    # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
    # {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
    # {@code {3}} = {@code {1,3}} => continue</li>
    #
    # </ul>
    #
    # <p><strong>EXACT AMBIGUITY DETECTION</strong></p>
    #
    # <p>If all states report the same conflicting set of alternatives, then we
    # know we have the exact ambiguity set.</p>
    #
    # <p><code>|A_<em>i</em>|>1</code> and
    # <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p>
    #
    # <p>In other words, we continue examining lookahead until all {@code A_i}
    # have more than one alternative and all {@code A_i} are the same. If
    # {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
    # because the resolved set is {@code {1}}. To determine what the real
    # ambiguity is, we have to know whether the ambiguity is between one and
    # two or one and three so we keep going. We can only stop prediction when
    # we need exact ambiguity detection when the sets look like
    # {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p>
    #
    @classmethod
    def resolvesToJustOneViableAlt(cls, altsets):
        return cls.getSingleViableAlt(altsets)

    #
    # Determines if every alternative subset in {@code altsets} contains more
    # than one alternative.
    #
    # @param altsets a collection of alternative subsets
    # @return {@code true} if every {@link BitSet} in {@code altsets} has
    # {@link BitSet#cardinality cardinality} > 1, otherwise {@code false}
    #
    @classmethod
    def allSubsetsConflict(cls, altsets):
        return not cls.hasNonConflictingAltSet(altsets)

    #
    # Determines if any single alternative subset in {@code altsets} contains
    # exactly one alternative.
    #
    # @param altsets a collection of alternative subsets
    # @return {@code true} if {@code altsets} contains a {@link BitSet} with
    # {@link BitSet#cardinality cardinality} 1, otherwise {@code false}
    #
    @classmethod
    def hasNonConflictingAltSet(cls, altsets):
        for alts in altsets:
            if len(alts)==1:
                return True
        return False

    #
    # Determines if any single alternative subset in {@code altsets} contains
    # more than one alternative.
    #
    # @param altsets a collection of alternative subsets
    # @return {@code true} if {@code altsets} contains a {@link BitSet} with
    # {@link BitSet#cardinality cardinality} > 1, otherwise {@code false}
    #
    @classmethod
    def hasConflictingAltSet(cls, altsets):
        for alts in altsets:
            if len(alts)>1:
                return True
        return False

    #
    # Determines if every alternative subset in {@code altsets} is equivalent.
    #
    # @param altsets a collection of alternative subsets
    # @return {@code true} if every member of {@code altsets} is equal to the
    # others, otherwise {@code false}
    #
    @classmethod
    def allSubsetsEqual(cls, altsets):
        first = None
        for alts in altsets:
            if first is None:
                first = alts
            elif not alts==first:
                return False
        return True
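
    # A quick illustration (a sketch, not part of the runtime) of the helpers
    # above on plain Python sets of alternative numbers:
    #
    #   altsets = [ {1, 2}, {1, 2}, {3} ]
    #   PredictionMode.hasConflictingAltSet(altsets)     # True:  {1,2} has more than one alt
    #   PredictionMode.hasNonConflictingAltSet(altsets)  # True:  {3} has exactly one alt
    #   PredictionMode.allSubsetsEqual(altsets)          # False: {1,2} != {3}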

    #
    # Returns the unique alternative predicted by all alternative subsets in
    # {@code altsets}. If no such alternative exists, this method returns
    # {@link ATN#INVALID_ALT_NUMBER}.
    #
    # @param altsets a collection of alternative subsets
    #
    @classmethod
    def getUniqueAlt(cls, altsets):
        all = cls.getAlts(altsets)
        if len(all)==1:
            # all is a set, so it cannot be indexed; take its single element
            return next(iter(all))
        else:
            return ATN.INVALID_ALT_NUMBER

    # Gets the complete set of represented alternatives for a collection of
    # alternative subsets. This method returns the union of each {@link BitSet}
    # in {@code altsets}.
    #
    # @param altsets a collection of alternative subsets
    # @return the set of represented alternatives in {@code altsets}
    #
    @classmethod
    def getAlts(cls, altsets):
        all = set()
        for alts in altsets:
            all = all | alts
        return all

    #
    # This function gets the conflicting alt subsets from a configuration set.
    # For each configuration {@code c} in {@code configs}:
    #
    # <pre>
    # map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
    # alt and not pred
    # </pre>
    #
    @classmethod
    def getConflictingAltSubsets(cls, configs):
        configToAlts = dict()
        for c in configs:
            s = str(c.state.stateNumber) + "/" + str(c.context)
            alts = configToAlts.get(s, None)
            if alts is None:
                alts = set()
                configToAlts[s] = alts
            alts.add(c.alt)
        return configToAlts.values()

    #
    # Get a map from state to alt subset from a configuration set. For each
    # configuration {@code c} in {@code configs}:
    #
    # <pre>
    # map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
    # </pre>
    #
    @classmethod
    def getStateToAltMap(cls, configs):
        m = dict()
        for c in configs:
            alts = m.get(c.state, None)
            if alts is None:
                alts = set()
                m[c.state] = alts
            alts.add(c.alt)
        return m

    @classmethod
    def hasStateAssociatedWithOneAlt(cls, configs):
        x = cls.getStateToAltMap(configs)
        for alts in x.values():
            if len(alts)==1:
                return True
        return False

    @classmethod
    def getSingleViableAlt(cls, altsets):
        viableAlts = set()
        for alts in altsets:
            minAlt = min(alts)
            viableAlts.add(minAlt)
            if len(viableAlts)>1: # more than 1 viable alt
                return ATN.INVALID_ALT_NUMBER
        return min(viableAlts)
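
A common consumer of these modes is two-stage parsing: try the fast SLL mode first and fall back to full LL only on a syntax error. A minimal sketch, assuming a generated MyLexer/MyParser pair and a start rule named startRule (all hypothetical), plus the runtime's BailErrorStrategy and ParseCancellationException:

from antlr4 import CommonTokenStream, FileStream
from antlr4.error.ErrorStrategy import BailErrorStrategy
from antlr4.error.Errors import ParseCancellationException

def parseTwoStage(path):
    tokens = CommonTokenStream(MyLexer(FileStream(path)))
    parser = MyParser(tokens)
    parser._errHandler = BailErrorStrategy()          # bail out instead of recovering
    parser._interp.predictionMode = PredictionMode.SLL
    try:
        return parser.startRule()                     # fast path: SLL
    except ParseCancellationException:
        tokens.reset()                                # rewind and retry with full power
        parser.reset()
        parser._interp.predictionMode = PredictionMode.LL
        return parser.startRule()                     # slow path: full LL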

@@ -0,0 +1,360 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

# A tree structure used to record the semantic context in which
# an ATN configuration is valid. It's either a single predicate,
# a conjunction {@code p1&&p2}, or a sum of products {@code p1||p2}.
#
# <p>I have scoped the {@link AND}, {@link OR}, and {@link Predicate} subclasses of
# {@link SemanticContext} within the scope of this outer class.</p>
#
from io import StringIO


class SemanticContext(object):
    #
    # The default {@link SemanticContext}, which is semantically equivalent to
    # a predicate of the form {@code {true}?}.
    #
    NONE = None

    #
    # For context independent predicates, we evaluate them without a local
    # context (i.e., null context). That way, we can evaluate them without
    # having to create proper rule-specific context during prediction (as
    # opposed to the parser, which creates them naturally). In a practical
    # sense, this avoids a cast exception from RuleContext to myruleContext.
    #
    # <p>For context dependent predicates, we must pass in a local context so that
    # references such as $arg evaluate properly as _localctx.arg. We only
    # capture context dependent predicates in the context in which we begin
    # prediction, so we pass in the outer context here in case of context
    # dependent predicate evaluation.</p>
    #
    def eval(self, parser, outerContext):
        pass

    #
    # Evaluate the precedence predicates for the context and reduce the result.
    #
    # @param parser The parser instance.
    # @param outerContext The current parser context object.
    # @return The simplified semantic context after precedence predicates are
    # evaluated, which will be one of the following values.
    # <ul>
    # <li>{@link #NONE}: if the predicate simplifies to {@code true} after
    # precedence predicates are evaluated.</li>
    # <li>{@code null}: if the predicate simplifies to {@code false} after
    # precedence predicates are evaluated.</li>
    # <li>{@code this}: if the semantic context is not changed as a result of
    # precedence predicate evaluation.</li>
    # <li>A non-{@code null} {@link SemanticContext}: the new simplified
    # semantic context after precedence predicates are evaluated.</li>
    # </ul>
    #
    def evalPrecedence(self, parser, outerContext):
        return self

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        return unicode(super(SemanticContext, self))


def andContext(a, b):
    if a is None or a is SemanticContext.NONE:
        return b
    if b is None or b is SemanticContext.NONE:
        return a
    result = AND(a, b)
    if len(result.opnds) == 1:
        return result.opnds[0]
    else:
        return result

def orContext(a, b):
    if a is None:
        return b
    if b is None:
        return a
    if a is SemanticContext.NONE or b is SemanticContext.NONE:
        return SemanticContext.NONE
    result = OR(a, b)
    if len(result.opnds) == 1:
        return result.opnds[0]
    else:
        return result
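
# A quick illustration (a sketch, not part of the runtime): NONE is the
# "always true" context, so it is the identity for AND and absorbing for OR:
#
#   andContext(SemanticContext.NONE, p) is p                    # True
#   orContext(SemanticContext.NONE, p) is SemanticContext.NONE  # True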

def filterPrecedencePredicates(collection):
    result = []
    for context in collection:
        if isinstance(context, PrecedencePredicate):
            result.append(context)
    return result


class Predicate(SemanticContext):

    def __init__(self, ruleIndex=-1, predIndex=-1, isCtxDependent=False):
        self.ruleIndex = ruleIndex
        self.predIndex = predIndex
        self.isCtxDependent = isCtxDependent # e.g., $i ref in pred

    def eval(self, parser, outerContext):
        localctx = outerContext if self.isCtxDependent else None
        return parser.sempred(localctx, self.ruleIndex, self.predIndex)

    def __hash__(self):
        with StringIO() as buf:
            buf.write(unicode(self.ruleIndex))
            buf.write(u"/")
            buf.write(unicode(self.predIndex))
            buf.write(u"/")
            buf.write(unicode(self.isCtxDependent))
            return hash(buf.getvalue())

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, Predicate):
            return False
        return self.ruleIndex == other.ruleIndex and \
               self.predIndex == other.predIndex and \
               self.isCtxDependent == other.isCtxDependent

    def __unicode__(self):
        return u"{" + unicode(self.ruleIndex) + u":" + unicode(self.predIndex) + u"}?"


class PrecedencePredicate(SemanticContext):

    def __init__(self, precedence=0):
        self.precedence = precedence

    def eval(self, parser, outerContext):
        return parser.precpred(outerContext, self.precedence)

    def evalPrecedence(self, parser, outerContext):
        if parser.precpred(outerContext, self.precedence):
            return SemanticContext.NONE
        else:
            return None

    def __cmp__(self, other):
        return self.precedence - other.precedence

    def __hash__(self):
        return 31

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, PrecedencePredicate):
            return False
        else:
            return self.precedence == other.precedence

# A semantic context which is true whenever none of the contained contexts
# is false.
#
class AND(SemanticContext):

    def __init__(self, a, b):
        operands = set()
        if isinstance(a, AND):
            for o in a.opnds:
                operands.add(o)
        else:
            operands.add(a)
        if isinstance(b, AND):
            for o in b.opnds:
                operands.add(o)
        else:
            operands.add(b)

        precedencePredicates = filterPrecedencePredicates(operands)
        if len(precedencePredicates)>0:
            # interested in the transition with the lowest precedence
            reduced = min(precedencePredicates)
            operands.add(reduced)

        self.opnds = [ o for o in operands ]

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, AND):
            return False
        else:
            return self.opnds == other.opnds

    def __hash__(self):
        return hash(str(self.opnds) + "/AND")

    #
    # {@inheritDoc}
    #
    # <p>
    # The evaluation of predicates by this context is short-circuiting, but
    # unordered.</p>
    #
    def eval(self, parser, outerContext):
        for opnd in self.opnds:
            if not opnd.eval(parser, outerContext):
                return False
        return True

    def evalPrecedence(self, parser, outerContext):
        differs = False
        operands = []
        for context in self.opnds:
            evaluated = context.evalPrecedence(parser, outerContext)
            differs |= evaluated is not context
            if evaluated is None:
                # The AND context is false if any element is false
                return None
            elif evaluated is not SemanticContext.NONE:
                # Reduce the result by skipping true elements
                operands.append(evaluated)

        if not differs:
            return self

        if len(operands)==0:
            # all elements were true, so the AND context is true
            return SemanticContext.NONE

        result = None
        for o in operands:
            result = o if result is None else andContext(result, o)

        return result

    def __unicode__(self):
        with StringIO() as buf:
            first = True
            for o in self.opnds:
                if not first:
                    buf.write(u"&&")
                buf.write(unicode(o))
                first = False
            return buf.getvalue()

#
# A semantic context which is true whenever at least one of the contained
# contexts is true.
#
class OR (SemanticContext):

    def __init__(self, a, b):
        operands = set()
        if isinstance(a, OR):
            for o in a.opnds:
                operands.add(o)
        else:
            operands.add(a)
        if isinstance(b, OR):
            for o in b.opnds:
                operands.add(o)
        else:
            operands.add(b)

        precedencePredicates = filterPrecedencePredicates(operands)
        if len(precedencePredicates)>0:
            # interested in the transition with the highest precedence
            s = sorted(precedencePredicates)
            reduced = s[len(s)-1]
            operands.add(reduced)

        self.opnds = [ o for o in operands ]

    def __eq__(self, other):
        if self is other:
            return True
        elif not isinstance(other, OR):
            return False
        else:
            return self.opnds == other.opnds

    def __hash__(self):
        return hash(str(self.opnds) + "/OR")

    # <p>
    # The evaluation of predicates by this context is short-circuiting, but
    # unordered.</p>
    #
    def eval(self, parser, outerContext):
        for opnd in self.opnds:
            if opnd.eval(parser, outerContext):
                return True
        return False

    def evalPrecedence(self, parser, outerContext):
        differs = False
        operands = []
        for context in self.opnds:
            evaluated = context.evalPrecedence(parser, outerContext)
            differs |= evaluated is not context
            if evaluated is SemanticContext.NONE:
                # The OR context is true if any element is true
                return SemanticContext.NONE
            elif evaluated is not None:
                # Reduce the result by skipping false elements
                operands.append(evaluated)

        if not differs:
            return self

        if len(operands)==0:
            # all elements were false, so the OR context is false
            return None

        result = None
        for o in operands:
            result = o if result is None else orContext(result, o)

        return result

    def __unicode__(self):
        with StringIO() as buf:
            first = True
            for o in self.opnds:
                if not first:
                    buf.write(u"||")
                buf.write(unicode(o))
                first = False
            return buf.getvalue()


SemanticContext.NONE = Predicate()
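
A minimal runnable sketch of predicate evaluation; FakeParser is hypothetical and models only the sempred hook that Predicate.eval calls:

class FakeParser(object):
    def sempred(self, localctx, ruleIndex, predIndex):
        return predIndex == 0                         # pretend only predicate #0 is true

p0 = Predicate(ruleIndex=0, predIndex=0)
p1 = Predicate(ruleIndex=0, predIndex=1)
assert andContext(p0, p1).eval(FakeParser(), None) is False   # p1 fails the conjunction
assert orContext(p0, p1).eval(FakeParser(), None) is True     # p0 satisfies the disjunction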

@@ -0,0 +1,279 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

# An ATN transition between any two ATN states. Subclasses define
# atom, set, epsilon, action, predicate, rule transitions.
#
# <p>This is a one way link. It emanates from a state (usually via a list of
# transitions) and has a target state.</p>
#
# <p>Since we never have to change the ATN transitions once we construct it,
# we can fix these transitions as specific classes. The DFA transitions
# on the other hand need to update the labels as it adds transitions to
# the states. We'll use the term Edge for the DFA to distinguish them from
# ATN transitions.</p>
#
from __builtin__ import unicode

from antlr4.IntervalSet import IntervalSet, Interval
from antlr4.Token import Token
from antlr4.atn.SemanticContext import Predicate, PrecedencePredicate


class Transition (object):
    # constants for serialization
    EPSILON = 1
    RANGE = 2
    RULE = 3
    PREDICATE = 4 # e.g., {isType(input.LT(1))}?
    ATOM = 5
    ACTION = 6
    SET = 7 # ~(A|B) or ~atom, wildcard, which convert to next 2
    NOT_SET = 8
    WILDCARD = 9
    PRECEDENCE = 10

    serializationNames = [
        u"INVALID",
        u"EPSILON",
        u"RANGE",
        u"RULE",
        u"PREDICATE",
        u"ATOM",
        u"ACTION",
        u"SET",
        u"NOT_SET",
        u"WILDCARD",
        u"PRECEDENCE"
    ]

    serializationTypes = dict()

    def __init__(self, target):
        # The target of this transition.
        if target is None:
            raise Exception("target cannot be null.")
        self.target = target
        # Are we epsilon, action, sempred?
        self.isEpsilon = False
        self.label = None

    def __str__(self):
        return unicode(self)


# TODO: make all transitions sets? no, should remove set edges
class AtomTransition(Transition):

    def __init__(self, target, label):
        super(AtomTransition, self).__init__(target)
        self.label_ = label # The token type or character value; or, signifies special label.
        self.label = self.makeLabel()
        self.serializationType = self.ATOM

    def makeLabel(self):
        s = IntervalSet()
        s.addOne(self.label_)
        return s

    def matches(self, symbol, minVocabSymbol, maxVocabSymbol):
        return self.label_ == symbol

    def __unicode__(self):
        return unicode(self.label_)

class RuleTransition(Transition):

    def __init__(self, ruleStart, ruleIndex, precedence, followState):
        super(RuleTransition, self).__init__(ruleStart)
        self.ruleIndex = ruleIndex # ptr to the rule definition object for this rule ref
        self.precedence = precedence
        self.followState = followState # what node to begin computations following ref to rule
        self.serializationType = self.RULE
        self.isEpsilon = True

    def matches(self, symbol, minVocabSymbol, maxVocabSymbol):
        return False


class EpsilonTransition(Transition):

    def __init__(self, target, outermostPrecedenceReturn=-1):
        super(EpsilonTransition, self).__init__(target)
        self.serializationType = self.EPSILON
        self.isEpsilon = True
        self.outermostPrecedenceReturn = outermostPrecedenceReturn

    def matches(self, symbol, minVocabSymbol, maxVocabSymbol):
        return False

    def __unicode__(self):
        return "epsilon"

class RangeTransition(Transition):

    def __init__(self, target, start, stop):
        super(RangeTransition, self).__init__(target)
        self.serializationType = self.RANGE
        self.start = start
        self.stop = stop
        self.label = self.makeLabel()

    def makeLabel(self):
        s = IntervalSet()
        s.addRange(Interval(self.start, self.stop + 1))
        return s

    def matches(self, symbol, minVocabSymbol, maxVocabSymbol):
        return symbol >= self.start and symbol <= self.stop

    def __unicode__(self):
        return "'" + chr(self.start) + "'..'" + chr(self.stop) + "'"

class AbstractPredicateTransition(Transition):

    def __init__(self, target):
        super(AbstractPredicateTransition, self).__init__(target)


class PredicateTransition(AbstractPredicateTransition):

    def __init__(self, target, ruleIndex, predIndex, isCtxDependent):
        super(PredicateTransition, self).__init__(target)
        self.serializationType = self.PREDICATE
        self.ruleIndex = ruleIndex
        self.predIndex = predIndex
        self.isCtxDependent = isCtxDependent # e.g., $i ref in pred
        self.isEpsilon = True

    def matches(self, symbol, minVocabSymbol, maxVocabSymbol):
        return False

    def getPredicate(self):
        return Predicate(self.ruleIndex, self.predIndex, self.isCtxDependent)

    def __unicode__(self):
        return u"pred_" + unicode(self.ruleIndex) + u":" + unicode(self.predIndex)

class ActionTransition(Transition):

    def __init__(self, target, ruleIndex, actionIndex=-1, isCtxDependent=False):
        super(ActionTransition, self).__init__(target)
        self.serializationType = self.ACTION
        self.ruleIndex = ruleIndex
        self.actionIndex = actionIndex
        self.isCtxDependent = isCtxDependent # e.g., $i ref in pred
        self.isEpsilon = True

    def matches(self, symbol, minVocabSymbol, maxVocabSymbol):
        return False

    def __unicode__(self):
        return u"action_" + unicode(self.ruleIndex) + u":" + unicode(self.actionIndex)

# A transition containing a set of values.
class SetTransition(Transition):

    def __init__(self, target, set):
        super(SetTransition, self).__init__(target)
        self.serializationType = self.SET
        if set is not None:
            self.label = set
        else:
            self.label = IntervalSet()
            self.label.addRange(Interval(Token.INVALID_TYPE, Token.INVALID_TYPE + 1))

    def matches(self, symbol, minVocabSymbol, maxVocabSymbol):
        return symbol in self.label

    def __unicode__(self):
        return unicode(self.label)

class NotSetTransition(SetTransition):

    def __init__(self, target, set):
        super(NotSetTransition, self).__init__(target, set)
        self.serializationType = self.NOT_SET

    def matches(self, symbol, minVocabSymbol, maxVocabSymbol):
        return symbol >= minVocabSymbol \
            and symbol <= maxVocabSymbol \
            and not super(type(self), self).matches(symbol, minVocabSymbol, maxVocabSymbol)

    def __unicode__(self):
        return u'~' + super(type(self), self).__unicode__()


class WildcardTransition(Transition):

    def __init__(self, target):
        super(WildcardTransition, self).__init__(target)
        self.serializationType = self.WILDCARD

    def matches(self, symbol, minVocabSymbol, maxVocabSymbol):
        return symbol >= minVocabSymbol and symbol <= maxVocabSymbol

    def __unicode__(self):
        return u"."


class PrecedencePredicateTransition(AbstractPredicateTransition):

    def __init__(self, target, precedence):
        super(PrecedencePredicateTransition, self).__init__(target)
        self.serializationType = self.PRECEDENCE
        self.precedence = precedence
        self.isEpsilon = True

    def matches(self, symbol, minVocabSymbol, maxVocabSymbol):
        return False

    def getPredicate(self):
        return PrecedencePredicate(self.precedence)

    def __unicode__(self):
        # precedence is an int; convert before concatenating
        return unicode(self.precedence) + u" >= _p"


Transition.serializationTypes = {
    EpsilonTransition: Transition.EPSILON,
    RangeTransition: Transition.RANGE,
    RuleTransition: Transition.RULE,
    PredicateTransition: Transition.PREDICATE,
    AtomTransition: Transition.ATOM,
    ActionTransition: Transition.ACTION,
    SetTransition: Transition.SET,
    NotSetTransition: Transition.NOT_SET,
    WildcardTransition: Transition.WILDCARD,
    PrecedencePredicateTransition: Transition.PRECEDENCE
}
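
A quick sketch of label testing on transitions; since the constructor only stores the target, any non-None object can stand in for the target state here:

state = object()
r = RangeTransition(state, ord('a'), ord('z'))
assert r.matches(ord('m'), 0, 0xFFFF)             # inside the range
assert not r.matches(ord('0'), 0, 0xFFFF)         # outside the range
n = NotSetTransition(state, r.label)
assert n.matches(ord('0'), 0, 0xFFFF)             # complement within the vocabulary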

@@ -0,0 +1 @@
__author__ = 'ericvergnaud'
@@ -0,0 +1,150 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.dfa.DFAState import DFAState
from antlr4.error.Errors import IllegalStateException


class DFA(object):

    def __init__(self, atnStartState, decision=0):
        # From which ATN state did we create this DFA?
        self.atnStartState = atnStartState
        self.decision = decision
        # A set of all DFA states. Use {@link Map} so we can get old state back
        # ({@link Set} only allows you to see if it's there).
        self._states = dict()
        self.s0 = None
        # {@code true} if this DFA is for a precedence decision; otherwise,
        # {@code false}. This is the backing field for {@link #isPrecedenceDfa},
        # {@link #setPrecedenceDfa}.
        self.precedenceDfa = False

    # Get the start state for a specific precedence value.
    #
    # @param precedence The current precedence.
    # @return The start state corresponding to the specified precedence, or
    # {@code null} if no start state exists for the specified precedence.
    #
    # @throws IllegalStateException if this is not a precedence DFA.
    # @see #isPrecedenceDfa()
    def getPrecedenceStartState(self, precedence):
        if not self.precedenceDfa:
            raise IllegalStateException("Only precedence DFAs may contain a precedence start state.")

        # s0.edges is never null for a precedence DFA
        if precedence < 0 or precedence >= len(self.s0.edges):
            return None
        return self.s0.edges[precedence]

    # Set the start state for a specific precedence value.
    #
    # @param precedence The current precedence.
    # @param startState The start state corresponding to the specified
    # precedence.
    #
    # @throws IllegalStateException if this is not a precedence DFA.
    # @see #isPrecedenceDfa()
    #
    def setPrecedenceStartState(self, precedence, startState):
        if not self.precedenceDfa:
            raise IllegalStateException("Only precedence DFAs may contain a precedence start state.")

        if precedence < 0:
            return

        # synchronization on s0 here is ok. when the DFA is turned into a
        # precedence DFA, s0 will be initialized once and not updated again
        # s0.edges is never null for a precedence DFA
        if precedence >= len(self.s0.edges):
            ext = [None] * (precedence + 1 - len(self.s0.edges))
            self.s0.edges.extend(ext)
        self.s0.edges[precedence] = startState

    #
    # Sets whether this is a precedence DFA. If the specified value differs
    # from the current DFA configuration, the following actions are taken;
    # otherwise no changes are made to the current DFA.
    #
    # <ul>
    # <li>The {@link #states} map is cleared</li>
    # <li>If {@code precedenceDfa} is {@code false}, the initial state
    # {@link #s0} is set to {@code null}; otherwise, it is initialized to a new
    # {@link DFAState} with an empty outgoing {@link DFAState#edges} array to
    # store the start states for individual precedence values.</li>
    # <li>The {@link #precedenceDfa} field is updated</li>
    # </ul>
    #
    # @param precedenceDfa {@code true} if this is a precedence DFA; otherwise,
    # {@code false}
    def setPrecedenceDfa(self, precedenceDfa):
        if self.precedenceDfa != precedenceDfa:
            self._states = dict()
            if precedenceDfa:
                precedenceState = DFAState(ATNConfigSet())
                precedenceState.edges = []
                precedenceState.isAcceptState = False
                precedenceState.requiresFullContext = False
                self.s0 = precedenceState
            else:
                self.s0 = None
            self.precedenceDfa = precedenceDfa

    @property
    def states(self):
        return self._states

    # Return a list of all states in this DFA, ordered by state number.
    def sortedStates(self):
        return sorted(self._states.keys(), key=lambda state: state.stateNumber)

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        return self.toString(None)

    def toString(self, literalNames=None, symbolicNames=None):
        if self.s0 is None:
            return ""
        from antlr4.dfa.DFASerializer import DFASerializer
        serializer = DFASerializer(self, literalNames, symbolicNames)
        return unicode(serializer)

    def toLexerString(self):
        if self.s0 is None:
            return ""
        from antlr4.dfa.DFASerializer import LexerDFASerializer
        serializer = LexerDFASerializer(self)
        return unicode(serializer)
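
A minimal sketch of the precedence-DFA guard; the ATN start state is only stored, so None stands in for it here:

dfa = DFA(None, decision=0)
dfa.setPrecedenceDfa(True)
assert dfa.getPrecedenceStartState(5) is None    # nothing recorded for precedence 5 yet
dfa.setPrecedenceDfa(False)
# dfa.getPrecedenceStartState(5) would now raise IllegalStateException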

@@ -0,0 +1,100 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/

# A DFA walker that knows how to dump them to serialized strings.#/
from io import StringIO

from antlr4.Utils import str_list


class DFASerializer(object):

    def __init__(self, dfa, literalNames=None, symbolicNames=None):
        self.dfa = dfa
        self.literalNames = literalNames
        self.symbolicNames = symbolicNames

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        if self.dfa.s0 is None:
            return None
        with StringIO() as buf:
            for s in self.dfa.sortedStates():
                n = 0
                if s.edges is not None:
                    n = len(s.edges)
                for i in range(0, n):
                    t = s.edges[i]
                    if t is not None and t.stateNumber != 0x7FFFFFFF:
                        buf.write(self.getStateString(s))
                        label = self.getEdgeLabel(i)
                        buf.write(u"-")
                        buf.write(label)
                        buf.write(u"->")
                        buf.write(self.getStateString(t))
                        buf.write(u'\n')
            output = buf.getvalue()
            if len(output)==0:
                return None
            else:
                return output

    def getEdgeLabel(self, i):
        if i==0:
            return u"EOF"
        if self.literalNames is not None and i<=len(self.literalNames):
            return self.literalNames[i-1]
        elif self.symbolicNames is not None and i<=len(self.symbolicNames):
            return self.symbolicNames[i-1]
        else:
            return unicode(i-1)

    def getStateString(self, s):
        n = s.stateNumber
        baseStateStr = ( u":" if s.isAcceptState else u"") + u"s" + unicode(n) + \
                       ( u"^" if s.requiresFullContext else u"")
        if s.isAcceptState:
            if s.predicates is not None:
                return baseStateStr + u"=>" + str_list(s.predicates)
            else:
                return baseStateStr + u"=>" + unicode(s.prediction)
        else:
            return baseStateStr

class LexerDFASerializer(DFASerializer):

    def __init__(self, dfa):
        super(LexerDFASerializer, self).__init__(dfa, None)

    def getEdgeLabel(self, i):
        return u"'" + unichr(i) + u"'"
|
|
@ -0,0 +1,156 @@
|
|||
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/

# Map a predicate to a predicted alternative.#/
from io import StringIO

from antlr4.atn.ATNConfigSet import ATNConfigSet


class PredPrediction(object):

    def __init__(self, pred, alt):
        self.alt = alt
        self.pred = pred

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        return u"(" + unicode(self.pred) + u", " + unicode(self.alt) + u")"


# A DFA state represents a set of possible ATN configurations.
# As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
# to keep track of all possible states the ATN can be in after
# reading each input symbol. That is to say, after reading
# input a1a2..an, the DFA is in a state that represents the
# subset T of the states of the ATN that are reachable from the
# ATN's start state along some path labeled a1a2..an."
# In conventional NFA→DFA conversion, therefore, the subset T
# would be a bitset representing the set of states the
# ATN could be in. We need to track the alt predicted by each
# state as well, however. More importantly, we need to maintain
# a stack of states, tracking the closure operations as they
# jump from rule to rule, emulating rule invocations (method calls).
# I have to add a stack to simulate the proper lookahead sequences for
# the underlying LL grammar from which the ATN was derived.
#
# <p>I use a set of ATNConfig objects, not simple states. An ATNConfig
# is both a state (ala normal conversion) and a RuleContext describing
# the chain of rules (if any) followed to arrive at that state.</p>
#
# <p>A DFA state may have multiple references to a particular state,
# but with different ATN contexts (with same or different alts)
# meaning that state was reached via a different set of rule invocations.</p>
#/
class DFAState(object):

    def __init__(self, stateNumber=-1, configs=None):
        self.stateNumber = stateNumber
        # avoid the mutable-default-argument trap: create a fresh, empty
        # ATNConfigSet per state instead of sharing one instance across
        # every DFAState constructed without explicit configs
        self.configs = configs if configs is not None else ATNConfigSet()
        # {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1)
        # {@link Token#EOF} maps to {@code edges[0]}.
        self.edges = None
        self.isAcceptState = False
        # if accept state, what ttype do we match or alt do we predict?
        # This is set to {@link ATN#INVALID_ALT_NUMBER} when {@link #predicates}{@code !=null} or
        # {@link #requiresFullContext}.
        self.prediction = 0
        self.lexerActionExecutor = None
        # Indicates that this state was created during SLL prediction that
        # discovered a conflict between the configurations in the state. Future
        # {@link ParserATNSimulator#execATN} invocations immediately jump to
        # full-context prediction if this field is true.
        self.requiresFullContext = False
        # During SLL parsing, this is a list of predicates associated with the
        # ATN configurations of the DFA state. When we have predicates,
        # {@link #requiresFullContext} is {@code false} since full context prediction evaluates predicates
        # on-the-fly. If this is not null, then {@link #prediction} is
        # {@link ATN#INVALID_ALT_NUMBER}.
        #
        # <p>We only use these for non-{@link #requiresFullContext} but conflicting states. That
        # means we know from the context (it's $ or we don't dip into outer
        # context) that it's an ambiguity not a conflict.</p>
        #
        # <p>This list is computed by {@link ParserATNSimulator#predicateDFAState}.</p>
        self.predicates = None

    # Get the set of all alts mentioned by all ATN configurations in this
    # DFA state.
    def getAltSet(self):
        alts = set()
        if self.configs is not None:
            for c in self.configs:
                alts.add(c.alt)
        if len(alts) == 0:
            return None
        else:
            return alts

    def __hash__(self):
        return hash(self.configs)

    # Two {@link DFAState} instances are equal if their ATN configuration sets
    # are the same. This method is used to see if a state already exists.
    #
    # <p>Because the number of alternatives and number of ATN configurations are
    # finite, there is a finite number of DFA states that can be processed.
    # This is necessary to show that the algorithm terminates.</p>
    #
    # <p>Cannot test the DFA state numbers here because in
    # {@link ParserATNSimulator#addDFAState} we need to know if any other state
    # exists that has this exact set of ATN configurations. The
    # {@link #stateNumber} is irrelevant.</p>
    def __eq__(self, other):
        # compare set of ATN configurations in this set with other
        if self is other:
            return True
        elif not isinstance(other, DFAState):
            return False
        else:
            return self.configs == other.configs

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        with StringIO() as buf:
            buf.write(unicode(self.stateNumber))
            buf.write(u":")
            buf.write(unicode(self.configs))
            if self.isAcceptState:
                buf.write(u"=>")
                if self.predicates is not None:
                    buf.write(unicode(self.predicates))
                else:
                    buf.write(unicode(self.prediction))
            return buf.getvalue()
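
Aside (not part of the commit): the equality contract above is what lets addDFAState deduplicate states. A minimal sketch, assuming the module lands at antlr4.dfa.DFAState and that ATNConfigSet is hashable as the __hash__ above requires:

from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.dfa.DFAState import DFAState

configs = ATNConfigSet()
a = DFAState(stateNumber=1, configs=configs)
b = DFAState(stateNumber=2, configs=configs)
assert a == b               # equality ignores stateNumber entirely...
assert hash(a) == hash(b)   # ...so both land in the same DFA-table bucket
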
@@ -0,0 +1 @@
__author__ = 'ericvergnaud'

@@ -0,0 +1,133 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# This implementation of {@link ANTLRErrorListener} can be used to identify
# certain potential correctness and performance problems in grammars. "Reports"
# are made by calling {@link Parser#notifyErrorListeners} with the appropriate
# message.
#
# <ul>
# <li><b>Ambiguities</b>: These are cases where more than one path through the
# grammar can match the input.</li>
# <li><b>Weak context sensitivity</b>: These are cases where full-context
# prediction resolved an SLL conflict to a unique alternative which equaled the
# minimum alternative of the SLL conflict.</li>
# <li><b>Strong (forced) context sensitivity</b>: These are cases where the
# full-context prediction resolved an SLL conflict to a unique alternative,
# <em>and</em> the minimum alternative of the SLL conflict was found to not be
# a truly viable alternative. Two-stage parsing cannot be used for inputs where
# this situation occurs.</li>
# </ul>

from io import StringIO

from antlr4.Utils import str_set
from antlr4.error.ErrorListener import ErrorListener


class DiagnosticErrorListener(ErrorListener):

    def __init__(self, exactOnly=True):
        # whether all ambiguities or only exact ambiguities are reported.
        self.exactOnly = exactOnly

    def reportAmbiguity(self, recognizer, dfa, startIndex,
                        stopIndex, exact, ambigAlts, configs):
        if self.exactOnly and not exact:
            return

        with StringIO() as buf:
            buf.write(u"reportAmbiguity d=")
            buf.write(self.getDecisionDescription(recognizer, dfa))
            buf.write(u": ambigAlts=")
            buf.write(str_set(self.getConflictingAlts(ambigAlts, configs)))
            buf.write(u", input='")
            buf.write(recognizer.getTokenStream().getText((startIndex, stopIndex)))
            buf.write(u"'")
            recognizer.notifyErrorListeners(buf.getvalue())

    def reportAttemptingFullContext(self, recognizer, dfa, startIndex,
                                    stopIndex, conflictingAlts, configs):
        with StringIO() as buf:
            buf.write(u"reportAttemptingFullContext d=")
            buf.write(self.getDecisionDescription(recognizer, dfa))
            buf.write(u", input='")
            buf.write(recognizer.getTokenStream().getText((startIndex, stopIndex)))
            buf.write(u"'")
            recognizer.notifyErrorListeners(buf.getvalue())

    def reportContextSensitivity(self, recognizer, dfa, startIndex,
                                 stopIndex, prediction, configs):
        with StringIO() as buf:
            buf.write(u"reportContextSensitivity d=")
            buf.write(self.getDecisionDescription(recognizer, dfa))
            buf.write(u", input='")
            buf.write(recognizer.getTokenStream().getText((startIndex, stopIndex)))
            buf.write(u"'")
            recognizer.notifyErrorListeners(buf.getvalue())

    def getDecisionDescription(self, recognizer, dfa):
        decision = dfa.decision
        ruleIndex = dfa.atnStartState.ruleIndex

        ruleNames = recognizer.ruleNames
        if ruleIndex < 0 or ruleIndex >= len(ruleNames):
            return unicode(decision)

        ruleName = ruleNames[ruleIndex]
        if ruleName is None or len(ruleName) == 0:
            return unicode(decision)

        return unicode(decision) + u" (" + ruleName + u")"

    #
    # Computes the set of conflicting or ambiguous alternatives from a
    # configuration set, if that information was not already provided by the
    # parser.
    #
    # @param reportedAlts The set of conflicting or ambiguous alternatives, as
    # reported by the parser.
    # @param configs The conflicting or ambiguous configuration set.
    # @return Returns {@code reportedAlts} if it is not {@code null}, otherwise
    # returns the set of alternatives represented in {@code configs}.
    #
    def getConflictingAlts(self, reportedAlts, configs):
        if reportedAlts is not None:
            return reportedAlts

        result = set()
        for config in configs:
            result.add(config.alt)

        return result
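
Typical wiring, for reference (hypothetical grammar names; InputStream and CommonTokenStream are assumed to be exported by the runtime's package root, which this diff does not show):

from antlr4 import InputStream, CommonTokenStream
from antlr4.error.DiagnosticErrorListener import DiagnosticErrorListener

lexer = MyGrammarLexer(InputStream(u"some input"))   # generated lexer (assumed)
parser = MyGrammarParser(CommonTokenStream(lexer))   # generated parser (assumed)
parser.addErrorListener(DiagnosticErrorListener())   # exactOnly=True by default
tree = parser.startRule()                            # reports arrive via notifyErrorListeners
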
@@ -0,0 +1,97 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Provides an empty default implementation of {@link ANTLRErrorListener}. The
# default implementation of each method does nothing, but can be overridden as
# necessary.

from __future__ import print_function
import sys

class ErrorListener(object):

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        pass

    def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs):
        pass

    def reportAttemptingFullContext(self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs):
        pass

    def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs):
        pass

class ConsoleErrorListener(ErrorListener):
    #
    # Provides a default instance of {@link ConsoleErrorListener}.
    #
    INSTANCE = None

    #
    # {@inheritDoc}
    #
    # <p>
    # This implementation prints messages to {@link System#err} containing the
    # values of {@code line}, {@code charPositionInLine}, and {@code msg} using
    # the following format.</p>
    #
    # <pre>
    # line <em>line</em>:<em>charPositionInLine</em> <em>msg</em>
    # </pre>
    #
    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        print("line " + str(line) + ":" + str(column) + " " + msg, file=sys.stderr)

ConsoleErrorListener.INSTANCE = ConsoleErrorListener()

class ProxyErrorListener(ErrorListener):

    def __init__(self, delegates):
        super(ProxyErrorListener, self).__init__()
        if delegates is None:
            raise ReferenceError("delegates")
        self.delegates = delegates

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        for delegate in self.delegates:
            delegate.syntaxError(recognizer, offendingSymbol, line, column, msg, e)

    def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs):
        for delegate in self.delegates:
            delegate.reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs)

    def reportAttemptingFullContext(self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs):
        for delegate in self.delegates:
            delegate.reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs)

    def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs):
        for delegate in self.delegates:
            delegate.reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs)
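
Sketch of a custom listener (not part of the commit): collect errors instead of printing them; any subset of the four callbacks may be overridden. The recognizer calls in the trailing comments are assumed from the proxy/console machinery above.

from antlr4.error.ErrorListener import ErrorListener

class CollectingErrorListener(ErrorListener):

    def __init__(self):
        self.errors = []

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        self.errors.append(u"%s:%s %s" % (line, column, msg))

# Assumed recognizer API:
# parser.removeErrorListeners()                      # drop ConsoleErrorListener.INSTANCE
# parser.addErrorListener(CollectingErrorListener())
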
@@ -0,0 +1,719 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
from antlr4.IntervalSet import IntervalSet

from antlr4.Token import Token
from antlr4.atn.ATNState import ATNState
from antlr4.error.Errors import NoViableAltException, InputMismatchException, FailedPredicateException, ParseCancellationException

class ErrorStrategy(object):

    def reset(self, recognizer):
        pass

    def recoverInline(self, recognizer):
        pass

    def recover(self, recognizer, e):
        pass

    def sync(self, recognizer):
        pass

    def inErrorRecoveryMode(self, recognizer):
        pass

    def reportError(self, recognizer, e):
        pass


# This is the default implementation of {@link ANTLRErrorStrategy} used for
# error reporting and recovery in ANTLR parsers.
#
class DefaultErrorStrategy(ErrorStrategy):

    def __init__(self):
        super(DefaultErrorStrategy, self).__init__()
        # Indicates whether the error strategy is currently "recovering from an
        # error". This is used to suppress reporting multiple error messages while
        # attempting to recover from a detected syntax error.
        #
        # @see #inErrorRecoveryMode
        #
        self.errorRecoveryMode = False

        # The index into the input stream where the last error occurred.
        # This is used to prevent infinite loops where an error is found
        # but no token is consumed during recovery...another error is found,
        # ad nauseam. This is a failsafe mechanism to guarantee that at least
        # one token/tree node is consumed for two errors.
        #
        self.lastErrorIndex = -1
        self.lastErrorStates = None

    # <p>The default implementation simply calls {@link #endErrorCondition} to
    # ensure that the handler is not in error recovery mode.</p>
    def reset(self, recognizer):
        self.endErrorCondition(recognizer)

    #
    # This method is called to enter error recovery mode when a recognition
    # exception is reported.
    #
    # @param recognizer the parser instance
    #
    def beginErrorCondition(self, recognizer):
        self.errorRecoveryMode = True

    def inErrorRecoveryMode(self, recognizer):
        return self.errorRecoveryMode

    #
    # This method is called to leave error recovery mode after recovering from
    # a recognition exception.
    #
    # @param recognizer
    #
    def endErrorCondition(self, recognizer):
        self.errorRecoveryMode = False
        self.lastErrorStates = None
        self.lastErrorIndex = -1

    #
    # {@inheritDoc}
    #
    # <p>The default implementation simply calls {@link #endErrorCondition}.</p>
    #
    def reportMatch(self, recognizer):
        self.endErrorCondition(recognizer)

    #
    # {@inheritDoc}
    #
    # <p>The default implementation returns immediately if the handler is already
    # in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
    # and dispatches the reporting task based on the runtime type of {@code e}
    # according to the following table.</p>
    #
    # <ul>
    # <li>{@link NoViableAltException}: Dispatches the call to
    # {@link #reportNoViableAlternative}</li>
    # <li>{@link InputMismatchException}: Dispatches the call to
    # {@link #reportInputMismatch}</li>
    # <li>{@link FailedPredicateException}: Dispatches the call to
    # {@link #reportFailedPredicate}</li>
    # <li>All other types: calls {@link Parser#notifyErrorListeners} to report
    # the exception</li>
    # </ul>
    #
    def reportError(self, recognizer, e):
        # if we've already reported an error and have not matched a token
        # yet successfully, don't report any errors.
        if self.inErrorRecoveryMode(recognizer):
            return # don't report spurious errors
        self.beginErrorCondition(recognizer)
        if isinstance(e, NoViableAltException):
            self.reportNoViableAlternative(recognizer, e)
        elif isinstance(e, InputMismatchException):
            self.reportInputMismatch(recognizer, e)
        elif isinstance(e, FailedPredicateException):
            self.reportFailedPredicate(recognizer, e)
        else:
            print("unknown recognition error type: " + type(e).__name__)
            # the exceptions defined in this runtime expose the token and
            # message as attributes rather than getters, and
            # notifyErrorListeners takes the message first
            recognizer.notifyErrorListeners(e.message, e.offendingToken, e)

    #
    # {@inheritDoc}
    #
    # <p>The default implementation resynchronizes the parser by consuming tokens
    # until we find one in the resynchronization set--loosely the set of tokens
    # that can follow the current rule.</p>
    #
    def recover(self, recognizer, e):
        if self.lastErrorIndex == recognizer.getInputStream().index \
                and self.lastErrorStates is not None \
                and recognizer.state in self.lastErrorStates:
            # uh oh, another error at same token index and previously-visited
            # state in ATN; must be a case where LT(1) is in the recovery
            # token set so nothing got consumed. Consume a single token
            # at least to prevent an infinite loop; this is a failsafe.
            recognizer.consume()

        self.lastErrorIndex = recognizer._input.index
        if self.lastErrorStates is None:
            self.lastErrorStates = []
        self.lastErrorStates.append(recognizer.state)
        followSet = self.getErrorRecoverySet(recognizer)
        self.consumeUntil(recognizer, followSet)

    # The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
    # that the current lookahead symbol is consistent with what we were expecting
    # at this point in the ATN. You can call this anytime but ANTLR only
    # generates code to check before subrules/loops and each iteration.
    #
    # <p>Implements Jim Idle's magic sync mechanism in closures and optional
    # subrules. E.g.,</p>
    #
    # <pre>
    # a : sync ( stuff sync )* ;
    # sync : {consume to what can follow sync} ;
    # </pre>
    #
    # At the start of a sub rule upon error, {@link #sync} performs single
    # token deletion, if possible. If it can't do that, it bails on the current
    # rule and uses the default error recovery, which consumes until the
    # resynchronization set of the current rule.
    #
    # <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
    # with an empty alternative), then the expected set includes what follows
    # the subrule.</p>
    #
    # <p>During loop iteration, it consumes until it sees a token that can start a
    # sub rule or what follows the loop. Yes, that is pretty aggressive. We opt to
    # stay in the loop as long as possible.</p>
    #
    # <p><strong>ORIGINS</strong></p>
    #
    # <p>Previous versions of ANTLR did a poor job of their recovery within loops.
    # A single mismatched token or missing token would force the parser to bail
    # out of all the rules surrounding the loop. So, for rule</p>
    #
    # <pre>
    # classDef : 'class' ID '{' member* '}'
    # </pre>
    #
    # input with an extra token between members would force the parser to
    # consume until it found the next class definition rather than the next
    # member definition of the current class.
    #
    # <p>This functionality cost a little bit of effort because the parser has to
    # compare the token set at the start of the loop and at each iteration. If for
    # some reason speed is suffering for you, you can turn off this
    # functionality by simply overriding this method as a blank { }.</p>
    #
    def sync(self, recognizer):
        # If already recovering, don't try to sync
        if self.inErrorRecoveryMode(recognizer):
            return

        s = recognizer._interp.atn.states[recognizer.state]
        la = recognizer.getTokenStream().LA(1)
        # try cheaper subset first; might get lucky. seems to shave a wee bit off
        if la == Token.EOF or la in recognizer.atn.nextTokens(s):
            return

        # Return but don't end recovery. only do that upon valid token match
        if recognizer.isExpectedToken(la):
            return

        if s.stateType in [ATNState.BLOCK_START, ATNState.STAR_BLOCK_START,
                           ATNState.PLUS_BLOCK_START, ATNState.STAR_LOOP_ENTRY]:
            # report error and recover if possible
            if self.singleTokenDeletion(recognizer) is not None:
                return
            else:
                raise InputMismatchException(recognizer)

        elif s.stateType in [ATNState.PLUS_LOOP_BACK, ATNState.STAR_LOOP_BACK]:
            self.reportUnwantedToken(recognizer)
            expecting = recognizer.getExpectedTokens()
            whatFollowsLoopIterationOrRule = expecting.addSet(self.getErrorRecoverySet(recognizer))
            self.consumeUntil(recognizer, whatFollowsLoopIterationOrRule)

        else:
            # do nothing if we can't identify the exact kind of ATN state
            pass

    # This is called by {@link #reportError} when the exception is a
    # {@link NoViableAltException}.
    #
    # @see #reportError
    #
    # @param recognizer the parser instance
    # @param e the recognition exception
    #
    def reportNoViableAlternative(self, recognizer, e):
        tokens = recognizer.getTokenStream()
        if tokens is not None:
            if e.startToken.type == Token.EOF:
                input = "<EOF>"
            else:
                input = tokens.getText((e.startToken, e.offendingToken))
        else:
            input = "<unknown input>"
        msg = "no viable alternative at input " + self.escapeWSAndQuote(input)
        recognizer.notifyErrorListeners(msg, e.offendingToken, e)

    #
    # This is called by {@link #reportError} when the exception is an
    # {@link InputMismatchException}.
    #
    # @see #reportError
    #
    # @param recognizer the parser instance
    # @param e the recognition exception
    #
    def reportInputMismatch(self, recognizer, e):
        msg = "mismatched input " + self.getTokenErrorDisplay(e.offendingToken) \
              + " expecting " + e.getExpectedTokens().toString(recognizer.literalNames, recognizer.symbolicNames)
        recognizer.notifyErrorListeners(msg, e.offendingToken, e)

    #
    # This is called by {@link #reportError} when the exception is a
    # {@link FailedPredicateException}.
    #
    # @see #reportError
    #
    # @param recognizer the parser instance
    # @param e the recognition exception
    #
    def reportFailedPredicate(self, recognizer, e):
        ruleName = recognizer.ruleNames[recognizer._ctx.getRuleIndex()]
        msg = "rule " + ruleName + " " + e.message
        recognizer.notifyErrorListeners(msg, e.offendingToken, e)

    # This method is called to report a syntax error which requires the removal
    # of a token from the input stream. At the time this method is called, the
    # erroneous symbol is the current {@code LT(1)} symbol and has not yet been
    # removed from the input stream. When this method returns,
    # {@code recognizer} is in error recovery mode.
    #
    # <p>This method is called when {@link #singleTokenDeletion} identifies
    # single-token deletion as a viable recovery strategy for a mismatched
    # input error.</p>
    #
    # <p>The default implementation simply returns if the handler is already in
    # error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
    # enter error recovery mode, followed by calling
    # {@link Parser#notifyErrorListeners}.</p>
    #
    # @param recognizer the parser instance
    #
    def reportUnwantedToken(self, recognizer):
        if self.inErrorRecoveryMode(recognizer):
            return

        self.beginErrorCondition(recognizer)
        t = recognizer.getCurrentToken()
        tokenName = self.getTokenErrorDisplay(t)
        expecting = self.getExpectedTokens(recognizer)
        msg = "extraneous input " + tokenName + " expecting " \
              + expecting.toString(recognizer.literalNames, recognizer.symbolicNames)
        recognizer.notifyErrorListeners(msg, t, None)

    # This method is called to report a syntax error which requires the
    # insertion of a missing token into the input stream. At the time this
    # method is called, the missing token has not yet been inserted. When this
    # method returns, {@code recognizer} is in error recovery mode.
    #
    # <p>This method is called when {@link #singleTokenInsertion} identifies
    # single-token insertion as a viable recovery strategy for a mismatched
    # input error.</p>
    #
    # <p>The default implementation simply returns if the handler is already in
    # error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
    # enter error recovery mode, followed by calling
    # {@link Parser#notifyErrorListeners}.</p>
    #
    # @param recognizer the parser instance
    #
    def reportMissingToken(self, recognizer):
        if self.inErrorRecoveryMode(recognizer):
            return
        self.beginErrorCondition(recognizer)
        t = recognizer.getCurrentToken()
        expecting = self.getExpectedTokens(recognizer)
        msg = "missing " + expecting.toString(recognizer.literalNames, recognizer.symbolicNames) \
              + " at " + self.getTokenErrorDisplay(t)
        recognizer.notifyErrorListeners(msg, t, None)

    # <p>The default implementation attempts to recover from the mismatched input
    # by using single token insertion and deletion as described below. If the
    # recovery attempt fails, this method throws an
    # {@link InputMismatchException}.</p>
    #
    # <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
    #
    # <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
    # right token, however, then assume {@code LA(1)} is some extra spurious
    # token and delete it. Then consume and return the next token (which was
    # the {@code LA(2)} token) as the successful result of the match operation.</p>
    #
    # <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
    #
    # <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
    #
    # <p>If the current token (at {@code LA(1)}) is consistent with what could come
    # after the expected {@code LA(1)} token, then assume the token is missing
    # and use the parser's {@link TokenFactory} to create it on the fly. The
    # "insertion" is performed by returning the created token as the successful
    # result of the match operation.</p>
    #
    # <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
    #
    # <p><strong>EXAMPLE</strong></p>
    #
    # <p>For example, input {@code i=(3;} is clearly missing the {@code ')'}. When
    # the parser returns from the nested call to {@code expr}, it will have
    # call chain:</p>
    #
    # <pre>
    # stat → expr → atom
    # </pre>
    #
    # and it will be trying to match the {@code ')'} at this point in the
    # derivation:
    #
    # <pre>
    # => ID '=' '(' INT ')' ('+' atom)* ';'
    #                    ^
    # </pre>
    #
    # The attempt to match {@code ')'} will fail when it sees {@code ';'} and
    # call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
    # is in the set of tokens that can follow the {@code ')'} token reference
    # in rule {@code atom}. It can assume that you forgot the {@code ')'}.
    #
    def recoverInline(self, recognizer):
        # SINGLE TOKEN DELETION
        matchedSymbol = self.singleTokenDeletion(recognizer)
        if matchedSymbol is not None:
            # we have deleted the extra token.
            # now, move past ttype token as if all were ok
            recognizer.consume()
            return matchedSymbol

        # SINGLE TOKEN INSERTION
        if self.singleTokenInsertion(recognizer):
            return self.getMissingSymbol(recognizer)

        # even that didn't work; must throw the exception
        raise InputMismatchException(recognizer)

    #
    # This method implements the single-token insertion inline error recovery
    # strategy. It is called by {@link #recoverInline} if the single-token
    # deletion strategy fails to recover from the mismatched input. If this
    # method returns {@code true}, {@code recognizer} will be in error recovery
    # mode.
    #
    # <p>This method determines whether or not single-token insertion is viable by
    # checking if the {@code LA(1)} input symbol could be successfully matched
    # if it were instead the {@code LA(2)} symbol. If this method returns
    # {@code true}, the caller is responsible for creating and inserting a
    # token with the correct type to produce this behavior.</p>
    #
    # @param recognizer the parser instance
    # @return {@code true} if single-token insertion is a viable recovery
    # strategy for the current mismatched input, otherwise {@code false}
    #
    def singleTokenInsertion(self, recognizer):
        currentSymbolType = recognizer.getTokenStream().LA(1)
        # if current token is consistent with what could come after current
        # ATN state, then we know we're missing a token; error recovery
        # is free to conjure up and insert the missing token
        atn = recognizer._interp.atn
        currentState = atn.states[recognizer.state]
        next = currentState.transitions[0].target
        expectingAtLL2 = atn.nextTokens(next, recognizer._ctx)
        if currentSymbolType in expectingAtLL2:
            self.reportMissingToken(recognizer)
            return True
        else:
            return False

    # This method implements the single-token deletion inline error recovery
    # strategy. It is called by {@link #recoverInline} to attempt to recover
    # from mismatched input. If this method returns null, the parser and error
    # handler state will not have changed. If this method returns non-null,
    # {@code recognizer} will <em>not</em> be in error recovery mode since the
    # returned token was a successful match.
    #
    # <p>If the single-token deletion is successful, this method calls
    # {@link #reportUnwantedToken} to report the error, followed by
    # {@link Parser#consume} to actually "delete" the extraneous token. Then,
    # before returning, {@link #reportMatch} is called to signal a successful
    # match.</p>
    #
    # @param recognizer the parser instance
    # @return the successfully matched {@link Token} instance if single-token
    # deletion successfully recovers from the mismatched input, otherwise
    # {@code null}
    #
    def singleTokenDeletion(self, recognizer):
        nextTokenType = recognizer.getTokenStream().LA(2)
        expecting = self.getExpectedTokens(recognizer)
        if nextTokenType in expecting:
            self.reportUnwantedToken(recognizer)
            # print("recoverFromMismatchedToken deleting " \
            #     + str(recognizer.getTokenStream().LT(1)) \
            #     + " since " + str(recognizer.getTokenStream().LT(2)) \
            #     + " is what we want", file=sys.stderr)
            recognizer.consume() # simply delete extra token
            # we want to return the token we're actually matching
            matchedSymbol = recognizer.getCurrentToken()
            self.reportMatch(recognizer) # we know current token is correct
            return matchedSymbol
        else:
            return None

    # Conjure up a missing token during error recovery.
    #
    # The recognizer attempts to recover from single missing
    # symbols. But, actions might refer to that missing symbol.
    # For example, x=ID {f($x);}. The action clearly assumes
    # that there has been an identifier matched previously and that
    # $x points at that token. If that token is missing, but
    # the next token in the stream is what we want we assume that
    # this token is missing and we keep going. Because we
    # have to return some token to replace the missing token,
    # we have to conjure one up. This method gives the user control
    # over the tokens returned for missing tokens. Mostly,
    # you will want to create something special for identifier
    # tokens. For literals such as '{' and ',', the default
    # action in the parser or tree parser works. It simply creates
    # a CommonToken of the appropriate type. The text will be the token.
    # If you change what tokens must be created by the lexer,
    # override this method to create the appropriate tokens.
    #
    def getMissingSymbol(self, recognizer):
        currentSymbol = recognizer.getCurrentToken()
        expecting = self.getExpectedTokens(recognizer)
        expectedTokenType = expecting[0] # get any element
        if expectedTokenType == Token.EOF:
            tokenText = "<missing EOF>"
        else:
            name = None
            if expectedTokenType < len(recognizer.literalNames):
                name = recognizer.literalNames[expectedTokenType]
            if name is None and expectedTokenType < len(recognizer.symbolicNames):
                name = recognizer.symbolicNames[expectedTokenType]
            tokenText = "<missing " + str(name) + ">"
        current = currentSymbol
        lookback = recognizer.getTokenStream().LT(-1)
        if current.type == Token.EOF and lookback is not None:
            current = lookback
        return recognizer.getTokenFactory().create(current.source,
            expectedTokenType, tokenText, Token.DEFAULT_CHANNEL,
            -1, -1, current.line, current.column)

    def getExpectedTokens(self, recognizer):
        return recognizer.getExpectedTokens()

    # How should a token be displayed in an error message? The default
    # is to display just the text, but during development you might
    # want to have a lot of information spit out. Override in that case
    # to use t.toString() (which, for CommonToken, dumps everything about
    # the token). This is better than forcing you to override a method in
    # your token objects because you don't have to go modify your lexer
    # so that it creates a new Java type.
    #
    def getTokenErrorDisplay(self, t):
        if t is None:
            return u"<no token>"
        s = t.text
        if s is None:
            if t.type == Token.EOF:
                s = u"<EOF>"
            else:
                s = u"<" + unicode(t.type) + u">"
        return self.escapeWSAndQuote(s)

    def escapeWSAndQuote(self, s):
        s = s.replace(u"\n", u"\\n")
        s = s.replace(u"\r", u"\\r")
        s = s.replace(u"\t", u"\\t")
        return u"'" + s + u"'"

    # Compute the error recovery set for the current rule. During
    # rule invocation, the parser pushes the set of tokens that can
    # follow that rule reference on the stack; this amounts to
    # computing FIRST of what follows the rule reference in the
    # enclosing rule. See LinearApproximator.FIRST().
    # This local follow set only includes tokens
    # from within the rule; i.e., the FIRST computation done by
    # ANTLR stops at the end of a rule.
    #
    # EXAMPLE
    #
    # When you find a "no viable alt exception", the input is not
    # consistent with any of the alternatives for rule r. The best
    # thing to do is to consume tokens until you see something that
    # can legally follow a call to r *or* any rule that called r.
    # You don't want the exact set of viable next tokens because the
    # input might just be missing a token--you might consume the
    # rest of the input looking for one of the missing tokens.
    #
    # Consider grammar:
    #
    # a : '[' b ']'
    #   | '(' b ')'
    #   ;
    # b : c '^' INT ;
    # c : ID
    #   | INT
    #   ;
    #
    # At each rule invocation, the set of tokens that could follow
    # that rule is pushed on a stack. Here are the various
    # context-sensitive follow sets:
    #
    # FOLLOW(b1_in_a) = FIRST(']') = ']'
    # FOLLOW(b2_in_a) = FIRST(')') = ')'
    # FOLLOW(c_in_b) = FIRST('^') = '^'
    #
    # Upon erroneous input "[]", the call chain is
    #
    # a -> b -> c
    #
    # and, hence, the follow context stack is:
    #
    # depth     follow set       start of rule execution
    #   0       <EOF>            a (from main())
    #   1       ']'              b
    #   2       '^'              c
    #
    # Notice that ')' is not included, because b would have to have
    # been called from a different context in rule a for ')' to be
    # included.
    #
    # For error recovery, we cannot consider FOLLOW(c)
    # (context-sensitive or otherwise). We need the combined set of
    # all context-sensitive FOLLOW sets--the set of all tokens that
    # could follow any reference in the call chain. We need to
    # resync to one of those tokens. Note that FOLLOW(c)='^' and if
    # we resync'd to that token, we'd consume until EOF. We need to
    # sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
    # In this case, for input "[]", LA(1) is ']' and in the set, so we would
    # not consume anything. After printing an error, rule c would
    # return normally. Rule b would not find the required '^' though.
    # At this point, it gets a mismatched token error and throws an
    # exception (since LA(1) is not in the viable following token
    # set). The rule exception handler tries to recover, but finds
    # the same recovery set and doesn't consume anything. Rule b
    # exits normally returning to rule a. Now it finds the ']' (and
    # with the successful match exits errorRecovery mode).
    #
    # So, you can see that the parser walks up the call chain looking
    # for the token that was a member of the recovery set.
    #
    # Errors are not generated in errorRecovery mode.
    #
    # ANTLR's error recovery mechanism is based upon original ideas:
    #
    # "Algorithms + Data Structures = Programs" by Niklaus Wirth
    #
    # and
    #
    # "A note on error recovery in recursive descent parsers":
    # http://portal.acm.org/citation.cfm?id=947902.947905
    #
    # Later, Josef Grosch had some good ideas:
    #
    # "Efficient and Comfortable Error Recovery in Recursive Descent
    # Parsers":
    # ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
    #
    # Like Grosch I implement context-sensitive FOLLOW sets that are combined
    # at run-time upon error to avoid overhead during parsing.
    #
    def getErrorRecoverySet(self, recognizer):
        atn = recognizer._interp.atn
        ctx = recognizer._ctx
        recoverSet = IntervalSet()
        while ctx is not None and ctx.invokingState >= 0:
            # compute what follows who invoked us
            invokingState = atn.states[ctx.invokingState]
            rt = invokingState.transitions[0]
            follow = atn.nextTokens(rt.followState)
            recoverSet.addSet(follow)
            ctx = ctx.parentCtx
        recoverSet.removeOne(Token.EPSILON)
        return recoverSet

    # Consume tokens until one matches the given token set.#
    def consumeUntil(self, recognizer, set_):
        ttype = recognizer.getTokenStream().LA(1)
        while ttype != Token.EOF and ttype not in set_:
            recognizer.consume()
            ttype = recognizer.getTokenStream().LA(1)


#
# This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
# by immediately canceling the parse operation with a
# {@link ParseCancellationException}. The implementation ensures that the
# {@link ParserRuleContext#exception} field is set for all parse tree nodes
# that were not completed prior to encountering the error.
#
# <p>
# This error strategy is useful in the following scenarios.</p>
#
# <ul>
# <li><strong>Two-stage parsing:</strong> This error strategy allows the first
# stage of two-stage parsing to immediately terminate if an error is
# encountered, and immediately fall back to the second stage. In addition to
# avoiding wasted work by attempting to recover from errors here, the empty
# implementation of {@link BailErrorStrategy#sync} improves the performance of
# the first stage.</li>
# <li><strong>Silent validation:</strong> When syntax errors are not being
# reported or logged, and the parse result is simply ignored if errors occur,
# the {@link BailErrorStrategy} avoids wasting work on recovering from errors
# when the result will be ignored either way.</li>
# </ul>
#
# <p>
# {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
#
# @see Parser#setErrorHandler(ANTLRErrorStrategy)
#
class BailErrorStrategy(DefaultErrorStrategy):
    # Instead of recovering from exception {@code e}, re-throw it wrapped
    # in a {@link ParseCancellationException} so it is not caught by the
    # rule function catches. Use {@link Exception#getCause()} to get the
    # original {@link RecognitionException}.
    #
    def recover(self, recognizer, e):
        context = recognizer._ctx
        while context is not None:
            context.exception = e
            context = context.parentCtx
        raise ParseCancellationException(e)

    # Make sure we don't attempt to recover inline; if the parser
    # successfully recovers, it won't throw an exception.
    #
    def recoverInline(self, recognizer):
        self.recover(recognizer, InputMismatchException(recognizer))

    # Make sure we don't attempt to recover from problems in subrules.#
    def sync(self, recognizer):
        pass
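
For reference, the two-stage pattern the BailErrorStrategy docstring alludes to, as a hedged sketch. The `_errHandler` and `_interp.predictionMode` attribute names and the PredictionMode import path are assumptions about the rest of this runtime; `startRule` stands in for a real entry rule.

from antlr4.atn.PredictionMode import PredictionMode
from antlr4.error.ErrorStrategy import BailErrorStrategy, DefaultErrorStrategy
from antlr4.error.Errors import ParseCancellationException

def parse_two_stage(parser):
    parser._errHandler = BailErrorStrategy()         # stage 1: fast SLL, bail on first error
    parser._interp.predictionMode = PredictionMode.SLL
    try:
        return parser.startRule()
    except ParseCancellationException:
        parser.reset()                               # rewind the token stream
        parser._errHandler = DefaultErrorStrategy()  # stage 2: full LL with recovery
        parser._interp.predictionMode = PredictionMode.LL
        return parser.startRule()
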
@@ -0,0 +1,177 @@
# [The "BSD license"]
|
||||
# Copyright (c) 2012 Terence Parr
|
||||
# Copyright (c) 2012 Sam Harwell
|
||||
# Copyright (c) 2014 Eric Vergnaud
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# 3. The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#/
|
||||
from antlr4.atn.Transition import PredicateTransition
|
||||
|
||||
|
||||
class UnsupportedOperationException(Exception):
|
||||
|
||||
def __init__(self, msg):
|
||||
super(UnsupportedOperationException, self).__init__(msg)
|
||||
|
||||
class IllegalStateException(Exception):
|
||||
|
||||
def __init__(self, msg):
|
||||
super(IllegalStateException, self).__init__(msg)
|
||||
|
||||
class CancellationException(IllegalStateException):
|
||||
|
||||
def __init__(self, msg):
|
||||
super(CancellationException, self).__init__(msg)
|
||||
|
||||
# The root of the ANTLR exception hierarchy. In general, ANTLR tracks just
|
||||
# 3 kinds of errors: prediction errors, failed predicate errors, and
|
||||
# mismatched input errors. In each case, the parser knows where it is
|
||||
# in the input, where it is in the ATN, the rule invocation stack,
|
||||
# and what kind of problem occurred.
|
||||
|
||||
class RecognitionException(Exception):
|
||||
|
||||
|
||||
def __init__(self, message=None, recognizer=None, input=None, ctx=None):
|
||||
super(RecognitionException, self).__init__(message)
|
||||
self.recognizer = recognizer
|
||||
self.input = input
|
||||
self.ctx = ctx
|
||||
# The current {@link Token} when an error occurred. Since not all streams
|
||||
# support accessing symbols by index, we have to track the {@link Token}
|
||||
# instance itself.
|
||||
self.offendingToken = None
|
||||
# Get the ATN state number the parser was in at the time the error
|
||||
# occurred. For {@link NoViableAltException} and
|
||||
# {@link LexerNoViableAltException} exceptions, this is the
|
||||
# {@link DecisionState} number. For others, it is the state whose outgoing
|
||||
# edge we couldn't match.
|
||||
self.offendingState = -1
|
||||
if recognizer is not None:
|
||||
self.offendingState = recognizer.state
|
||||
|
||||
# <p>If the state number is not known, this method returns -1.</p>
|
||||
|
||||
#
|
||||
# Gets the set of input symbols which could potentially follow the
|
||||
# previously matched symbol at the time this exception was thrown.
|
||||
#
|
||||
# <p>If the set of expected tokens is not known and could not be computed,
|
||||
# this method returns {@code null}.</p>
|
||||
#
|
||||
# @return The set of token types that could potentially follow the current
|
||||
# state in the ATN, or {@code null} if the information is not available.
|
||||
#/
|
||||
def getExpectedTokens(self):
|
||||
if self.recognizer is not None:
|
||||
return self.recognizer.atn.getExpectedTokens(self.offendingState, self.ctx)
|
||||
else:
|
||||
return None
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self)
|
||||
|
||||
|
||||
class LexerNoViableAltException(RecognitionException):
|
||||
|
||||
def __init__(self, lexer, input, startIndex, deadEndConfigs):
|
||||
super(LexerNoViableAltException, self).__init__(message=None, recognizer=lexer, input=input, ctx=None)
|
||||
self.startIndex = startIndex
|
||||
self.deadEndConfigs = deadEndConfigs
|
||||
|
||||
def __unicode__(self):
|
||||
symbol = ""
|
||||
if self.startIndex >= 0 and self.startIndex < self.input.size():
|
||||
symbol = self.input.getText((self.startIndex,self.startIndex))
|
||||
# TODO symbol = Utils.escapeWhitespace(symbol, false);
|
||||
return u"LexerNoViableAltException" + symbol
|
||||
|
||||
# Indicates that the parser could not decide which of two or more paths
|
||||
# to take based upon the remaining input. It tracks the starting token
|
||||
# of the offending input and also knows where the parser was
|
||||
# in the various paths when the error. Reported by reportNoViableAlternative()
|
||||
#
|
||||
class NoViableAltException(RecognitionException):
|
||||
|
||||
def __init__(self, recognizer, input=None, startToken=None, offendingToken=None, deadEndConfigs=None, ctx=None):
|
||||
if ctx is None:
|
||||
ctx = recognizer._ctx
|
||||
if offendingToken is None:
|
||||
offendingToken = recognizer.getCurrentToken()
|
||||
if startToken is None:
|
||||
startToken = recognizer.getCurrentToken()
|
||||
if input is None:
|
||||
input = recognizer.getInputStream()
|
||||
super(NoViableAltException, self).__init__(recognizer=recognizer, input=input, ctx=ctx)
|
||||
# Which configurations did we try at input.index() that couldn't match input.LT(1)?#
|
||||
self.deadEndConfigs = deadEndConfigs
|
||||
# The token object at the start index; the input stream might
|
||||
# not be buffering tokens so get a reference to it. (At the
|
||||
# time the error occurred, of course the stream needs to keep a
|
||||
# buffer all of the tokens but later we might not have access to those.)
|
||||
self.startToken = startToken
|
||||
self.offendingToken = offendingToken
|
||||
|
||||
# This signifies any kind of mismatched input exceptions such as
|
||||
# when the current input does not match the expected token.
|
||||
#
|
||||
class InputMismatchException(RecognitionException):
|
||||
|
||||
def __init__(self, recognizer):
|
||||
super(InputMismatchException, self).__init__(recognizer=recognizer, input=recognizer.getInputStream(), ctx=recognizer._ctx)
|
||||
self.offendingToken = recognizer.getCurrentToken()
|
||||
|
||||
|
||||
# A semantic predicate failed during validation. Validation of predicates
|
||||
# occurs when normally parsing the alternative just like matching a token.
|
||||
# Disambiguating predicate evaluation occurs when we test a predicate during
|
||||
# prediction.
|
||||
|
||||
class FailedPredicateException(RecognitionException):
|
||||
|
||||
def __init__(self, recognizer, predicate=None, message=None):
|
||||
super(FailedPredicateException, self).__init__(message=self.formatMessage(predicate,message), recognizer=recognizer,
|
||||
input=recognizer.getInputStream(), ctx=recognizer._ctx)
|
||||
s = recognizer._interp.atn.states[recognizer.state]
|
||||
trans = s.transitions[0]
|
||||
if isinstance(trans, PredicateTransition):
|
||||
self.ruleIndex = trans.ruleIndex
|
||||
self.predicateIndex = trans.predIndex
|
||||
else:
|
||||
self.ruleIndex = 0
|
||||
self.predicateIndex = 0
|
||||
self.predicate = predicate
|
||||
self.offendingToken = recognizer.getCurrentToken()
|
||||
|
||||
def formatMessage(self, predicate, message):
|
||||
if message is not None:
|
||||
return message
|
||||
else:
|
||||
return "failed predicate: {" + predicate + "}?"
|
||||
|
||||
class ParseCancellationException(CancellationException):
|
||||
|
||||
pass
|
||||
|
|
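For orientation, a minimal, hedged sketch of where these exceptions surface. ExprLexer/ExprParser and the expr start rule are hypothetical generated classes, and note that the default error strategy normally reports and recovers in place rather than letting the exception escape to the caller, so this is illustrative only:

from antlr4 import CommonTokenStream, InputStream

lexer = ExprLexer(InputStream(u"1 + "))        # hypothetical generated lexer
parser = ExprParser(CommonTokenStream(lexer))  # hypothetical generated parser
try:
    parser.expr()                              # hypothetical start rule
except RecognitionException as e:
    # the set of token types that were viable at the offending ATN state
    print e.offendingToken, e.getExpectedTokens()
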
@@ -0,0 +1 @@
__author__ = 'ericvergnaud'

@@ -0,0 +1,26 @@
class Chunk(object):

    def __str__(self):
        return unicode(self)


class TagChunk(Chunk):

    def __init__(self, tag, label=None):
        self.tag = tag
        self.label = label

    def __unicode__(self):
        if self.label is None:
            return self.tag
        else:
            return self.label + ":" + self.tag


class TextChunk(Chunk):

    def __init__(self, text):
        self.text = text

    def __unicode__(self):
        return "'" + self.text + "'"

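For orientation, the pattern splitter (ParseTreePatternMatcher.split, later in this commit) turns a pattern such as <ID> = <e:expr> ; into alternating tag and text chunks. A small illustration of what those chunks render as:

chunks = [TagChunk("ID"), TextChunk(" = "), TagChunk("expr", "e"), TextChunk(" ;")]
print u", ".join(unicode(c) for c in chunks)
# ID, ' = ', e:expr, ' ;'
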
@@ -0,0 +1,145 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# Represents the result of matching a {@link ParseTree} against a tree pattern.
#
from io import StringIO


class ParseTreeMatch(object):

    #
    # Constructs a new instance of {@link ParseTreeMatch} from the specified
    # parse tree and pattern.
    #
    # @param tree The parse tree to match against the pattern.
    # @param pattern The parse tree pattern.
    # @param labels A mapping from label names to collections of
    # {@link ParseTree} objects located by the tree pattern matching process.
    # @param mismatchedNode The first node which failed to match the tree
    # pattern during the matching process.
    #
    # @exception IllegalArgumentException if {@code tree} is {@code null}
    # @exception IllegalArgumentException if {@code pattern} is {@code null}
    # @exception IllegalArgumentException if {@code labels} is {@code null}
    #
    def __init__(self, tree, pattern, labels, mismatchedNode):
        if tree is None:
            raise Exception("tree cannot be null")
        if pattern is None:
            raise Exception("pattern cannot be null")
        if labels is None:
            raise Exception("labels cannot be null")
        self.tree = tree
        self.pattern = pattern
        self.labels = labels
        self.mismatchedNode = mismatchedNode

    #
    # Get the last node associated with a specific {@code label}.
    #
    # <p>For example, for pattern {@code <id:ID>}, {@code get("id")} returns the
    # node matched for that {@code ID}. If more than one node
    # matched the specified label, only the last is returned. If there is
    # no node associated with the label, this returns {@code null}.</p>
    #
    # <p>Pattern tags like {@code <ID>} and {@code <expr>} without labels are
    # considered to be labeled with {@code ID} and {@code expr}, respectively.</p>
    #
    # @param label The label to check.
    #
    # @return The last {@link ParseTree} to match a tag with the specified
    # label, or {@code null} if no parse tree matched a tag with the label.
    #
    def get(self, label):
        parseTrees = self.labels.get(label, None)
        if parseTrees is None or len(parseTrees)==0:
            return None
        else:
            return parseTrees[len(parseTrees)-1]

    #
    # Return all nodes matching a rule or token tag with the specified label.
    #
    # <p>If the {@code label} is the name of a parser rule or token in the
    # grammar, the resulting list will contain both the parse trees matching
    # rule or tags explicitly labeled with the label and the complete set of
    # parse trees matching the labeled and unlabeled tags in the pattern for
    # the parser rule or token. For example, if {@code label} is {@code "foo"},
    # the result will contain <em>all</em> of the following.</p>
    #
    # <ul>
    # <li>Parse tree nodes matching tags of the form {@code <foo:anyRuleName>} and
    # {@code <foo:AnyTokenName>}.</li>
    # <li>Parse tree nodes matching tags of the form {@code <anyLabel:foo>}.</li>
    # <li>Parse tree nodes matching tags of the form {@code <foo>}.</li>
    # </ul>
    #
    # @param label The label.
    #
    # @return A collection of all {@link ParseTree} nodes matching tags with
    # the specified {@code label}. If no nodes matched the label, an empty list
    # is returned.
    #
    def getAll(self, label):
        nodes = self.labels.get(label, None)
        if nodes is None:
            return list()
        else:
            return nodes

    #
    # Gets a value indicating whether the match operation succeeded.
    #
    # @return {@code true} if the match operation succeeded; otherwise,
    # {@code false}.
    #
    def succeeded(self):
        return self.mismatchedNode is None

    #
    # {@inheritDoc}
    #
    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        with StringIO() as buf:
            buf.write(u"Match ")
            buf.write(u"succeeded" if self.succeeded() else u"failed")
            buf.write(u"; found ")
            buf.write(unicode(len(self.labels)))
            buf.write(u" labels")
            return buf.getvalue()

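A short sketch of how a match result is typically consumed. The names are illustrative: pattern is a compiled ParseTreePattern (below) and tree is a parse tree from a prior parse:

m = pattern.match(tree)                   # returns a ParseTreeMatch
if m.succeeded():
    print "id node:", m.get("id")         # last node bound to label "id"
    print "all exprs:", m.getAll("expr")  # every node bound to "expr"
else:
    print "failed at:", m.mismatchedNode
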
@@ -0,0 +1,94 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# A pattern like {@code <ID> = <expr>;} converted to a {@link ParseTree} by
# {@link ParseTreePatternMatcher#compile(String, int)}.
#
from antlr4.xpath.XPath import XPath


class ParseTreePattern(object):

    # Construct a new instance of the {@link ParseTreePattern} class.
    #
    # @param matcher The {@link ParseTreePatternMatcher} which created this
    # tree pattern.
    # @param pattern The tree pattern in concrete syntax form.
    # @param patternRuleIndex The parser rule which serves as the root of the
    # tree pattern.
    # @param patternTree The tree pattern in {@link ParseTree} form.
    #
    def __init__(self, matcher, pattern, patternRuleIndex, patternTree):
        self.matcher = matcher
        self.patternRuleIndex = patternRuleIndex
        self.pattern = pattern
        self.patternTree = patternTree

    #
    # Match a specific parse tree against this tree pattern.
    #
    # @param tree The parse tree to match against this tree pattern.
    # @return A {@link ParseTreeMatch} object describing the result of the
    # match operation. The {@link ParseTreeMatch#succeeded()} method can be
    # used to determine whether or not the match was successful.
    #
    def match(self, tree):
        return self.matcher.matchPattern(tree, self)

    #
    # Determine whether or not a parse tree matches this tree pattern.
    #
    # @param tree The parse tree to match against this tree pattern.
    # @return {@code true} if {@code tree} is a match for the current tree
    # pattern; otherwise, {@code false}.
    #
    def matches(self, tree):
        return self.matcher.matchPattern(tree, self).succeeded()

    # Find all nodes using XPath and then try to match those subtrees against
    # this tree pattern.
    #
    # @param tree The {@link ParseTree} to match against this pattern.
    # @param xpath An expression matching the nodes
    #
    # @return A collection of {@link ParseTreeMatch} objects describing the
    # successful matches. Unsuccessful matches are omitted from the result,
    # regardless of the reason for the failure.
    #
    def findAll(self, tree, xpath):
        subtrees = XPath.findAll(tree, xpath, self.matcher.parser)
        matches = list()
        for t in subtrees:
            match = self.match(t)
            if match.succeeded():
                matches.append(match)
        return matches

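A hedged sketch of the intended compile-once, reuse-many flow, assuming a hypothetical grammar with a statement rule; lexer, parser, and tree come from a prior parse:

matcher = ParseTreePatternMatcher(lexer, parser)   # see ParseTreePatternMatcher below
pattern = matcher.compileTreePattern(u"<ID> = <expr>;", parser.getRuleIndex("statement"))
for m in pattern.findAll(tree, u"//statement"):    # XPath locates candidate subtrees
    print m.get("ID").getText()
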
@@ -0,0 +1,392 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# A tree pattern matching mechanism for ANTLR {@link ParseTree}s.
#
# <p>Patterns are strings of source input text with special tags representing
# token or rule references such as:</p>
#
# <p>{@code <ID> = <expr>;}</p>
#
# <p>Given a pattern start rule such as {@code statement}, this object constructs
# a {@link ParseTree} with placeholders for the {@code ID} and {@code expr}
# subtree. Then the {@link #match} routines can compare an actual
# {@link ParseTree} from a parse with this pattern. Tag {@code <ID>} matches
# any {@code ID} token and tag {@code <expr>} references the result of the
# {@code expr} rule (generally an instance of {@code ExprContext}).</p>
#
# <p>Pattern {@code x = 0;} is a similar pattern that matches the same pattern
# except that it requires the identifier to be {@code x} and the expression to
# be {@code 0}.</p>
#
# <p>The {@link #matches} routines return {@code true} or {@code false} based
# upon a match for the tree rooted at the parameter sent in. The
# {@link #match} routines return a {@link ParseTreeMatch} object that
# contains the parse tree, the parse tree pattern, and a map from tag name to
# matched nodes (more below). A subtree that fails to match returns with
# {@link ParseTreeMatch#mismatchedNode} set to the first tree node that did not
# match.</p>
#
# <p>For efficiency, you can compile a tree pattern in string form to a
# {@link ParseTreePattern} object.</p>
#
# <p>See {@code TestParseTreeMatcher} for lots of examples.
# {@link ParseTreePattern} has two static helper methods:
# {@link ParseTreePattern#findAll} and {@link ParseTreePattern#match} that
# are easy to use but not super efficient because they create new
# {@link ParseTreePatternMatcher} objects each time and have to compile the
# pattern in string form before using it.</p>
#
# <p>The lexer and parser that you pass into the {@link ParseTreePatternMatcher}
# constructor are used to parse the pattern in string form. The lexer converts
# the {@code <ID> = <expr>;} into a sequence of four tokens (assuming lexer
# throws out whitespace or puts it on a hidden channel). Be aware that the
# input stream is reset for the lexer (but not the parser; a
# {@link ParserInterpreter} is created to parse the input). Any user-defined
# fields you have put into the lexer might get changed when this mechanism asks
# it to scan the pattern string.</p>
#
# <p>Normally a parser does not accept token {@code <expr>} as a valid
# {@code expr} but, from the parser passed in, we create a special version of
# the underlying grammar representation (an {@link ATN}) that allows imaginary
# tokens representing rules ({@code <expr>}) to match entire rules. We call
# these <em>bypass alternatives</em>.</p>
#
# <p>Delimiters are {@code <} and {@code >}, with {@code \} as the escape string
# by default, but you can set them to whatever you want using
# {@link #setDelimiters}. You must escape both start and stop strings
# {@code \<} and {@code \>}.</p>
#
from antlr4 import CommonTokenStream, ParserRuleContext
from antlr4.InputStream import InputStream
from antlr4.ListTokenSource import ListTokenSource
from antlr4.Token import Token
from antlr4.error.ErrorStrategy import BailErrorStrategy
from antlr4.error.Errors import RecognitionException, ParseCancellationException
from antlr4.tree.Chunk import TagChunk, TextChunk
from antlr4.tree.RuleTagToken import RuleTagToken
from antlr4.tree.TokenTagToken import TokenTagToken
from antlr4.tree.Tree import TerminalNode, RuleNode


class CannotInvokeStartRule(Exception):

    def __init__(self, e):
        super(CannotInvokeStartRule, self).__init__(e)


class StartRuleDoesNotConsumeFullPattern(Exception):

    pass


class ParseTreePatternMatcher(object):

    # Constructs a {@link ParseTreePatternMatcher} from a {@link Lexer} and
    # {@link Parser} object. The lexer input stream is altered for tokenizing
    # the tree patterns. The parser is used as a convenient mechanism to get
    # the grammar name, plus token, rule names.
    def __init__(self, lexer, parser):
        self.lexer = lexer
        self.parser = parser
        self.start = "<"
        self.stop = ">"
        self.escape = "\\"  # e.g., \< and \> must escape BOTH!

    # Set the delimiters used for marking rule and token tags within concrete
    # syntax used by the tree pattern parser.
    #
    # @param start The start delimiter.
    # @param stop The stop delimiter.
    # @param escapeLeft The escape sequence to use for escaping a start or stop delimiter.
    #
    # @exception IllegalArgumentException if {@code start} is {@code null} or empty.
    # @exception IllegalArgumentException if {@code stop} is {@code null} or empty.
    #
    def setDelimiters(self, start, stop, escapeLeft):
        if start is None or len(start)==0:
            raise Exception("start cannot be null or empty")
        if stop is None or len(stop)==0:
            raise Exception("stop cannot be null or empty")
        self.start = start
        self.stop = stop
        self.escape = escapeLeft

    # Does {@code pattern} matched as rule {@code patternRuleIndex} match {@code tree}?#
    def matchesRuleIndex(self, tree, pattern, patternRuleIndex):
        p = self.compileTreePattern(pattern, patternRuleIndex)
        return self.matchesPattern(tree, p)

    # Does {@code pattern} matched as rule patternRuleIndex match tree? Pass in a
    # compiled pattern instead of a string representation of a tree pattern.
    #
    def matchesPattern(self, tree, pattern):
        mismatchedNode = self.matchImpl(tree, pattern.patternTree, dict())
        return mismatchedNode is None

    #
    # Compare {@code pattern} matched as rule {@code patternRuleIndex} against
    # {@code tree} and return a {@link ParseTreeMatch} object that contains the
    # matched elements, or the node at which the match failed.
    #
    def matchRuleIndex(self, tree, pattern, patternRuleIndex):
        p = self.compileTreePattern(pattern, patternRuleIndex)
        return self.matchPattern(tree, p)

    #
    # Compare {@code pattern} matched against {@code tree} and return a
    # {@link ParseTreeMatch} object that contains the matched elements, or the
    # node at which the match failed. Pass in a compiled pattern instead of a
    # string representation of a tree pattern.
    #
    def matchPattern(self, tree, pattern):
        labels = dict()
        mismatchedNode = self.matchImpl(tree, pattern.patternTree, labels)
        from antlr4.tree.ParseTreeMatch import ParseTreeMatch
        return ParseTreeMatch(tree, pattern, labels, mismatchedNode)

    #
    # For repeated use of a tree pattern, compile it to a
    # {@link ParseTreePattern} using this method.
    #
    def compileTreePattern(self, pattern, patternRuleIndex):
        tokenList = self.tokenize(pattern)
        tokenSrc = ListTokenSource(tokenList)
        tokens = CommonTokenStream(tokenSrc)
        from antlr4.ParserInterpreter import ParserInterpreter
        parserInterp = ParserInterpreter(self.parser.grammarFileName, self.parser.tokenNames,
                                         self.parser.ruleNames, self.parser.getATNWithBypassAlts(), tokens)
        tree = None
        try:
            parserInterp.setErrorHandler(BailErrorStrategy())
            tree = parserInterp.parse(patternRuleIndex)
        except ParseCancellationException as e:
            raise e.cause
        except RecognitionException as e:
            raise e
        except Exception as e:
            raise CannotInvokeStartRule(e)

        # Make sure tree pattern compilation checks for a complete parse
        if tokens.LA(1)!=Token.EOF:
            raise StartRuleDoesNotConsumeFullPattern()

        from antlr4.tree.ParseTreePattern import ParseTreePattern
        return ParseTreePattern(self, pattern, patternRuleIndex, tree)

    #
    # Recursively walk {@code tree} against {@code patternTree}, filling
    # {@code match.}{@link ParseTreeMatch#labels labels}.
    #
    # @return the first node encountered in {@code tree} which does not match
    # a corresponding node in {@code patternTree}, or {@code null} if the match
    # was successful. The specific node returned depends on the matching
    # algorithm used by the implementation, and may be overridden.
    #
    def matchImpl(self, tree, patternTree, labels):
        if tree is None:
            raise Exception("tree cannot be null")
        if patternTree is None:
            raise Exception("patternTree cannot be null")

        # x and <ID>, x and y, or x and x; or could be mismatched types
        if isinstance(tree, TerminalNode) and isinstance(patternTree, TerminalNode):
            mismatchedNode = None
            # both are tokens and they have same type
            if tree.symbol.type == patternTree.symbol.type:
                if isinstance(patternTree.symbol, TokenTagToken):  # x and <ID>
                    tokenTagToken = patternTree.symbol
                    # track label->list-of-nodes for both token name and label (if any)
                    self.map(labels, tokenTagToken.tokenName, tree)
                    if tokenTagToken.label is not None:
                        self.map(labels, tokenTagToken.label, tree)
                elif tree.getText()==patternTree.getText():
                    # x and x
                    pass
                else:
                    # x and y
                    if mismatchedNode is None:
                        mismatchedNode = tree
            else:
                if mismatchedNode is None:
                    mismatchedNode = tree

            return mismatchedNode

        if isinstance(tree, ParserRuleContext) and isinstance(patternTree, ParserRuleContext):
            mismatchedNode = None
            # (expr ...) and <expr>
            ruleTagToken = self.getRuleTagToken(patternTree)
            if ruleTagToken is not None:
                if tree.ruleContext.ruleIndex == patternTree.ruleContext.ruleIndex:
                    # track label->list-of-nodes for both rule name and label (if any)
                    self.map(labels, ruleTagToken.ruleName, tree)
                    if ruleTagToken.label is not None:
                        self.map(labels, ruleTagToken.label, tree)
                else:
                    if mismatchedNode is None:
                        mismatchedNode = tree

                return mismatchedNode

            # (expr ...) and (expr ...)
            if tree.getChildCount()!=patternTree.getChildCount():
                if mismatchedNode is None:
                    mismatchedNode = tree
                return mismatchedNode

            n = tree.getChildCount()
            for i in range(0, n):
                childMatch = self.matchImpl(tree.getChild(i), patternTree.getChild(i), labels)
                if childMatch is not None:
                    return childMatch

            return mismatchedNode

        # if nodes aren't both tokens or both rule nodes, can't match
        return tree

    def map(self, labels, label, tree):
        v = labels.get(label, None)
        if v is None:
            v = list()
            labels[label] = v
        v.append(tree)

    # Is {@code t} {@code (expr <expr>)} subtree?#
    def getRuleTagToken(self, tree):
        if isinstance(tree, RuleNode):
            if tree.getChildCount()==1 and isinstance(tree.getChild(0), TerminalNode):
                c = tree.getChild(0)
                if isinstance(c.symbol, RuleTagToken):
                    return c.symbol
        return None

    def tokenize(self, pattern):
        # split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
        chunks = self.split(pattern)

        # create token stream from text and tags
        tokens = list()
        for chunk in chunks:
            if isinstance(chunk, TagChunk):
                # add special rule token or conjure up new token from name
                if chunk.tag[0].isupper():
                    ttype = self.parser.getTokenType(chunk.tag)
                    if ttype==Token.INVALID_TYPE:
                        raise Exception("Unknown token " + str(chunk.tag) + " in pattern: " + pattern)
                    tokens.append(TokenTagToken(chunk.tag, ttype, chunk.label))
                elif chunk.tag[0].islower():
                    ruleIndex = self.parser.getRuleIndex(chunk.tag)
                    if ruleIndex==-1:
                        raise Exception("Unknown rule " + str(chunk.tag) + " in pattern: " + pattern)
                    ruleImaginaryTokenType = self.parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex]
                    tokens.append(RuleTagToken(chunk.tag, ruleImaginaryTokenType, chunk.label))
                else:
                    raise Exception("invalid tag: " + str(chunk.tag) + " in pattern: " + pattern)
            else:
                self.lexer.setInputStream(InputStream(chunk.text))
                t = self.lexer.nextToken()
                while t.type!=Token.EOF:
                    tokens.append(t)
                    t = self.lexer.nextToken()
        return tokens

    # Split {@code <ID> = <e:expr> ;} into 4 chunks for tokenizing by {@link #tokenize}.#
    def split(self, pattern):
        p = 0
        n = len(pattern)
        chunks = list()
        # find all start and stop indexes first, then collect
        starts = list()
        stops = list()
        while p < n:
            if p == pattern.find(self.escape + self.start, p):
                p += len(self.escape) + len(self.start)
            elif p == pattern.find(self.escape + self.stop, p):
                p += len(self.escape) + len(self.stop)
            elif p == pattern.find(self.start, p):
                starts.append(p)
                p += len(self.start)
            elif p == pattern.find(self.stop, p):
                stops.append(p)
                p += len(self.stop)
            else:
                p += 1

        nt = len(starts)

        if nt > len(stops):
            raise Exception("unterminated tag in pattern: " + pattern)
        if nt < len(stops):
            raise Exception("missing start tag in pattern: " + pattern)

        for i in range(0, nt):
            if starts[i] >= stops[i]:
                raise Exception("tag delimiters out of order in pattern: " + pattern)

        # collect into chunks now
        if nt==0:
            chunks.append(TextChunk(pattern))

        if nt>0 and starts[0]>0:  # copy text up to first tag into chunks
            text = pattern[0:starts[0]]
            chunks.append(TextChunk(text))

        for i in range(0, nt):
            # copy inside of <tag>
            tag = pattern[starts[i] + len(self.start) : stops[i]]
            ruleOrToken = tag
            label = None
            colon = tag.find(':')
            if colon >= 0:
                label = tag[0:colon]
                ruleOrToken = tag[colon+1 : len(tag)]
            chunks.append(TagChunk(ruleOrToken, label))
            if i+1 < len(starts):
                # copy from end of <tag> to start of next
                text = pattern[stops[i] + len(self.stop) : starts[i + 1]]
                chunks.append(TextChunk(text))

        if nt > 0:
            afterLastTag = stops[nt - 1] + len(self.stop)
            if afterLastTag < n:  # copy text from end of last tag to end
                text = pattern[afterLastTag : n]
                chunks.append(TextChunk(text))

        # strip out the escape sequences from text chunks but not tags
        for i in range(0, len(chunks)):
            c = chunks[i]
            if isinstance(c, TextChunk):
                unescaped = c.text.replace(self.escape, "")
                if len(unescaped) < len(c.text):
                    chunks[i] = TextChunk(unescaped)
        return chunks

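A hedged end-to-end sketch of the matcher, assuming hypothetical generated ExprLexer/ExprParser classes whose grammar has an ID token, an expr rule, and a statement rule; parser and tree come from a prior parse:

lexer = ExprLexer(None)                      # the matcher sets the lexer's input stream itself
matcher = ParseTreePatternMatcher(lexer, parser)
m = matcher.matchRuleIndex(tree, u"<ID> = <expr>;", parser.getRuleIndex("statement"))
if m.succeeded():
    print m.get("ID").getText(), "=", m.get("expr").getText()
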
@@ -0,0 +1,74 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# A {@link Token} object representing an entire subtree matched by a parser
# rule; e.g., {@code <expr>}. These tokens are created for {@link TagChunk}
# chunks where the tag corresponds to a parser rule.
#
from antlr4.Token import Token


class RuleTagToken(Token):
    #
    # Constructs a new instance of {@link RuleTagToken} with the specified rule
    # name, bypass token type, and label.
    #
    # @param ruleName The name of the parser rule this rule tag matches.
    # @param bypassTokenType The bypass token type assigned to the parser rule.
    # @param label The label associated with the rule tag, or {@code null} if
    # the rule tag is unlabeled.
    #
    # @exception IllegalArgumentException if {@code ruleName} is {@code null}
    # or empty.

    def __init__(self, ruleName, bypassTokenType, label=None):
        if ruleName is None or len(ruleName)==0:
            raise Exception("ruleName cannot be null or empty.")
        self.source = None
        self.type = bypassTokenType  # token type of the token
        self.channel = Token.DEFAULT_CHANNEL  # The parser ignores everything not on DEFAULT_CHANNEL
        self.start = -1  # optional; return -1 if not implemented.
        self.stop = -1  # optional; return -1 if not implemented.
        self.tokenIndex = -1  # from 0..n-1 of the token object in the input stream
        self.line = 0  # line=1..n of the 1st character
        self.column = -1  # beginning of the line at which it occurs, 0..n-1
        self.label = label
        self.ruleName = ruleName
        self._text = self.getText()  # text of the token; getText() reads ruleName and label, so set those first

    def getText(self):
        if self.label is None:
            return "<" + self.ruleName + ">"
        else:
            return "<" + self.label + ":" + self.ruleName + ">"

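A tiny illustration of the rendered tag text (the bypass token type value here is arbitrary, purely for the example):

t = RuleTagToken("expr", bypassTokenType=123, label="e")
print t.getText()   # <e:expr>
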
@@ -0,0 +1,72 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# A {@link Token} object representing a token of a particular type; e.g.,
# {@code <ID>}. These tokens are created for {@link TagChunk} chunks where the
# tag corresponds to a lexer rule or token type.
#
from antlr4.Token import CommonToken


class TokenTagToken(CommonToken):

    # Constructs a new instance of {@link TokenTagToken} with the specified
    # token name, type, and label.
    #
    # @param tokenName The token name.
    # @param type The token type.
    # @param label The label associated with the token tag, or {@code null} if
    # the token tag is unlabeled.
    #
    def __init__(self, tokenName, type, label=None):
        super(TokenTagToken, self).__init__(type=type)
        self.tokenName = tokenName
        self.label = label
        self._text = self.getText()

    #
    # {@inheritDoc}
    #
    # <p>The implementation for {@link TokenTagToken} returns the token tag
    # formatted with {@code <} and {@code >} delimiters.</p>
    #
    def getText(self):
        if self.label is None:
            return "<" + self.tokenName + ">"
        else:
            return "<" + self.label + ":" + self.tokenName + ">"

    # <p>The implementation for {@link TokenTagToken} returns a string of the form
    # {@code tokenName:type}.</p>
    #
    def __unicode__(self):
        return self.tokenName + u":" + unicode(self.type)

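A matching illustration for token tags (the type value 1 is arbitrary for the example):

tt = TokenTagToken(u"ID", type=1, label=u"name")
print tt.getText()   # <name:ID>
print unicode(tt)    # ID:1
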
@@ -0,0 +1,191 @@
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/


# The basic notion of a tree has a parent, a payload, and a list of children.
# It is the most abstract interface for all the trees used by ANTLR.
#/
from antlr4.Token import Token

INVALID_INTERVAL = (-1, -2)

class Tree(object):

    def __str__(self):
        return unicode(self)

class SyntaxTree(Tree):
    pass

class ParseTree(SyntaxTree):
    pass

class RuleNode(ParseTree):
    pass

class TerminalNode(ParseTree):
    pass

class ErrorNode(TerminalNode):
    pass

class ParseTreeVisitor(object):
    def visit(self, tree):
        return tree.accept(self)

    def visitChildren(self, node):
        result = self.defaultResult()
        n = node.getChildCount()
        for i in range(n):
            if not self.shouldVisitNextChild(node, result):
                return result

            c = node.getChild(i)
            childResult = c.accept(self)
            result = self.aggregateResult(result, childResult)

        return result

    def visitTerminal(self, node):
        return self.defaultResult()

    def visitErrorNode(self, node):
        return self.defaultResult()

    def defaultResult(self):
        return None

    def aggregateResult(self, aggregate, nextResult):
        return nextResult

    def shouldVisitNextChild(self, node, currentResult):
        return True

class ParseTreeListener(object):

    def visitTerminal(self, node):
        pass

    def visitErrorNode(self, node):
        pass

    def enterEveryRule(self, ctx):
        pass

    def exitEveryRule(self, ctx):
        pass

class TerminalNodeImpl(TerminalNode):

    def __init__(self, symbol):
        self.parentCtx = None
        self.symbol = symbol

    def getChild(self, i):
        return None

    def getSymbol(self):
        return self.symbol

    def getParent(self):
        return self.parentCtx

    def getPayload(self):
        return self.symbol

    def getSourceInterval(self):
        if self.symbol is None:
            return INVALID_INTERVAL
        tokenIndex = self.symbol.tokenIndex
        return (tokenIndex, tokenIndex)

    def getChildCount(self):
        return 0

    def accept(self, visitor):
        return visitor.visitTerminal(self)

    def getText(self):
        return self.symbol.text

    def __unicode__(self):
        if self.symbol.type == Token.EOF:
            return u"<EOF>"
        else:
            return self.symbol.text

# Represents a token that was consumed during resynchronization
# rather than during a valid match operation. For example,
# we will create this kind of a node during single token insertion
# and deletion as well as during "consume until error recovery set"
# upon no viable alternative exceptions.

class ErrorNodeImpl(TerminalNodeImpl, ErrorNode):

    def __init__(self, token):
        super(ErrorNodeImpl, self).__init__(token)

    def accept(self, visitor):
        return visitor.visitErrorNode(self)


class ParseTreeWalker(object):

    DEFAULT = None

    def walk(self, listener, t):
        if isinstance(t, ErrorNode):
            listener.visitErrorNode(t)
            return
        elif isinstance(t, TerminalNode):
            listener.visitTerminal(t)
            return
        self.enterRule(listener, t)
        for child in t.getChildren():
            self.walk(listener, child)
        self.exitRule(listener, t)

    #
    # The discovery of a rule node involves sending two events: the generic
    # {@link ParseTreeListener#enterEveryRule} and a
    # {@link RuleContext}-specific event. First we trigger the generic and then
    # the rule specific. We do them in reverse order upon finishing the node.
    #
    def enterRule(self, listener, r):
        ctx = r.getRuleContext()
        listener.enterEveryRule(ctx)
        ctx.enterRule(listener)

    def exitRule(self, listener, r):
        ctx = r.getRuleContext()
        ctx.exitRule(listener)
        listener.exitEveryRule(ctx)

ParseTreeWalker.DEFAULT = ParseTreeWalker()

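A minimal sketch of driving a listener over a parse tree with the shared walker; tree comes from a prior parse, and the listener subclass is illustrative:

class RuleCounter(ParseTreeListener):
    def __init__(self):
        self.count = 0
    def enterEveryRule(self, ctx):
        self.count += 1

listener = RuleCounter()
ParseTreeWalker.DEFAULT.walk(listener, tree)
print listener.count
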
@@ -0,0 +1,134 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#


# A set of utility routines useful for all kinds of ANTLR trees.#
from io import StringIO

from antlr4.Token import Token
from antlr4.Utils import escapeWhitespace
from antlr4.tree.Tree import RuleNode, ErrorNode, TerminalNode


class Trees(object):

    # Print out a whole tree in LISP form. {@link #getNodeText} is used on the
    # node payloads to get the text for the nodes. Detect
    # parse trees and extract data appropriately.
    @classmethod
    def toStringTree(cls, t, ruleNames=None, recog=None):
        if recog is not None:
            ruleNames = recog.ruleNames
        s = escapeWhitespace(cls.getNodeText(t, ruleNames), False)
        if t.getChildCount()==0:
            return s
        with StringIO() as buf:
            buf.write(u"(")
            buf.write(s)
            buf.write(u' ')
            for i in range(0, t.getChildCount()):
                if i > 0:
                    buf.write(u' ')
                buf.write(cls.toStringTree(t.getChild(i), ruleNames))
            buf.write(u")")
            return buf.getvalue()

    @classmethod
    def getNodeText(cls, t, ruleNames=None, recog=None):
        if recog is not None:
            ruleNames = recog.ruleNames
        if ruleNames is not None:
            if isinstance(t, RuleNode):
                return ruleNames[t.getRuleContext().getRuleIndex()]
            elif isinstance(t, ErrorNode):
                return unicode(t)
            elif isinstance(t, TerminalNode):
                if t.symbol is not None:
                    return t.symbol.text
        # no recog for rule names
        payload = t.getPayload()
        if isinstance(payload, Token):
            return payload.text
        return unicode(t.getPayload())


    # Return ordered list of all children of this node
    @classmethod
    def getChildren(cls, t):
        return [ t.getChild(i) for i in range(0, t.getChildCount()) ]

    # Return a list of all ancestors of this node. The first node of
    # list is the root and the last is the parent of this node.
    #
    @classmethod
    def getAncestors(cls, t):
        ancestors = []
        t = t.getParent()
        while t is not None:
            ancestors.insert(0, t)  # insert at start
            t = t.getParent()
        return ancestors

    @classmethod
    def findAllTokenNodes(cls, t, ttype):
        return cls.findAllNodes(t, ttype, True)

    @classmethod
    def findAllRuleNodes(cls, t, ruleIndex):
        return cls.findAllNodes(t, ruleIndex, False)

    @classmethod
    def findAllNodes(cls, t, index, findTokens):
        nodes = []
        cls._findAllNodes(t, index, findTokens, nodes)
        return nodes

    @classmethod
    def _findAllNodes(cls, t, index, findTokens, nodes):
        from antlr4.ParserRuleContext import ParserRuleContext
        # check this node (the root) first
        if findTokens and isinstance(t, TerminalNode):
            if t.symbol.type==index:
                nodes.append(t)
        elif not findTokens and isinstance(t, ParserRuleContext):
            if t.ruleIndex == index:
                nodes.append(t)
        # check children
        for i in range(0, t.getChildCount()):
            cls._findAllNodes(t.getChild(i), index, findTokens, nodes)

    @classmethod
    def descendants(cls, t):
        nodes = []
        nodes.append(t)
        for i in range(0, t.getChildCount()):
            nodes.extend(cls.descendants(t.getChild(i)))
        return nodes

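A hedged sketch of the two most common uses; tree and parser come from a prior parse, and ExprParser.ID is a hypothetical generated token type:

print Trees.toStringTree(tree, recog=parser)   # e.g. (expr (expr 1) + (expr 2))
for node in Trees.findAllTokenNodes(tree, ExprParser.ID):
    print node.getText()
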
@@ -0,0 +1,346 @@

#
# Represent a subset of XPath XML path syntax for use in identifying nodes in
# parse trees.
#
# <p>
# Split path into words and separators {@code /} and {@code //} via ANTLR
# itself then walk path elements from left to right. At each separator-word
# pair, find set of nodes. Next stage uses those as work list.</p>
#
# <p>
# The basic interface is
# {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
# But that is just shorthand for:</p>
#
# <pre>
# {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
# return p.{@link #evaluate evaluate}(tree);
# </pre>
#
# <p>
# See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
# allows operators:</p>
#
# <dl>
# <dt>/</dt> <dd>root</dd>
# <dt>//</dt> <dd>anywhere</dd>
# <dt>!</dt> <dd>invert; this must appear directly after root or anywhere
# operator</dd>
# </dl>
#
# <p>
# and path elements:</p>
#
# <dl>
# <dt>ID</dt> <dd>token name</dd>
# <dt>'string'</dt> <dd>any string literal token from the grammar</dd>
# <dt>expr</dt> <dd>rule name</dd>
# <dt>*</dt> <dd>wildcard matching any node</dd>
# </dl>
#
# <p>
# Whitespace is not allowed.</p>
#
from io import StringIO

from antlr4 import CommonTokenStream, DFA, PredictionContextCache, Lexer, LexerATNSimulator, ParserRuleContext, TerminalNode
from antlr4.atn.ATNDeserializer import ATNDeserializer
from antlr4.InputStream import InputStream
from antlr4.Token import Token
from antlr4.error.ErrorListener import ErrorListener
from antlr4.error.Errors import LexerNoViableAltException
from antlr4.tree.Trees import Trees


def serializedATN():
    with StringIO() as buf:
        buf.write(u"\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd\2")
        buf.write(u"\n\64\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7")
        buf.write(u"\t\7\4\b\t\b\4\t\t\t\3\2\3\2\3\2\3\3\3\3\3\4\3\4\3\5")
        buf.write(u"\3\5\3\6\3\6\7\6\37\n\6\f\6\16\6\"\13\6\3\6\3\6\3\7\3")
        buf.write(u"\7\5\7(\n\7\3\b\3\b\3\t\3\t\7\t.\n\t\f\t\16\t\61\13\t")
        buf.write(u"\3\t\3\t\3/\2\n\3\5\5\6\7\7\t\b\13\t\r\2\17\2\21\n\3")
        buf.write(u"\2\4\7\2\62;aa\u00b9\u00b9\u0302\u0371\u2041\u2042\17")
        buf.write(u"\2C\\c|\u00c2\u00d8\u00da\u00f8\u00fa\u0301\u0372\u037f")
        buf.write(u"\u0381\u2001\u200e\u200f\u2072\u2191\u2c02\u2ff1\u3003")
        buf.write(u"\ud801\uf902\ufdd1\ufdf2\uffff\64\2\3\3\2\2\2\2\5\3\2")
        buf.write(u"\2\2\2\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\21\3\2\2")
        buf.write(u"\2\3\23\3\2\2\2\5\26\3\2\2\2\7\30\3\2\2\2\t\32\3\2\2")
        buf.write(u"\2\13\34\3\2\2\2\r\'\3\2\2\2\17)\3\2\2\2\21+\3\2\2\2")
        buf.write(u"\23\24\7\61\2\2\24\25\7\61\2\2\25\4\3\2\2\2\26\27\7\61")
        buf.write(u"\2\2\27\6\3\2\2\2\30\31\7,\2\2\31\b\3\2\2\2\32\33\7#")
        buf.write(u"\2\2\33\n\3\2\2\2\34 \5\17\b\2\35\37\5\r\7\2\36\35\3")
        buf.write(u"\2\2\2\37\"\3\2\2\2 \36\3\2\2\2 !\3\2\2\2!#\3\2\2\2\"")
        buf.write(u" \3\2\2\2#$\b\6\2\2$\f\3\2\2\2%(\5\17\b\2&(\t\2\2\2\'")
        buf.write(u"%\3\2\2\2\'&\3\2\2\2(\16\3\2\2\2)*\t\3\2\2*\20\3\2\2")
        buf.write(u"\2+/\7)\2\2,.\13\2\2\2-,\3\2\2\2.\61\3\2\2\2/\60\3\2")
        buf.write(u"\2\2/-\3\2\2\2\60\62\3\2\2\2\61/\3\2\2\2\62\63\7)\2\2")
        buf.write(u"\63\22\3\2\2\2\6\2 \'/\3\3\6\2")
        return buf.getvalue()


class XPathLexer(Lexer):

    atn = ATNDeserializer().deserialize(serializedATN())

    decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]


    TOKEN_REF = 1
    RULE_REF = 2
    ANYWHERE = 3
    ROOT = 4
    WILDCARD = 5
    BANG = 6
    ID = 7
    STRING = 8

    modeNames = [ u"DEFAULT_MODE" ]

    literalNames = [ u"<INVALID>",
                     u"'//'", u"'/'", u"'*'", u"'!'" ]

    symbolicNames = [ u"<INVALID>",
                      u"TOKEN_REF", u"RULE_REF", u"ANYWHERE", u"ROOT", u"WILDCARD",
                      u"BANG", u"ID", u"STRING" ]

    ruleNames = [ u"ANYWHERE", u"ROOT", u"WILDCARD", u"BANG", u"ID", u"NameChar",
                  u"NameStartChar", u"STRING" ]

    grammarFileName = u"XPathLexer.g4"

    def __init__(self, input=None):
        super(XPathLexer, self).__init__(input)
        self.checkVersion("4.5")
        self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache())
        self._actions = None
        self._predicates = None


    def action(self, localctx, ruleIndex, actionIndex):
        if self._actions is None:
            actions = dict()
            actions[4] = self.ID_action
            self._actions = actions
        action = self._actions.get(ruleIndex, None)
        if action is not None:
            action(localctx, actionIndex)
        else:
            raise Exception("No registered action for:" + str(ruleIndex))

    def ID_action(self, localctx, actionIndex):
        if actionIndex == 0:
            char = self.text[0]
            if char.isupper():
                self.type = XPathLexer.TOKEN_REF
            else:
                self.type = XPathLexer.RULE_REF


class XPath(object):

    WILDCARD = "*"  # word not operator/separator
    NOT = "!"  # word for invert operator

    def __init__(self, parser, path):
        self.parser = parser
        self.path = path
        self.elements = self.split(path)

    def split(self, path):
        input = InputStream(path)
        lexer = XPathLexer(input)
        def recover(e):  # assigned as a plain instance attribute below, so no self parameter
            raise e
        lexer.recover = recover
        lexer.removeErrorListeners()
        lexer.addErrorListener(ErrorListener())  # XPathErrorListener does no more
        tokenStream = CommonTokenStream(lexer)
        try:
            tokenStream.fill()
        except LexerNoViableAltException as e:
            pos = lexer.getColumn()
            msg = "Invalid tokens or characters at index " + str(pos) + " in path '" + path + "'"
            raise Exception(msg, e)

        tokens = tokenStream.getTokens()
        elements = list()
        n = len(tokens)
        i = 0
        while i < n:
            el = tokens[i]
            next = None
            if el.type in [XPathLexer.ROOT, XPathLexer.ANYWHERE]:
                anywhere = el.type == XPathLexer.ANYWHERE
                i += 1
                next = tokens[i]
                invert = next.type==XPathLexer.BANG
                if invert:
                    i += 1
                    next = tokens[i]
                pathElement = self.getXPathElement(next, anywhere)
                pathElement.invert = invert
                elements.append(pathElement)
                i += 1

            elif el.type in [XPathLexer.TOKEN_REF, XPathLexer.RULE_REF, XPathLexer.WILDCARD]:
                elements.append( self.getXPathElement(el, False) )
                i += 1

            elif el.type==Token.EOF:
                break

            else:
                raise Exception("Unknown path element " + str(el))

        return elements

    #
    # Convert word like {@code *} or {@code ID} or {@code expr} to a path
    # element. {@code anywhere} is {@code true} if {@code //} precedes the
    # word.
    #
    def getXPathElement(self, wordToken, anywhere):
        if wordToken.type==Token.EOF:
            raise Exception("Missing path element at end of path")
        word = wordToken.text
        ttype = self.parser.getTokenType(word)
        ruleIndex = self.parser.getRuleIndex(word)

        if wordToken.type==XPathLexer.WILDCARD:

            return XPathWildcardAnywhereElement() if anywhere else XPathWildcardElement()

        elif wordToken.type in [XPathLexer.TOKEN_REF, XPathLexer.STRING]:

            if ttype==Token.INVALID_TYPE:
                raise Exception( word + " at index " + str(wordToken.start) + " isn't a valid token name")
            return XPathTokenAnywhereElement(word, ttype) if anywhere else XPathTokenElement(word, ttype)

        else:

            if ruleIndex==-1:
                raise Exception( word + " at index " + str(wordToken.start) + " isn't a valid rule name")
            return XPathRuleAnywhereElement(word, ruleIndex) if anywhere else XPathRuleElement(word, ruleIndex)


    @staticmethod
    def findAll(tree, xpath, parser):
        p = XPath(parser, xpath)
        return p.evaluate(tree)

    #
    # Return a list of all nodes starting at {@code t} as root that satisfy the
    # path. The root {@code /} is relative to the node passed to
    # {@link #evaluate}.
    #
    def evaluate(self, t):
        dummyRoot = ParserRuleContext()
        dummyRoot.children = [t]  # don't set t's parent.

        work = [dummyRoot]

        for element in self.elements:
            next = set()
            for node in work:
                if len(node.children) > 0:
                    # only try to match next element if it has children
                    # e.g., //func/*/stat might have a token node for which
                    # we can't go looking for stat nodes.
                    matching = element.evaluate(node)
                    next.update(matching)
            work = next

        return work


class XPathElement(object):

    def __init__(self, nodeName):
        self.nodeName = nodeName
        self.invert = False

    def __str__(self):
        return unicode(self)

    def __unicode__(self):
        return type(self).__name__ + "[" + ("!" if self.invert else "") + self.nodeName + "]"


#
# Either {@code ID} at start of path or {@code ...//ID} in middle of path.
#
class XPathRuleAnywhereElement(XPathElement):

    def __init__(self, ruleName, ruleIndex):
        super(XPathRuleAnywhereElement, self).__init__(ruleName)
        self.ruleIndex = ruleIndex

    def evaluate(self, t):
        return Trees.findAllRuleNodes(t, self.ruleIndex)


class XPathRuleElement(XPathElement):

    def __init__(self, ruleName, ruleIndex):
        super(XPathRuleElement, self).__init__(ruleName)
        self.ruleIndex = ruleIndex

    def evaluate(self, t):
        # return all children of t that match nodeName
        nodes = []
        for c in Trees.getChildren(t):
            if isinstance(c, ParserRuleContext):
                if (c.ruleIndex == self.ruleIndex) == (not self.invert):
                    nodes.append(c)
        return nodes


class XPathTokenAnywhereElement(XPathElement):

    def __init__(self, ruleName, tokenType):
        super(XPathTokenAnywhereElement, self).__init__(ruleName)
        self.tokenType = tokenType

    def evaluate(self, t):
        return Trees.findAllTokenNodes(t, self.tokenType)


class XPathTokenElement(XPathElement):

    def __init__(self, ruleName, tokenType):
        super(XPathTokenElement, self).__init__(ruleName)
        self.tokenType = tokenType

    def evaluate(self, t):
        # return all children of t that match nodeName
        nodes = []
        for c in Trees.getChildren(t):
            if isinstance(c, TerminalNode):
                if (c.symbol.type == self.tokenType) == (not self.invert):
                    nodes.append(c)
        return nodes


class XPathWildcardAnywhereElement(XPathElement):

    def __init__(self):
        super(XPathWildcardAnywhereElement, self).__init__(XPath.WILDCARD)

    def evaluate(self, t):
        if self.invert:
            return list()  # !* is weird but valid (empty)
        else:
            return Trees.descendants(t)


class XPathWildcardElement(XPathElement):

    def __init__(self):
        super(XPathWildcardElement, self).__init__(XPath.WILDCARD)

    def evaluate(self, t):
        if self.invert:
            return list()  # !* is weird but valid (empty)
        else:
            return Trees.getChildren(t)

@@ -0,0 +1 @@
__author__ = 'ericvergnaud'

@@ -0,0 +1,805 @@
/*
 * [The "BSD license"]
 * Copyright (c) 2012 Terence Parr
 * Copyright (c) 2012 Sam Harwell
 * Copyright (c) 2014 Eric Vergnaud
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** The ANTLR tool checks that output templates are compatible with the tool's
 *  code generation. For now, a simple string match is used on the x.y part of
 *  the x.y.z version scheme. Must match Tool.VERSION when the templates are loaded.
 *
 *  REQUIRED.
 */

pythonTypeInitMap ::= [
    "bool":"False",
    "int":"0",
    "float":"0.0",
    "str":"",
    default:"None" // anything other than a primitive type is an object
]

// args must be <object-model-object>, <fields-resulting-in-STs>

ParserFile(file, parser, namedActions) ::= <<
<fileHeader(file.grammarFileName, file.ANTLRVersion)>
# encoding: utf-8
from __future__ import print_function
from antlr4 import *
from io import StringIO

<namedActions.header>

<parser>

>>

ListenerFile(file, header) ::= <<
<fileHeader(file.grammarFileName, file.ANTLRVersion)>
from antlr4 import *
<header>

# This class defines a complete listener for a parse tree produced by <file.parserName>.
class <file.grammarName>Listener(ParseTreeListener):

    <file.listenerNames:{lname |
# Enter a parse tree produced by <file.parserName>#<lname>.
def enter<lname; format="cap">(self, ctx):
    pass

# Exit a parse tree produced by <file.parserName>#<lname>.
def exit<lname; format="cap">(self, ctx):
    pass

}; separator="\n">

>>

VisitorFile(file, header) ::= <<
<fileHeader(file.grammarFileName, file.ANTLRVersion)>
from antlr4 import *
<header>

# This class defines a complete generic visitor for a parse tree produced by <file.parserName>.

class <file.grammarName>Visitor(ParseTreeVisitor):

    <file.visitorNames:{lname |
# Visit a parse tree produced by <file.parserName>#<lname>.
def visit<lname; format="cap">(self, ctx):
    return self.visitChildren(ctx)

}; separator="\n">

>>

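// Illustration (editor's sketch, not part of the template file): for a
// hypothetical grammar named "Expr" whose parser has a single rule `prog`,
// VisitorFile above expands to roughly the following Python module:
//
//   from antlr4 import *
//
//   # This class defines a complete generic visitor for a parse tree produced by ExprParser.
//
//   class ExprVisitor(ParseTreeVisitor):
//
//       # Visit a parse tree produced by ExprParser#prog.
//       def visitProg(self, ctx):
//           return self.visitChildren(ctx)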
fileHeader(grammarFileName, ANTLRVersion) ::= <<
# Generated from <grammarFileName> by ANTLR <ANTLRVersion>
>>

Parser(parser, funcs, atn, sempredFuncs, superClass) ::= <<
<Parser_(ctor="parser_ctor", ...)>
>>

Parser_(parser, funcs, atn, sempredFuncs, ctor, superClass) ::= <<
<if(superClass)>
from .<superClass> import <superClass>

<endif>
<atn>

class <parser.name> ( <if(superClass)><superClass><else>Parser<endif> ):

    grammarFileName = "<parser.grammarFileName>"

    atn = ATNDeserializer().deserialize(serializedATN())

    decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]

    sharedContextCache = PredictionContextCache()

    literalNames = [ <parser.literalNames:{t | u<t>}; null="u\"\<INVALID>\"", separator=", ", wrap, anchor> ]

    symbolicNames = [ <parser.symbolicNames:{t | u<t>}; null="u\"\<INVALID>\"", separator=", ", wrap, anchor> ]

    <parser.rules:{r | RULE_<r.name> = <r.index>}; separator="\n", wrap, anchor>

    ruleNames = [ <parser.ruleNames:{r | u"<r>"}; separator=", ", wrap, anchor> ]

    EOF = <TokenLabelType()>.EOF
    <if(parser.tokens)>
    <parser.tokens:{k | <k>=<parser.tokens.(k)>}; separator="\n", wrap, anchor>
    <endif>

    <parser:(ctor)()>

    <namedActions.members>

    <funcs; separator="\n">

    <if(sempredFuncs)>
    def sempred(self, localctx, ruleIndex, predIndex):
        if self._predicates is None:
            self._predicates = dict()
            <parser.sempredFuncs.values:{f |
self._predicates[<f.ruleIndex>] = self.<f.name>_sempred}; separator="\n ">
        pred = self._predicates.get(ruleIndex, None)
        if pred is None:
            raise Exception("No predicate with index:" + str(ruleIndex))
        else:
            return pred(localctx, predIndex)

    <sempredFuncs.values; separator="\n">
    <endif>

>>

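// Illustration (editor's sketch, not part of the template file): for a
// hypothetical two-rule grammar "Expr", Parser_ above yields a class skeleton
// along these lines; the ATN, DFAs and rule constants are class-level, so
// they are deserialized once per process rather than per parser instance:
//
//   class ExprParser ( Parser ):
//
//       grammarFileName = "Expr.g4"
//       atn = ATNDeserializer().deserialize(serializedATN())
//       decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]
//       sharedContextCache = PredictionContextCache()
//       RULE_prog = 0
//       RULE_expr = 1
//       ruleNames = [ u"prog", u"expr" ]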
dumpActions(recog, argFuncs, actionFuncs, sempredFuncs) ::= <<
<if(actionFuncs)>
def action(self, localctx, ruleIndex, actionIndex):
    if self._actions is None:
        actions = dict()
        <recog.actionFuncs.values:{f|
actions[<f.ruleIndex>] = self.<f.name>_action }; separator="\n">
        self._actions = actions
    action = self._actions.get(ruleIndex, None)
    if action is not None:
        action(localctx, actionIndex)
    else:
        raise Exception("No registered action for:" + str(ruleIndex))

<actionFuncs.values; separator="\n">

<endif>
<if(sempredFuncs)>
def sempred(self, localctx, ruleIndex, predIndex):
    if self._predicates is None:
        preds = dict()
        <recog.sempredFuncs.values:{f|
preds[<f.ruleIndex>] = self.<f.name>_sempred}; separator="\n">
        self._predicates = preds
    pred = self._predicates.get(ruleIndex, None)
    if pred is not None:
        return pred(localctx, predIndex)
    else:
        raise Exception("No registered predicate for:" + str(ruleIndex))

<sempredFuncs.values; separator="\n">
<endif>
>>

parser_ctor(p) ::= <<
def __init__(self, input):
    super(<parser.name>, self).__init__(input)
    self.checkVersion("<file.ANTLRVersion>")
    self._interp = ParserATNSimulator(self, self.atn, self.decisionsToDFA, self.sharedContextCache)
    self._predicates = None

>>

/* This generates a private method since the actionIndex is generated, making an
 * overriding implementation impossible to maintain.
 */
RuleActionFunction(r, actions) ::= <<

def <r.name>_action(self, localctx, actionIndex):
    <actions:{index|
<if(first(actions))>
if actionIndex == <index>:
    <actions.(index)>
<elseif(rest(actions))>
elif actionIndex == <index>:
    <actions.(index)>
<endif> }; separator="\n">
>>

/* This generates a private method since the predIndex is generated, making an
 * overriding implementation impossible to maintain.
 */
RuleSempredFunction(r, actions) ::= <<
def <r.name>_sempred(self, localctx, predIndex):
    <actions:{index|
<if(first(actions))>
if predIndex == <index>:
    return <actions.(index)>
<elseif(rest(actions))>
elif predIndex == <index>:
    return <actions.(index)>
<endif> }; separator="\n">

>>

RuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs,namedActions,finallyAction,postamble,exceptions) ::= <<

<ruleCtx>

<altLabelCtxs:{l | <altLabelCtxs.(l)>}; separator="\n">

def <currentRule.name>(self<currentRule.args:{a | , <a.name>}>):

    localctx = <parser.name>.<currentRule.ctxType>(self, self._ctx, self.state<currentRule.args:{a | , <a.name>}>)
    self.enterRule(localctx, <currentRule.startState>, self.RULE_<currentRule.name>)
    <namedActions.init>
    <locals; separator="\n">
    try:
        <code>
        <postamble; separator="\n">
        <namedActions.after>
<if(exceptions)>
    <exceptions; separator="\n">
<else>
    except RecognitionException as re:
        localctx.exception = re
        self._errHandler.reportError(self, re)
        self._errHandler.recover(self, re)
<endif>
    finally:
        <finallyAction>
        self.exitRule()
    return localctx

>>

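// Illustration (editor's sketch, not part of the template file): RuleFunction
// above generates one method per parser rule; for a hypothetical rule `prog`
// in grammar "Expr" the emitted shape is roughly:
//
//   def prog(self):
//       localctx = ExprParser.ProgContext(self, self._ctx, self.state)
//       self.enterRule(localctx, 0, self.RULE_prog)
//       try:
//           ...  # alternative-matching code filled in from <code>
//       except RecognitionException as re:
//           localctx.exception = re
//           self._errHandler.reportError(self, re)
//           self._errHandler.recover(self, re)
//       finally:
//           self.exitRule()
//       return localctx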
LeftRecursiveRuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs,
    namedActions,finallyAction,postamble) ::=
<<

<ruleCtx>
<altLabelCtxs:{l | <altLabelCtxs.(l)>}; separator="\n">

def <currentRule.name>(self, _p=0<if(currentRule.args)><args:{a | , <a>}><endif>):
    _parentctx = self._ctx
    _parentState = self.state
    localctx = <parser.name>.<currentRule.ctxType>(self, self._ctx, _parentState<args:{a | , <a.name>}>)
    _prevctx = localctx
    _startState = <currentRule.startState>
    self.enterRecursionRule(localctx, <currentRule.startState>, self.RULE_<currentRule.name>, _p)
    <namedActions.init>
    <locals; separator="\n">
    try:
        <code>
        <postamble; separator="\n">
        <namedActions.after>
    except RecognitionException as re:
        localctx.exception = re
        self._errHandler.reportError(self, re)
        self._errHandler.recover(self, re)
    finally:
        <finallyAction>
        self.unrollRecursionContexts(_parentctx)
    return localctx

>>

CodeBlockForOuterMostAlt(currentOuterMostAltCodeBlock, locals, preamble, ops) ::= <<
<if(currentOuterMostAltCodeBlock.altLabel)>localctx = <parser.name>.<currentOuterMostAltCodeBlock.altLabel; format="cap">Context(self, localctx)<endif>
self.enterOuterAlt(localctx, <currentOuterMostAltCodeBlock.alt.altNum>)
<CodeBlockForAlt(currentAltCodeBlock=currentOuterMostAltCodeBlock, ...)>
>>

CodeBlockForAlt(currentAltCodeBlock, locals, preamble, ops) ::= <<
<locals; separator="\n">
<preamble; separator="\n">
<ops; separator="\n">
>>

LL1AltBlock(choice, preamble, alts, error) ::= <<
self.state = <choice.stateNumber>
<!_errHandler.sync(this);!>
<if(choice.label)><labelref(choice.label)> = self._input.LT(1)<endif>
<preamble; separator="\n">
token = self._input.LA(1)
<choice.altLook,alts:{look,alt| <cases(ttypes=look)>
    <alt>
}; separator="\nel">
else:
    <error>

>>

LL1OptionalBlock(choice, alts, error) ::= <<
self.state = <choice.stateNumber>
<!_errHandler.sync(this);!>
token = self._input.LA(1)
<choice.altLook,alts:{look,alt| <cases(ttypes=look)>
    <alt>
    pass}; separator="\nel">
else:
    <error>
>>

LL1OptionalBlockSingleAlt(choice, expr, alts, preamble, error, followExpr) ::= <<
self.state = <choice.stateNumber>
<!_errHandler.sync(this);!>
<preamble; separator="\n">
if <expr>:
    <alts; separator="\n">

<!else if ( !(<followExpr>) ) <error>!>
>>


LL1StarBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= <<
self.state = <choice.stateNumber>
self._errHandler.sync(self)
<preamble; separator="\n">
while <loopExpr>:
    <alts; separator="\n">
    self.state = <choice.loopBackStateNumber>
    self._errHandler.sync(self)
    <iteration>

>>

LL1PlusBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= <<
self.state = <choice.blockStartStateNumber> <! alt block decision !>
self._errHandler.sync(self)
<preamble; separator="\n">
while True:
    <alts; separator="\n">
    self.state = <choice.stateNumber> <! loopback/exit decision !>
    self._errHandler.sync(self)
    <iteration>
    if not (<loopExpr>):
        break

>>

// LL(*) stuff

AltBlock(choice, preamble, alts, error) ::= <<
self.state = <choice.stateNumber>
<!_errHandler.sync(this);!>
<if(choice.label)><labelref(choice.label)> = self._input.LT(1)<endif>
<preamble; separator="\n">
la_ = self._interp.adaptivePredict(self._input,<choice.decision>,self._ctx)
<alts:{alt |
if la_ == <i>:
    <alt>
    pass
}; separator="\nel">

>>

OptionalBlock(choice, alts, error) ::= <<
self.state = <choice.stateNumber>
<!_errHandler.sync(this);!>
la_ = self._interp.adaptivePredict(self._input,<choice.decision>,self._ctx)
<alts:{alt |
if la_ == <i><if(!choice.ast.greedy)>+1<endif>:
    <alt>
}; separator="\nel">

>>

StarBlock(choice, alts, sync, iteration) ::= <<
self.state = <choice.stateNumber>
self._errHandler.sync(self)
_alt = self._interp.adaptivePredict(self._input,<choice.decision>,self._ctx)
while _alt!=<choice.exitAlt> and _alt!=ATN.INVALID_ALT_NUMBER:
    if _alt==1<if(!choice.ast.greedy)>+1<endif>:
        <iteration>
        <alts> <! should only be one !>
    self.state = <choice.loopBackStateNumber>
    self._errHandler.sync(self)
    _alt = self._interp.adaptivePredict(self._input,<choice.decision>,self._ctx)

>>

PlusBlock(choice, alts, error) ::= <<
self.state = <choice.blockStartStateNumber> <! alt block decision !>
self._errHandler.sync(self)
_alt = 1<if(!choice.ast.greedy)>+1<endif>
while _alt!=<choice.exitAlt> and _alt!=ATN.INVALID_ALT_NUMBER:
    <alts:{alt|
if _alt == <i><if(!choice.ast.greedy)>+1<endif>:
    <alt>
}; separator="\nel">
    else:
        <error>
    self.state = <choice.loopBackStateNumber> <! loopback/exit decision !>
    self._errHandler.sync(self)
    _alt = self._interp.adaptivePredict(self._input,<choice.decision>,self._ctx)

>>

Sync(s) ::= "sync(<s.expecting.name>)"

ThrowNoViableAlt(t) ::= "raise NoViableAltException(self)"

TestSetInline(s) ::= <<
<s.bitsets:{bits | <if(rest(rest(bits.ttypes)))><bitsetBitfieldComparison(s, bits)><else><bitsetInlineComparison(s, bits)><endif>}; separator=" or ">
>>

// Java language spec 15.19 - shift operators mask operands rather than overflow to 0... need range test
testShiftInRange(shiftAmount) ::= <<
((<shiftAmount>) & ~0x3f) == 0
>>

// produces smaller bytecode only when bits.ttypes contains more than two items
bitsetBitfieldComparison(s, bits) ::= <%
(<testShiftInRange({<offsetShiftVar(s.varName, bits.shift)>})> and ((1 \<\< <offsetShiftVar(s.varName, bits.shift)>) & (<bits.ttypes:{ttype | (1 \<\< <offsetShiftType(ttype, bits.shift)>)}; separator=" | ">)) != 0)
%>

isZero ::= [
    "0":true,
    default:false
]

offsetShiftVar(shiftAmount, offset) ::= <%
<if(!isZero.(offset))>(<shiftAmount> - <offset>)<else><shiftAmount><endif>
%>

offsetShiftType(shiftAmount, offset) ::= <%
<if(!isZero.(offset))>(<parser.name>.<shiftAmount> - <offset>)<else><parser.name>.<shiftAmount><endif>
%>

// produces more efficient bytecode when bits.ttypes contains at most two items
bitsetInlineComparison(s, bits) ::= <%
<bits.ttypes:{ttype | <s.varName>==<parser.name>.<ttype>}; separator=" or ">
%>

cases(ttypes) ::= <<
if token in [<ttypes:{t | <parser.name>.<t>}; separator=", ">]:
>>

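// Illustration (editor's sketch, not part of the template file): for a
// hypothetical parser "T" with token types A=5, B=6, C=7, the two strategies
// above emit, respectively:
//
//   (((_la - 5) & ~0x3f) == 0 and ((1 << (_la - 5)) & ((1 << (T.A - 5)) | (1 << (T.B - 5)) | (1 << (T.C - 5)))) != 0)
//   _la==T.A or _la==T.B
//
// The bitfield form packs the whole lookahead-set test into one 64-bit mask
// check, which is why testShiftInRange guards the shift amount; the inline
// form is used for sets of one or two token types.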
InvokeRule(r, argExprsChunks) ::= <<
self.state = <r.stateNumber>
<if(r.labels)><r.labels:{l | <labelref(l)> = }><endif>self.<r.name>(<if(r.ast.options.p)><r.ast.options.p><if(argExprsChunks)>,<endif><endif><argExprsChunks>)
>>

MatchToken(m) ::= <<
self.state = <m.stateNumber>
<if(m.labels)><m.labels:{l | <labelref(l)> = }><endif>self.match(<parser.name>.<m.name>)
>>

MatchSet(m, expr, capture) ::= "<CommonSetStuff(m, expr, capture, false)>"

MatchNotSet(m, expr, capture) ::= "<CommonSetStuff(m, expr, capture, true)>"

CommonSetStuff(m, expr, capture, invert) ::= <<
self.state = <m.stateNumber>
<if(m.labels)><m.labels:{l | <labelref(l)> = }>self._input.LT(1)<endif>
<capture>
<if(invert)>if <m.varName> \<= 0 or <expr><else>if not(<expr>)<endif>:
    <if(m.labels)><m.labels:{l | <labelref(l)> = }><endif>self._errHandler.recoverInline(self)
else:
    self.consume()
>>

Wildcard(w) ::= <<
self.state = <w.stateNumber>
<if(w.labels)><w.labels:{l | <labelref(l)> = }><endif>self.matchWildcard()
>>

// ACTION STUFF

Action(a, foo, chunks) ::= "<chunks>"

ArgAction(a, chunks) ::= "<chunks>"

SemPred(p, chunks, failChunks) ::= <<
self.state = <p.stateNumber>
if not <chunks>:
    from antlr4.error.Errors import FailedPredicateException
    raise FailedPredicateException(self, <p.predicate><if(failChunks)>, <failChunks><elseif(p.msg)>, <p.msg><endif>)
>>

ExceptionClause(e, catchArg, catchAction) ::= <<
catch (<catchArg>) {
    <catchAction>
}
>>

// lexer actions are not associated with model objects

LexerSkipCommand() ::= "skip()"
LexerMoreCommand() ::= "more()"
LexerPopModeCommand() ::= "popMode()"

LexerTypeCommand(arg) ::= "_type = <arg>"
LexerChannelCommand(arg) ::= "_channel = <arg>"
LexerModeCommand(arg) ::= "_mode = <arg>"
LexerPushModeCommand(arg) ::= "pushMode(<arg>)"

ActionText(t) ::= "<t.text>"
ActionTemplate(t) ::= "<t.st>"
ArgRef(a) ::= "localctx.<a.name>"
LocalRef(a) ::= "localctx.<a.name>"
RetValueRef(a) ::= "localctx.<a.name>"
QRetValueRef(a) ::= "<ctx(a)>.<a.dict>.<a.name>"
/** How to translate $tokenLabel */
TokenRef(t) ::= "<ctx(t)>.<t.name>"
LabelRef(t) ::= "<ctx(t)>.<t.name>"
ListLabelRef(t) ::= "<ctx(t)>.<ListLabelName(t.name)>"
SetAttr(s,rhsChunks) ::= "<ctx(s)>.<s.name> = <rhsChunks>"

TokenLabelType() ::= "<file.TokenLabelType; null={Token}>"
InputSymbolType() ::= "<file.InputSymbolType; null={Token}>"

TokenPropertyRef_text(t) ::= "(None if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.text)"
TokenPropertyRef_type(t) ::= "(0 if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.type)"
TokenPropertyRef_line(t) ::= "(0 if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.line)"
TokenPropertyRef_pos(t) ::= "(0 if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.column)"
TokenPropertyRef_channel(t) ::= "(0 if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.channel)"
TokenPropertyRef_index(t) ::= "(0 if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.tokenIndex)"
TokenPropertyRef_int(t) ::= "(0 if <ctx(t)>.<t.label> is None else int(<ctx(t)>.<t.label>.text))"

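// Illustration (editor's sketch, not part of the template file): these
// templates translate attribute references inside grammar actions. For a
// labeled token `d=ID`, an action expression `$d.line` becomes the generated
// Python expression:
//
//   (0 if localctx.d is None else localctx.d.line)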
RulePropertyRef_start(r) ::= "(None if <ctx(r)>.<r.label> is None else <ctx(r)>.<r.label>.start)"
RulePropertyRef_stop(r) ::= "(None if <ctx(r)>.<r.label> is None else <ctx(r)>.<r.label>.stop)"
RulePropertyRef_text(r) ::= "(None if <ctx(r)>.<r.label> is None else self._input.getText((<ctx(r)>.<r.label>.start,<ctx(r)>.<r.label>.stop)))"
RulePropertyRef_ctx(r) ::= "<ctx(r)>.<r.label>"
RulePropertyRef_parser(r) ::= "self"

ThisRulePropertyRef_start(r) ::= "localctx.start"
ThisRulePropertyRef_stop(r) ::= "localctx.stop"
ThisRulePropertyRef_text(r) ::= "self._input.getText((localctx.start, self._input.LT(-1)))"
ThisRulePropertyRef_ctx(r) ::= "localctx"
ThisRulePropertyRef_parser(r) ::= "self"

NonLocalAttrRef(s) ::= "getInvokingContext(<s.ruleIndex>).<s.name>"
SetNonLocalAttr(s, rhsChunks) ::= "getInvokingContext(<s.ruleIndex>).<s.name> = <rhsChunks>"

AddToLabelList(a) ::= "<ctx(a.label)>.<a.listName>.append(<labelref(a.label)>)"

TokenDecl(t) ::= "self.<t.name> = None # <TokenLabelType()>"
TokenTypeDecl(t) ::= "self.<t.name> = 0 # <TokenLabelType()> type"
TokenListDecl(t) ::= "self.<t.name> = list() # of <TokenLabelType()>s"
RuleContextDecl(r) ::= "self.<r.name> = None # <r.ctxName>"
RuleContextListDecl(rdecl) ::= "self.<rdecl.name> = list() # of <rdecl.ctxName>s"

ContextTokenGetterDecl(t) ::= <<
def <t.name>(self):
    return self.getToken(<parser.name>.<t.name>, 0)
>>

// should never be called
ContextTokenListGetterDecl(t) ::= <<
def <t.name>_list(self):
    return self.getTokens(<parser.name>.<t.name>)
>>

ContextTokenListIndexedGetterDecl(t) ::= <<
def <t.name>(self, i=None):
    if i is None:
        return self.getTokens(<parser.name>.<t.name>)
    else:
        return self.getToken(<parser.name>.<t.name>, i)
>>

ContextRuleGetterDecl(r) ::= <<
def <r.name>(self):
    return self.getTypedRuleContext(<parser.name>.<r.ctxName>,0)

>>

// should never be called
ContextRuleListGetterDecl(r) ::= <<
def <r.name>_list(self):
    return self.getTypedRuleContexts(<parser.name>.<r.ctxName>)

>>

ContextRuleListIndexedGetterDecl(r) ::= <<
def <r.name>(self, i=None):
    if i is None:
        return self.getTypedRuleContexts(<parser.name>.<r.ctxName>)
    else:
        return self.getTypedRuleContext(<parser.name>.<r.ctxName>,i)

>>

LexerRuleContext() ::= "RuleContext"

/** The rule context name is the rule followed by a suffix; e.g.,
 *  r becomes rContext.
 */
RuleContextNameSuffix() ::= "Context"

ImplicitTokenLabel(tokenName) ::= "_<tokenName>"
ImplicitRuleLabel(ruleName) ::= "_<ruleName>"
ImplicitSetLabel(id) ::= "_tset<id>"
ListLabelName(label) ::= "<label>"

CaptureNextToken(d) ::= "<d.varName> = self._input.LT(1)"
CaptureNextTokenType(d) ::= "<d.varName> = self._input.LA(1)"

StructDecl(struct,ctorAttrs,attrs,getters,dispatchMethods,interfaces,extensionMembers,
    superClass={ParserRuleContext}) ::= <<
class <struct.name>(<superClass>):

    def __init__(self, parser, parent=None, invokingState=-1<struct.ctorAttrs:{a | , <a.name>=None}>):
        super(<parser.name>.<struct.name>, self).__init__(parent, invokingState)
        self.parser = parser
        <attrs:{a | <a>}; separator="\n">
        <struct.ctorAttrs:{a | self.<a.name> = <a.name>}; separator="\n">

    <getters:{g | <g>}; separator="\n\n">

    def getRuleIndex(self):
        return <parser.name>.RULE_<struct.derivedFromName>

<if(struct.provideCopyFrom)> <! don't need copy unless we have subclasses !>
    def copyFrom(self, ctx):
        super(<parser.name>.<struct.name>, self).copyFrom(ctx)
        <struct.attrs:{a | self.<a.name> = ctx.<a.name>}; separator="\n">

<endif>
    <dispatchMethods; separator="\n">
    <extensionMembers; separator="\n">

>>

AltLabelStructDecl(struct,attrs,getters,dispatchMethods) ::= <<
class <struct.name>(<currentRule.name; format="cap">Context):

    def __init__(self, parser, ctx):  # actually a <parser.name>.<currentRule.name; format="cap">Context
        super(<parser.name>.<struct.name>, self).__init__(parser)
        <attrs:{a | <a>}; separator="\n">
        self.copyFrom(ctx)

    <getters:{g | <g>}; separator="\n">

    <dispatchMethods; separator="\n">

>>

ListenerDispatchMethod(method) ::= <<
def <if(method.isEnter)>enter<else>exit<endif>Rule(self, listener):
    if hasattr(listener, "<if(method.isEnter)>enter<else>exit<endif><struct.derivedFromName; format="cap">"):
        listener.<if(method.isEnter)>enter<else>exit<endif><struct.derivedFromName; format="cap">(self)

>>

VisitorDispatchMethod(method) ::= <<
def accept(self, visitor):
    if hasattr(visitor, "visit<struct.derivedFromName; format="cap">"):
        return visitor.visit<struct.derivedFromName; format="cap">(self)
    else:
        return visitor.visitChildren(self)

>>

AttributeDecl(d) ::= "self.<d.name> = <if(d.initValue)><d.initValue><else>None<endif>"

/** If we don't know location of label def x, use this template */
labelref(x) ::= "<if(!x.isLocal)>localctx.<endif><x.name>"

/** For any action chunk, what is the correctly-typed context struct pointer? */
ctx(actionChunk) ::= "localctx"

// used for left-recursive rules
recRuleAltPredicate(ruleName,opPrec) ::= "self.precpred(self._ctx, <opPrec>)"
recRuleSetReturnAction(src,name) ::= "$<name>=$<src>.<name>"
recRuleSetStopToken() ::= "self._ctx.stop = self._input.LT(-1)"

recRuleAltStartAction(ruleName, ctxName, label) ::= <<
localctx = <parser.name>.<ctxName>Context(self, _parentctx, _parentState)
<if(label)>localctx.<label> = _prevctx<endif>
self.pushNewRecursionContext(localctx, _startState, self.RULE_<ruleName>)
>>

recRuleLabeledAltStartAction(ruleName, currentAltLabel, label, isListLabel) ::= <<
localctx = <parser.name>.<currentAltLabel; format="cap">Context(self, <parser.name>.<ruleName; format="cap">Context(self, _parentctx, _parentState))
<if(label)>
<if(isListLabel)>
localctx.<label>.append(_prevctx)
<else>
localctx.<label> = _prevctx
<endif>
<endif>
self.pushNewRecursionContext(localctx, _startState, self.RULE_<ruleName>)
>>


recRuleReplaceContext(ctxName) ::= <<
localctx = <parser.name>.<ctxName>Context(self, localctx)
self._ctx = localctx
_prevctx = localctx
>>

recRuleSetPrevCtx() ::= <<
if self._parseListeners is not None:
    self.triggerExitRuleEvent()
_prevctx = localctx
>>


LexerFile(lexerFile, lexer, namedActions) ::= <<
<fileHeader(lexerFile.grammarFileName, lexerFile.ANTLRVersion)>
# encoding: utf-8
from __future__ import print_function
from antlr4 import *
from io import StringIO

<namedActions.header>

<lexer>
>>

Lexer(lexer, atn, actionFuncs, sempredFuncs, superClass) ::= <<

<atn>

class <lexer.name>(<if(superClass)><superClass><else>Lexer<endif>):

    atn = ATNDeserializer().deserialize(serializedATN())

    decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]

    <rest(lexer.modes):{m| <m> = <i>}; separator="\n">

    <lexer.tokens:{k | <k> = <lexer.tokens.(k)>}; separator="\n", wrap, anchor>

    modeNames = [ <lexer.modes:{m| u"<m>"}; separator=", ", wrap, anchor> ]

    literalNames = [ u"\<INVALID>",
                     <lexer.literalNames:{t | u<t>}; separator=", ", wrap, anchor> ]

    symbolicNames = [ u"\<INVALID>",
                      <lexer.symbolicNames:{t | u<t>}; separator=", ", wrap, anchor> ]

    ruleNames = [ <lexer.ruleNames:{r | u"<r>"}; separator=", ", wrap, anchor> ]

    grammarFileName = u"<lexer.grammarFileName>"

    def __init__(self, input=None):
        super(<lexer.name>, self).__init__(input)
        self.checkVersion("<lexerFile.ANTLRVersion>")
        self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache())
        self._actions = None
        self._predicates = None

    <namedActions.members>

    <dumpActions(lexer, "", actionFuncs, sempredFuncs)>

>>

SerializedATN(model) ::= <<
<! only one segment, can be inlined !>

def serializedATN():
    with StringIO() as buf:
        buf.write(u"<model.serialized; wrap={")<\n>        buf.write(u"}>")
        return buf.getvalue()

>>

/** Using a type-to-init-value map, try to init a type; if the type is not in
 *  the table it must be an object, and the default value is "None".
 */
initValue(typeName) ::= <<
<pythonTypeInitMap.(typeName)>
>>

codeFileExtension() ::= ".py"