all Python2 runtime tests pass

This commit is contained in:
parrt 2015-06-25 12:25:42 -07:00
parent d1f95085fe
commit 8e89af918f
65 changed files with 13650 additions and 2 deletions

View File

@ -20,7 +20,6 @@
<sourceFolder url="file://$MODULE_DIR$/tool-testsuite/test" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/runtime" type="java-resource" />
<excludeFolder url="file://$MODULE_DIR$/runtime-testsuite/test/org/antlr/v4/test/runtime/javascript" />
<excludeFolder url="file://$MODULE_DIR$/runtime-testsuite/test/org/antlr/v4/test/runtime/python2" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />

View File

@ -0,0 +1,399 @@
IgnoredTests ::= [
default: false
]
TestFile(file) ::= <<
/* This file is generated by TestGenerator, any edits will be overwritten by the next generation. */
package org.antlr.v4.test.runtime.python2;
import org.junit.Ignore;
import org.junit.Test;
import static org.junit.Assert.*;
<if(file.Options.("ImportErrorQueue"))>
import org.antlr.v4.test.runtime.java.ErrorQueue;
<endif>
<if(file.Options.("ImportGrammar"))>
import org.antlr.v4.tool.Grammar;
<endif>
@SuppressWarnings("unused")
public class Test<file.name> extends BasePython2Test {
<file.tests:{test | <test>}; separator="\n", wrap, anchor>
}
>>
LexerTestMethod(test) ::= <<
/* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */
<testAnnotations(test)>
public void test<test.name>() throws Exception {
mkdir(tmpdir);
<test.SlaveGrammars:{grammar |
String slave_<grammar> =<writeStringLiteral(test.SlaveGrammars.(grammar))>;
writeFile(tmpdir, "<grammar>.g4", slave_<grammar>);
}; separator="\n">
<test.Grammar:{grammar |
<buildStringLiteral(test.Grammar.(grammar), "grammar")>
<if(test.AfterGrammar)>
<test.AfterGrammar>
<endif>
String input =<writeStringLiteral(test.Input)>;
String found = execLexer("<grammar>.g4", grammar, "<grammar><if(test.Options.("CombinedGrammar"))>Lexer<endif>", input, <writeBoolean(test.Options.("ShowDFA"))>);
assertEquals(<writeStringLiteral(test.Output)>, found);
<if(!isEmpty.(test.Errors))>
assertEquals(<writeStringLiteral(test.Errors)>, this.stderrDuringParse);
<else>
assertNull(this.stderrDuringParse);
<endif>
}>
}
>>
CompositeLexerTestMethod(test) ::= <<
<LexerTestMethod(test)>
>>
ParserTestMethod(test) ::= <<
/* This file and method are generated by TestGenerator, any edits will be overwritten by the next generation. */
<testAnnotations(test)>
public void test<test.name>() throws Exception {
mkdir(tmpdir);
<test.SlaveGrammars:{grammar |
String slave_<grammar> =<writeStringLiteral(test.SlaveGrammars.(grammar))>;
<if(test.Options.("SlaveIsLexer"))>
rawGenerateAndBuildRecognizer("<grammar>.g4", slave_<grammar>, null, "<grammar>");
<else>
writeFile(tmpdir, "<grammar>.g4", slave_<grammar>);
<endif>
}; separator="\n">
<test.Grammar:{grammar |
<buildStringLiteral(test.Grammar.(grammar), "grammar")>
<test.AfterGrammar>
String input =<writeStringLiteral(test.Input)>;
String found = execParser("<grammar>.g4", grammar, "<grammar><if(!test.slaveIsLexer)>Parser<endif>", "<if(test.slaveIsLexer)><first(test.slaveGrammars).grammarName><else><grammar>Lexer<endif>", "<grammar>Listener", "<grammar>Visitor", "<test.Rule>", input, <writeBoolean(test.Options.("Debug"))>);
assertEquals(<writeStringLiteral(test.Output)>, found);
<if(!isEmpty.(test.Errors))>
assertEquals(<writeStringLiteral(test.Errors)>, this.stderrDuringParse);
<else>
assertNull(this.stderrDuringParse);
<endif>
}>
}
>>
CompositeParserTestMethod(test) ::= <<
<ParserTestMethod(test)>
>>
AbstractParserTestMethod(test) ::= <<
/* this file and method are generated, any edit will be overwritten by the next generation */
String test<test.name>(String input) throws Exception {
String grammar = <test.grammar.lines:{ line | "<line>};separator="\\n\" +\n", wrap, anchor>";
return execParser("<test.grammar.grammarName>.g4", grammar, "<test.grammar.grammarName>Parser", "<test.grammar.grammarName>Lexer", "<test.startRule>", input, <test.debug>);
}
>>
ConcreteParserTestMethod(test) ::= <<
/* this file and method are generated, any edit will be overwritten by the next generation */
@Test
public void test<test.name>() throws Exception {
String found = test<test.baseName>("<test.input>");
assertEquals("<test.expectedOutput>", found);
<if(test.expectedErrors)>
assertEquals("<test.expectedErrors>", this.stderrDuringParse);
<else>
assertNull(this.stderrDuringParse);
<endif>
}
>>
testAnnotations(test) ::= <%
@Test
<if(test.Options.("Ignore"))>
<\n>@Ignore(<writeStringLiteral(test.Options.("Ignore"))>)
<elseif(IgnoredTests.(({<file.name>.<test.name>})))>
<\n>@Ignore(<writeStringLiteral(IgnoredTests.(({<file.name>.<test.name>})))>)
<endif>
%>
buildStringLiteral(text, variable) ::= <<
StringBuilder <variable>Builder = new StringBuilder(<strlen.(text)>);
<lines.(text):{line|<variable>Builder.append("<escape.(line)>");}; separator="\n">
String <variable> = <variable>Builder.toString();
>>
writeStringLiteral(text) ::= <%
<if(isEmpty.(text))>
""
<else>
<writeLines(lines.(text))>
<endif>
%>
writeLines(textLines) ::= <%
<if(rest(textLines))>
<textLines:{line|
<\n> "<escape.(line)>}; separator="\" +">"
<else>
"<escape.(first(textLines))>"
<endif>
%>
string(text) ::= <<
"<escape.(text)>"
>>
writeBoolean(o) ::= "<if(o && !isEmpty.(o))>true<else>false<endif>"
writeln(s) ::= <<print(<s>)>>
write(s) ::= <<print(<s>,end='')>>
False() ::= "False"
True() ::= "True"
Not(v) ::= "not <v>"
Assert(s) ::= ""
Cast(t,v) ::= "<v>"
Append(a,b) ::= "<a> + str(<b>)"
Concat(a,b) ::= "<a><b>"
DeclareLocal(s,v) ::= "<s> = <v>"
AssertIsList(v) ::= "assert isinstance(<v>, (list, tuple))"
AssignLocal(s,v) ::= "<s> = <v>"
InitIntMember(n,v) ::= <%<n> = <v>%>
InitBooleanMember(n,v) ::= <%<n> = <v>%>
GetMember(n) ::= <%self.<n>%>
SetMember(n,v) ::= <%self.<n> = <v>%>
AddMember(n,v) ::= <%self.<n> += <v>%>
PlusMember(v,n) ::= <%<v> + str(self.<n>)%>
MemberEquals(n,v) ::= <%self.<n> == <v>%>
ModMemberEquals(n,m,v) ::= <%self.<n> % <m> == <v>%>
ModMemberNotEquals(n,m,v) ::= <%self.<n> % <m> != <v>%>
DumpDFA() ::= "self.dumpDFA()"
Pass() ::= "pass"
StringList() ::= ""
BuildParseTrees() ::= "self._buildParseTrees = True"
BailErrorStrategy() ::= <%self._errHandler = BailErrorStrategy()%>
ToStringTree(s) ::= <%<s>.toStringTree(recog=self)%>
Column() ::= "self.column"
Text() ::= "self.text"
ValEquals(a,b) ::= <%<a>==<b>%>
TextEquals(a) ::= <%self.text=="<a>"%>
PlusText(a) ::= <%"<a>" + self.text%>
InputText() ::= "self._input.getText()"
LTEquals(i, v) ::= <%self._input.LT(<i>).text==<v>%>
LANotEquals(i, v) ::= <%self._input.LA(<i>)!=<v>%>
TokenStartColumnEquals(i) ::= <%self._tokenStartColumn==<i>%>
ImportListener(X) ::= ""
GetExpectedTokenNames() ::= "self.getExpectedTokens().toString(self.literalNames, self.symbolicNames)"
RuleInvocationStack() ::= "str_list(self.getRuleInvocationStack())"
LL_EXACT_AMBIG_DETECTION() ::= <<self._interp.predictionMode = PredictionMode.LL_EXACT_AMBIG_DETECTION>>
ParserPropertyMember() ::= <<
@members {
def Property(self):
return True
}
>>
PositionAdjustingLexer() ::= <<
def resetAcceptPosition(self, index, line, column):
self._input.seek(index)
self.line = line
self.column = column
self._interp.consume(self._input)
def nextToken(self):
if self._interp.__dict__.get("resetAcceptPosition", None) is None:
self._interp.__dict__["resetAcceptPosition"] = self.resetAcceptPosition
return super(type(self),self).nextToken()
def emit(self):
if self._type==PositionAdjustingLexer.TOKENS:
self.handleAcceptPositionForKeyword("tokens")
elif self._type==PositionAdjustingLexer.LABEL:
self.handleAcceptPositionForIdentifier()
return super(type(self),self).emit()
def handleAcceptPositionForIdentifier(self):
tokenText = self.text
identifierLength = 0
while identifierLength \< len(tokenText) and self.isIdentifierChar(tokenText[identifierLength]):
identifierLength += 1
if self._input.index > self._tokenStartCharIndex + identifierLength:
offset = identifierLength - 1
self._interp.resetAcceptPosition(self._tokenStartCharIndex + offset,
self._tokenStartLine, self._tokenStartColumn + offset)
return True
else:
return False
def handleAcceptPositionForKeyword(self, keyword):
if self._input.index > self._tokenStartCharIndex + len(keyword):
offset = len(keyword) - 1
self._interp.resetAcceptPosition(self._tokenStartCharIndex + offset,
self._tokenStartLine, self._tokenStartColumn + offset)
return True
else:
return False
@staticmethod
def isIdentifierChar(c):
return c.isalnum() or c == '_'
>>
BasicListener(X) ::= <<
if __name__ is not None and "." in __name__:
from .<X>Listener import <X>Listener
else:
from <X>Listener import <X>Listener
class LeafListener(TListener):
def visitTerminal(self, node):
print(node.symbol.text)
>>
WalkListener(s) ::= <<
walker = ParseTreeWalker()
walker.walk(TParser.LeafListener(), <s>)
>>
TokenGetterListener(X) ::= <<
if __name__ is not None and "." in __name__:
from .<X>Listener import <X>Listener
else:
from <X>Listener import <X>Listener
class LeafListener(TListener):
def exitA(self, ctx):
if ctx.getChildCount()==2:
print(ctx.INT(0).symbol.text + ' ' + ctx.INT(1).symbol.text + ' ' + str_list(ctx.INT()))
else:
print(str(ctx.ID().symbol))
>>
RuleGetterListener(X) ::= <<
if __name__ is not None and "." in __name__:
from .<X>Listener import <X>Listener
else:
from <X>Listener import <X>Listener
class LeafListener(TListener):
def exitA(self, ctx):
if ctx.getChildCount()==2:
print(ctx.b(0).start.text + ' ' + ctx.b(1).start.text + ' ' + ctx.b()[0].start.text)
else:
print(ctx.b(0).start.text)
>>
LRListener(X) ::= <<
if __name__ is not None and "." in __name__:
from .<X>Listener import <X>Listener
else:
from <X>Listener import <X>Listener
class LeafListener(TListener):
def exitE(self, ctx):
if ctx.getChildCount()==3:
print(ctx.e(0).start.text + ' ' + ctx.e(1).start.text + ' ' + ctx.e()[0].start.text)
else:
print(ctx.INT().symbol.text)
>>
LRWithLabelsListener(X) ::= <<
if __name__ is not None and "." in __name__:
from .<X>Listener import <X>Listener
else:
from <X>Listener import <X>Listener
class LeafListener(TListener):
def exitCall(self, ctx):
print(ctx.e().start.text + ' ' + str(ctx.eList()))
def exitInt(self, ctx):
print(ctx.INT().symbol.text)
>>
DeclareContextListGettersFunction() ::= <<
def foo():
s = SContext()
a = s.a()
b = s.b()
>>
Declare_foo() ::= <<def foo(self):
print('foo')
>>
Invoke_foo() ::= "self.foo()"
Declare_pred() ::= <<def pred(self, v):
print('eval=' + str(v).lower())
return v
>>
Invoke_pred(v) ::= <<self.pred(<v>)>>
isEmpty ::= [
"": true,
default: false
]

View File

@ -0,0 +1,94 @@
package org.antlr.v4.test.runtime.python2;
import org.antlr.v4.test.runtime.python.BasePythonTest;
import org.stringtemplate.v4.ST;
public abstract class BasePython2Test extends BasePythonTest {
@Override
protected String getLanguage() {
return "Python2";
}
@Override
protected String getPythonExecutable() {
return "python2.7";
}
@Override
protected void writeLexerTestFile(String lexerName, boolean showDFA) {
ST outputFileST = new ST(
"from __future__ import print_function\n"
+ "import sys\n"
+ "from antlr4 import *\n"
+ "from <lexerName> import <lexerName>\n"
+ "\n"
+ "def main(argv):\n"
+ " input = FileStream(argv[1])\n"
+ " lexer = <lexerName>(input)\n"
+ " stream = CommonTokenStream(lexer)\n"
+ " stream.fill()\n"
+ " [ print(str(t)) for t in stream.tokens ]\n"
+ (showDFA ? " print(lexer._interp.decisionToDFA[Lexer.DEFAULT_MODE].toLexerString(), end='')\n"
: "") + "\n" + "if __name__ == '__main__':\n"
+ " main(sys.argv)\n" + "\n");
outputFileST.add("lexerName", lexerName);
writeFile(tmpdir, "Test.py", outputFileST.render());
}
@Override
protected void writeParserTestFile(String parserName, String lexerName,
String listenerName, String visitorName,
String parserStartRuleName, boolean debug, boolean trace) {
if(!parserStartRuleName.endsWith(")"))
parserStartRuleName += "()";
ST outputFileST = new ST(
"import sys\n"
+ "from antlr4 import *\n"
+ "from <lexerName> import <lexerName>\n"
+ "from <parserName> import <parserName>\n"
+ "from <listenerName> import <listenerName>\n"
+ "from <visitorName> import <visitorName>\n"
+ "\n"
+ "class TreeShapeListener(ParseTreeListener):\n"
+ "\n"
+ " def visitTerminal(self, node):\n"
+ " pass\n"
+ "\n"
+ " def visitErrorNode(self, node):\n"
+ " pass\n"
+ "\n"
+ " def exitEveryRule(self, ctx):\n"
+ " pass\n"
+ "\n"
+ " def enterEveryRule(self, ctx):\n"
+ " for child in ctx.getChildren():\n"
+ " parent = child.parentCtx\n"
+ " if not isinstance(parent, RuleNode) or parent.getRuleContext() != ctx:\n"
+ " raise IllegalStateException(\"Invalid parse tree shape detected.\")\n"
+ "\n"
+ "def main(argv):\n"
+ " input = FileStream(argv[1])\n"
+ " lexer = <lexerName>(input)\n"
+ " stream = CommonTokenStream(lexer)\n"
+ "<createParser>"
+ " parser.buildParseTrees = True\n"
+ " tree = parser.<parserStartRuleName>\n"
+ " ParseTreeWalker.DEFAULT.walk(TreeShapeListener(), tree)\n"
+ "\n" + "if __name__ == '__main__':\n"
+ " main(sys.argv)\n" + "\n");
String stSource = " parser = <parserName>(stream)\n";
if(debug)
stSource += " parser.addErrorListener(DiagnosticErrorListener())\n";
if(trace)
stSource += " parser.setTrace(True)\n";
ST createParserST = new ST(stSource);
outputFileST.add("createParser", createParserST);
outputFileST.add("parserName", parserName);
outputFileST.add("lexerName", lexerName);
outputFileST.add("listenerName", listenerName);
outputFileST.add("visitorName", visitorName);
outputFileST.add("parserStartRuleName", parserStartRuleName);
writeFile(tmpdir, "Test.py", outputFileST.render());
}
}
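For reference, the writeLexerTestFile template above renders the following Test.py driver for a hypothetical lexer named MyLexer with showDFA=false (MyLexer is only an illustrative name; the real name comes from the grammar under test):

from __future__ import print_function
import sys
from antlr4 import *
from MyLexer import MyLexer

def main(argv):
    input = FileStream(argv[1])
    lexer = MyLexer(input)
    stream = CommonTokenStream(lexer)
    stream.fill()
    [ print(str(t)) for t in stream.tokens ]

if __name__ == '__main__':
    main(sys.argv)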

View File

@ -1,7 +1,7 @@
/* This file is generated by TestGenerator, any edits will be overwritten by the next generation. */
package org.antlr.v4.test.runtime.python2;
import org.antlr.v4.test.tool.ErrorQueue;
import org.antlr.v4.test.runtime.java.ErrorQueue;
import org.antlr.v4.tool.Grammar;
import org.junit.Test;

View File

@ -0,0 +1,26 @@
[The "BSD license"]
Copyright (c) 2015 Terence Parr, Sam Harwell, Eric Vergnaud
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1 @@
include *.txt

View File

@ -0,0 +1,4 @@
This is the Python 2.7 runtime for ANTLR.
Visit the ANTLR web sites for more information:
http://www.antlr.org
http://theantlrguy.atlassian.net/wiki/display/ANTLR4/Python+Target

View File

@ -0,0 +1,13 @@
What's in this release?
- fixed minor bugs due to mix of ascii/unicode encoding
- fixed bug where non-ascii input streams would fail
- added support for visitor pattern
- added support for wildcards in grammar
Breaking change:
In version 4.4, the parser/lexer had a tokenNames member.
This has been removed in favor of the following members:
- literalNames, containing the token literals as written in the grammar
- symbolicNames, corresponding to the old tokenNames
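A hedged migration sketch for that breaking change: code that used to index parser.tokenNames can combine the two new lists instead, mirroring the elementName() logic in IntervalSet further down. The recognizer parameter and the u"<INVALID>" placeholder convention are assumptions about the generated Python code of this era, not something this commit states.

def token_display_name(recognizer, ttype):
    # Prefer the literal name (e.g. u"'+'"), fall back to the symbolic name (e.g. u"ID").
    name = None
    if ttype < len(recognizer.literalNames):
        name = recognizer.literalNames[ttype]
    if name is None or name == u"<INVALID>":
        if ttype < len(recognizer.symbolicNames):
            name = recognizer.symbolicNames[ttype]
    return name if name is not None else u"<UNKNOWN>"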

runtime/Python2/setup.py (new file, 13 lines)
View File

@ -0,0 +1,13 @@
from distutils.core import setup
setup(
name='antlr4-python2-runtime',
version='4.5.2',
packages=['antlr4', 'antlr4.atn', 'antlr4.dfa', 'antlr4.tree', 'antlr4.error', 'antlr4.xpath'],
package_dir={'': 'src'},
url='http://www.antlr.org',
license='BSD',
author='Eric Vergnaud, Terence Parr, Sam Harwell',
author_email='eric.vergnaud@wanadoo.fr',
description='ANTLR 4.5.2 runtime for Python 2.7.6'
)
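A minimal smoke test of the layout this configuration implies, assuming the package has been installed (for example with "python setup.py install" run from runtime/Python2); package_dir={'': 'src'} means all of these resolve from the src directory:

import antlr4
import antlr4.atn
import antlr4.dfa
import antlr4.tree
import antlr4.error
import antlr4.xpath
print(antlr4.__file__)  # should point into the installed antlr4 package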

View File

@ -0,0 +1,328 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# This implementation of {@link TokenStream} loads tokens from a
# {@link TokenSource} on-demand, and places the tokens in a buffer to provide
# access to any previous token by index.
#
# <p>
# This token stream ignores the value of {@link Token#getChannel}. If your
# parser requires the token stream to filter tokens down to only those on a
# particular channel, such as {@link Token#DEFAULT_CHANNEL} or
# {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
# {@link CommonTokenStream}.</p>
from io import StringIO
from antlr4.Token import Token
from antlr4.error.Errors import IllegalStateException
# this is just to keep meaningful parameter types to Parser
class TokenStream(object):
pass
class BufferedTokenStream(TokenStream):
def __init__(self, tokenSource):
# The {@link TokenSource} from which tokens for this stream are fetched.
self.tokenSource = tokenSource
# A collection of all tokens fetched from the token source. The list is
# considered a complete view of the input once {@link #fetchedEOF} is set
# to {@code true}.
self.tokens = []
# The index into {@link #tokens} of the current token (next token to
# {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
# {@link #LT LT(1)}.
#
# <p>This field is set to -1 when the stream is first constructed or when
# {@link #setTokenSource} is called, indicating that the first token has
# not yet been fetched from the token source. For additional information,
# see the documentation of {@link IntStream} for a description of
# Initializing Methods.</p>
self.index = -1
# Indicates whether the {@link Token#EOF} token has been fetched from
# {@link #tokenSource} and added to {@link #tokens}. This field improves
# performance for the following cases:
#
# <ul>
# <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
# consuming the EOF symbol is optimized by checking the values of
# {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
# <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
# {@link #tokens} is trivial with this field.</li>
# </ul>
self.fetchedEOF = False
def mark(self):
return 0
def release(self, marker):
# no resources to release
pass
def reset(self):
self.seek(0)
def seek(self, index):
self.lazyInit()
self.index = self.adjustSeekIndex(index)
def get(self, index):
self.lazyInit()
return self.tokens[index]
def consume(self):
skipEofCheck = False
if self.index >= 0:
if self.fetchedEOF:
# the last token in tokens is EOF. skip check if p indexes any
# fetched token except the last.
skipEofCheck = self.index < len(self.tokens) - 1
else:
# no EOF token in tokens. skip check if p indexes a fetched token.
skipEofCheck = self.index < len(self.tokens)
else:
# not yet initialized
skipEofCheck = False
if not skipEofCheck and self.LA(1) == Token.EOF:
raise IllegalStateException("cannot consume EOF")
if self.sync(self.index + 1):
self.index = self.adjustSeekIndex(self.index + 1)
# Make sure index {@code i} in tokens has a token.
#
# @return {@code true} if a token is located at index {@code i}, otherwise
# {@code false}.
# @see #get(int i)
#/
def sync(self, i):
assert i >= 0
n = i - len(self.tokens) + 1 # how many more elements do we need?
if n > 0 :
fetched = self.fetch(n)
return fetched >= n
return True
# Add {@code n} elements to buffer.
#
# @return The actual number of elements added to the buffer.
#/
def fetch(self, n):
if self.fetchedEOF:
return 0
for i in range(0, n):
t = self.tokenSource.nextToken()
t.tokenIndex = len(self.tokens)
self.tokens.append(t)
if t.type==Token.EOF:
self.fetchedEOF = True
return i + 1
return n
# Get all tokens from start..stop inclusively#/
def getTokens(self, start, stop, types=None):
if start<0 or stop<0:
return None
self.lazyInit()
subset = []
if stop >= len(self.tokens):
stop = len(self.tokens)-1
for i in range(start, stop):
t = self.tokens[i]
if t.type==Token.EOF:
break
if types is None or t.type in types:
subset.append(t)
return subset
def LA(self, i):
return self.LT(i).type
def LB(self, k):
if (self.index-k) < 0:
return None
return self.tokens[self.index-k]
def LT(self, k):
self.lazyInit()
if k==0:
return None
if k < 0:
return self.LB(-k)
i = self.index + k - 1
self.sync(i)
if i >= len(self.tokens): # return EOF token
# EOF must be last token
return self.tokens[len(self.tokens)-1]
return self.tokens[i]
# Allows derived classes to modify the behavior of operations which change
# the current stream position by adjusting the target token index of a seek
# operation. The default implementation simply returns {@code i}. If an
# exception is thrown in this method, the current stream index should not be
# changed.
#
# <p>For example, {@link CommonTokenStream} overrides this method to ensure that
# the seek target is always an on-channel token.</p>
#
# @param i The target token index.
# @return The adjusted target token index.
def adjustSeekIndex(self, i):
return i
def lazyInit(self):
if self.index == -1:
self.setup()
def setup(self):
self.sync(0)
self.index = self.adjustSeekIndex(0)
# Reset this token stream by setting its token source.#/
def setTokenSource(self, tokenSource):
self.tokenSource = tokenSource
self.tokens = []
self.index = -1
# Given a starting index, return the index of the next token on channel.
# Return i if tokens[i] is on channel. Return -1 if there are no tokens
# on channel between i and EOF.
#/
def nextTokenOnChannel(self, i, channel):
self.sync(i)
if i>=len(self.tokens):
return -1
token = self.tokens[i]
while token.channel!=channel:
if token.type==Token.EOF:
return -1
i += 1
self.sync(i)
token = self.tokens[i]
return i
# Given a starting index, return the index of the previous token on channel.
# Return i if tokens[i] is on channel. Return -1 if there are no tokens
# on channel between i and 0.
def previousTokenOnChannel(self, i, channel):
while i>=0 and self.tokens[i].channel!=channel:
i -= 1
return i
# Collect all tokens on specified channel to the right of
# the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
# EOF. If channel is -1, find any non default channel token.
def getHiddenTokensToRight(self, tokenIndex, channel=-1):
self.lazyInit()
if tokenIndex<0 or tokenIndex>=len(self.tokens):
raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1))
from antlr4.Lexer import Lexer
nextOnChannel = self.nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL)
from_ = tokenIndex+1
# if none on channel to the right, nextOnChannel=-1, so set 'to' to the last token
to = (len(self.tokens)-1) if nextOnChannel==-1 else nextOnChannel
return self.filterForChannel(from_, to, channel)
# Collect all tokens on specified channel to the left of
# the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
# If channel is -1, find any non default channel token.
def getHiddenTokensToLeft(self, tokenIndex, channel=-1):
self.lazyInit()
if tokenIndex<0 or tokenIndex>=len(self.tokens):
raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1))
from antlr4.Lexer import Lexer
prevOnChannel = self.previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL)
if prevOnChannel == tokenIndex - 1:
return None
# if none on channel to left, prevOnChannel=-1 then from=0
from_ = prevOnChannel+1
to = tokenIndex-1
return self.filterForChannel(from_, to, channel)
def filterForChannel(self, left, right, channel):
hidden = []
for i in range(left, right+1):
t = self.tokens[i]
if channel==-1:
from antlr4.Lexer import Lexer
if t.channel!= Lexer.DEFAULT_TOKEN_CHANNEL:
hidden.append(t)
elif t.channel==channel:
hidden.append(t)
if len(hidden)==0:
return None
return hidden
def getSourceName(self):
return self.tokenSource.getSourceName()
# Get the text of all tokens in this buffer.#/
def getText(self, interval=None):
self.lazyInit()
self.fill()
if interval is None:
interval = (0, len(self.tokens)-1)
start = interval[0]
if isinstance(start, Token):
start = start.tokenIndex
stop = interval[1]
if isinstance(stop, Token):
stop = stop.tokenIndex
if start is None or stop is None or start<0 or stop<0:
return ""
if stop >= len(self.tokens):
stop = len(self.tokens)-1
with StringIO() as buf:
for i in range(start, stop+1):
t = self.tokens[i]
if t.type==Token.EOF:
break
buf.write(t.text)
return buf.getvalue()
# Get all tokens from lexer until EOF#/
def fill(self):
self.lazyInit()
while self.fetch(1000)==1000:
pass
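A minimal usage sketch of the buffering behaviour, using a hand-rolled token source purely for illustration (real code obtains tokens from a generated Lexer; ListTokenSource below is not part of the runtime):

from antlr4.Token import Token, CommonToken
from antlr4.BufferedTokenStream import BufferedTokenStream

class ListTokenSource(object):
    # Trivial token source: yields the given token types, then EOF forever.
    def __init__(self, types):
        self._types = list(types)
        self._pos = 0
    def nextToken(self):
        ttype = self._types[self._pos] if self._pos < len(self._types) else Token.EOF
        self._pos += 1
        return CommonToken(type=ttype)

stream = BufferedTokenStream(ListTokenSource([1, 2, 3]))
stream.fill()                 # pulls everything, including the EOF token
print(len(stream.tokens))     # 4: the three tokens plus EOF
print(stream.LA(1))           # 1, the type of the first token
stream.consume()
print(stream.LA(1))           # 2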

View File

@ -0,0 +1,84 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# This default implementation of {@link TokenFactory} creates
# {@link CommonToken} objects.
#
from antlr4.Token import CommonToken
class TokenFactory(object):
pass
class CommonTokenFactory(TokenFactory):
#
# The default {@link CommonTokenFactory} instance.
#
# <p>
# This token factory does not explicitly copy token text when constructing
# tokens.</p>
#
DEFAULT = None
def __init__(self, copyText=False):
# Indicates whether {@link CommonToken#setText} should be called after
# constructing tokens to explicitly set the text. This is useful for cases
# where the input stream might not be able to provide arbitrary substrings
# of text from the input after the lexer creates a token (e.g. the
# implementation of {@link CharStream#getText} in
# {@link UnbufferedCharStream} throws an
# {@link UnsupportedOperationException}). Explicitly setting the token text
# allows {@link Token#getText} to be called at any time regardless of the
# input stream implementation.
#
# <p>
# The default value is {@code false} to avoid the performance and memory
# overhead of copying text for every token unless explicitly requested.</p>
#
self.copyText = copyText
def create(self, source, type, text, channel, start, stop, line, column):
t = CommonToken(source, type, channel, start, stop)
t.line = line
t.column = column
if text is not None:
t.text = text
elif self.copyText and source[1] is not None:
t.text = source[1].getText(start,stop)
return t
def createThin(self, type, text):
t = CommonToken(type=type)
t.text = text
return t
CommonTokenFactory.DEFAULT = CommonTokenFactory()

View File

@ -0,0 +1,110 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
#
# This class extends {@link BufferedTokenStream} with functionality to filter
# token streams to tokens on a particular channel (tokens where
# {@link Token#getChannel} returns a particular value).
#
# <p>
# This token stream provides access to all tokens by index or when calling
# methods like {@link #getText}. The channel filtering is only used for code
# accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and
# {@link #LB}.</p>
#
# <p>
# By default, tokens are placed on the default channel
# ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the
# {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to
# call {@link Lexer#setChannel}.
# </p>
#
# <p>
# Note: lexer rules which use the {@code ->skip} lexer command or call
# {@link Lexer#skip} do not produce tokens at all, so input text matched by
# such a rule will not be available as part of the token stream, regardless of
# channel.</p>
#/
from antlr4.BufferedTokenStream import BufferedTokenStream
from antlr4.Token import Token
class CommonTokenStream(BufferedTokenStream):
def __init__(self, lexer, channel=Token.DEFAULT_CHANNEL):
super(CommonTokenStream, self).__init__(lexer)
self.channel = channel
def adjustSeekIndex(self, i):
return self.nextTokenOnChannel(i, self.channel)
def LB(self, k):
if k==0 or (self.index-k)<0:
return None
i = self.index
n = 1
# find k good tokens looking backwards
while n <= k:
# skip off-channel tokens
i = self.previousTokenOnChannel(i - 1, self.channel)
n += 1
if i < 0:
return None
return self.tokens[i]
def LT(self, k):
self.lazyInit()
if k == 0:
return None
if k < 0:
return self.LB(-k)
i = self.index
n = 1 # we know tokens[pos] is a good one
# find k good tokens
while n < k:
# skip off-channel tokens, but make sure to not look past EOF
if self.sync(i + 1):
i = self.nextTokenOnChannel(i + 1, self.channel)
n += 1
return self.tokens[i]
# Count EOF just once.#/
def getNumberOfOnChannelTokens(self):
n = 0
self.fill()
for i in range(0, len(self.tokens)):
t = self.tokens[i]
if t.channel==self.channel:
n += 1
if t.type==Token.EOF:
break
return n
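A hedged usage sketch of the channel filtering: MyLexer is a hypothetical ANTLR-generated lexer whose whitespace rule sends tokens to the HIDDEN channel (no such lexer ships with this commit), so LT/LA skip the whitespace while it stays reachable by index:

from antlr4 import InputStream, CommonTokenStream
from MyLexer import MyLexer   # hypothetical generated lexer

lexer = MyLexer(InputStream(u"a b"))
stream = CommonTokenStream(lexer)
stream.fill()
print(stream.LT(1).text)                  # first on-channel token, e.g. u"a"
print(stream.LT(2).text)                  # next on-channel token, e.g. u"b"
print([t.text for t in stream.tokens])    # every token, hidden ones included
print(stream.getHiddenTokensToRight(0))   # hidden whitespace after the first token, if any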

View File

@ -0,0 +1,58 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# This is an InputStream that is loaded from a file all at once
# when you construct the object.
#
import codecs
import unittest
from antlr4.InputStream import InputStream
class FileStream(InputStream):
def __init__(self, fileName, encoding='ascii'):
self.fileName = fileName
# read binary to avoid line ending conversion
with open(fileName, 'rb') as file:
bytes = file.read()
data = codecs.decode(bytes, encoding)
super(type(self), self).__init__(data)
class TestFileStream(unittest.TestCase):
def testStream(self):
stream = FileStream("FileStream.py")
self.assertTrue(stream.size>0)
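A small sketch of the encoding parameter, assuming a UTF-8 encoded file named input.txt exists in the working directory (the default stays 'ascii' as shown above):

from antlr4.FileStream import FileStream

stream = FileStream("input.txt", encoding="utf-8")
print(stream.size)                         # number of decoded characters
print(stream.getText(0, stream.size - 1))  # the whole decoded content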

View File

@ -0,0 +1,133 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import unittest
#
# Vacuum all input from a string and then treat it like a buffer.
#
from antlr4.Token import Token
class InputStream (object):
def __init__(self, data):
self.name = "<empty>"
self.strdata = unicode(data)
self._loadString()
def _loadString(self):
self._index = 0
self.data = [ord(c) for c in self.strdata]
self._size = len(self.data)
@property
def index(self):
return self._index
@property
def size(self):
return self._size
# Reset the stream so that it's in the same state it was
# when the object was created *except* the data array is not
# touched.
#
def reset(self):
self._index = 0
def consume(self):
if self._index >= self._size:
assert self.LA(1) == Token.EOF
raise Exception("cannot consume EOF")
self._index += 1
def LA(self, offset):
if offset==0:
return 0 # undefined
if offset<0:
offset += 1 # e.g., translate LA(-1) to use offset=0
pos = self._index + offset - 1
if pos < 0 or pos >= self._size: # invalid
return Token.EOF
return self.data[pos]
def LT(self, offset):
return self.LA(offset)
# mark/release do nothing; we have entire buffer
def mark(self):
return -1
def release(self, marker):
pass
# consume() ahead until p==_index; can't just set p=_index as we must
# update line and column. If we seek backwards, just set p
#
def seek(self, _index):
if _index<=self._index:
self._index = _index # just jump; don't update stream state (line, ...)
return
# seek forward
self._index = min(_index, self._size)
def getText(self, start, stop):
if stop >= self._size:
stop = self._size-1
if start >= self._size:
return ""
else:
return self.strdata[start:stop+1]
def __str__(self):
return unicode(self)
def __unicode__(self):
return self.strdata
class TestInputStream(unittest.TestCase):
def testStream(self):
stream = InputStream("abcde")
self.assertEqual(0, stream.index)
self.assertEqual(5, stream.size)
self.assertEqual(ord("a"), stream.LA(1))
stream.consume()
self.assertEqual(1, stream.index)
stream.seek(5)
self.assertEqual(Token.EOF, stream.LA(1))
self.assertEqual("bcd", stream.getText(1, 3))
stream.reset()
self.assertEqual(0, stream.index)

View File

@ -0,0 +1,297 @@
from io import StringIO
import unittest
from antlr4.Token import Token
class Interval(object):
def __init__(self, start, stop):
self.start = start
self.stop = stop
self.range = xrange(start, stop)
def __contains__(self, item):
return item in self.range
def __len__(self):
return self.stop - self.start
def __iter__(self):
return iter(self.range)
class IntervalSet(object):
def __init__(self):
self.intervals = None
self.readOnly = False
def __iter__(self):
if self.intervals is not None:
for i in self.intervals:
for c in i:
yield c
def __getitem__(self, item):
i = 0
for k in self:
if i==item:
return k
else:
i += 1
return Token.INVALID_TYPE
def addOne(self, v):
self.addRange(Interval(v, v+1))
def addRange(self, v):
if self.intervals is None:
self.intervals = list()
self.intervals.append(v)
else:
# find insert pos
k = 0
for i in self.intervals:
# distinct range -> insert
if v.stop<i.start:
self.intervals.insert(k, v)
return
# contiguous range -> adjust
elif v.stop==i.start:
self.intervals[k] = Interval(v.start, i.stop)
return
# overlapping range -> adjust and reduce
elif v.start<=i.stop:
self.intervals[k] = Interval(min(i.start,v.start), max(i.stop,v.stop))
self.reduce(k)
return
k += 1
# greater than any existing
self.intervals.append(v)
def addSet(self, other):
if other.intervals is not None:
for i in other.intervals:
self.addRange(i)
return self
def reduce(self, k):
# only need to reduce if k is not the last
if k<len(self.intervals)-1:
l = self.intervals[k]
r = self.intervals[k+1]
# if r contained in l
if l.stop >= r.stop:
self.intervals.pop(k+1)
self.reduce(k)
elif l.stop >= r.start:
self.intervals[k] = Interval(l.start, r.stop)
self.intervals.pop(k+1)
def complement(self, start, stop):
result = IntervalSet()
result.addRange(Interval(start,stop+1))
for i in self.intervals:
result.removeRange(i)
return result
def __contains__(self, item):
if self.intervals is None:
return False
else:
for i in self.intervals:
if item in i:
return True
return False
def __len__(self):
xlen = 0
for i in self.intervals:
xlen += len(i)
return xlen
def removeRange(self, v):
if v.start==v.stop-1:
self.removeOne(v.start)
elif self.intervals is not None:
k = 0
for i in self.intervals:
# intervals are ordered
if v.stop<=i.start:
return
# check for including range, split it
elif v.start>i.start and v.stop<i.stop:
self.intervals[k] = Interval(i.start, v.start)
x = Interval(v.stop, i.stop)
self.intervals.insert(k, x)
return
# check for included range, remove it
elif v.start<=i.start and v.stop>=i.stop:
self.intervals.pop(k)
k = k - 1 # need another pass
# check for lower boundary
elif v.start<i.stop:
self.intervals[k] = Interval(i.start, v.start)
# check for upper boundary
elif v.stop<i.stop:
self.intervals[k] = Interval(v.stop, i.stop)
k += 1
def removeOne(self, v):
if self.intervals is not None:
k = 0
for i in self.intervals:
# intervals is ordered
if v<i.start:
return
# check for single value range
elif v==i.start and v==i.stop-1:
self.intervals.pop(k)
return
# check for lower boundary
elif v==i.start:
self.intervals[k] = Interval(i.start+1, i.stop)
return
# check for upper boundary
elif v==i.stop-1:
self.intervals[k] = Interval(i.start, i.stop-1)
return
# split existing range
elif v<i.stop-1:
x = Interval(i.start, v)
i.start = v + 1
self.intervals.insert(k, x)
return
k += 1
def toString(self, literalNames, symbolicNames):
if self.intervals is None:
return u"{}"
with StringIO() as buf:
if len(self)>1:
buf.write(u"{")
first = True
for i in self.intervals:
for j in i:
if not first:
buf.write(u", ")
buf.write(self.elementName(literalNames, symbolicNames, j))
first = False
if len(self)>1:
buf.write(u"}")
return buf.getvalue()
def elementName(self, literalNames, symbolicNames, a):
if a==Token.EOF:
return u"<EOF>"
elif a==Token.EPSILON:
return u"<EPSILON>"
else:
if a<len(literalNames):
return literalNames[a]
if a<len(symbolicNames):
return symbolicNames[a]
return u"<UNKNOWN>"
class TestIntervalSet(unittest.TestCase):
def testEmpty(self):
s = IntervalSet()
self.assertIsNone(s.intervals)
self.assertFalse(30 in s)
def testOne(self):
s = IntervalSet()
s.addOne(30)
self.assertTrue(30 in s)
self.assertFalse(29 in s)
self.assertFalse(31 in s)
def testTwo(self):
s = IntervalSet()
s.addOne(30)
s.addOne(40)
self.assertTrue(30 in s)
self.assertTrue(40 in s)
self.assertFalse(35 in s)
def testRange(self):
s = IntervalSet()
s.addRange(Interval(30,41))
self.assertTrue(30 in s)
self.assertTrue(40 in s)
self.assertTrue(35 in s)
def testDistinct1(self):
s = IntervalSet()
s.addRange(Interval(30,32))
s.addRange(Interval(40,42))
self.assertEquals(2,len(s.intervals))
self.assertTrue(30 in s)
self.assertTrue(40 in s)
self.assertFalse(35 in s)
def testDistinct2(self):
s = IntervalSet()
s.addRange(Interval(40,42))
s.addRange(Interval(30,32))
self.assertEquals(2,len(s.intervals))
self.assertTrue(30 in s)
self.assertTrue(40 in s)
self.assertFalse(35 in s)
def testContiguous1(self):
s = IntervalSet()
s.addRange(Interval(30,36))
s.addRange(Interval(36,41))
self.assertEquals(1,len(s.intervals))
self.assertTrue(30 in s)
self.assertTrue(40 in s)
self.assertTrue(35 in s)
def testContiguous2(self):
s = IntervalSet()
s.addRange(Interval(36,41))
s.addRange(Interval(30,36))
self.assertEquals(1,len(s.intervals))
self.assertTrue(30 in s)
self.assertTrue(40 in s)
def testOverlapping1(self):
s = IntervalSet()
s.addRange(Interval(30,40))
s.addRange(Interval(35,45))
self.assertEquals(1,len(s.intervals))
self.assertTrue(30 in s)
self.assertTrue(44 in s)
def testOverlapping2(self):
s = IntervalSet()
s.addRange(Interval(35,45))
s.addRange(Interval(30,40))
self.assertEquals(1,len(s.intervals))
self.assertTrue(30 in s)
self.assertTrue(44 in s)
def testOverlapping3(self):
s = IntervalSet()
s.addRange(Interval(30,32))
s.addRange(Interval(40,42))
s.addRange(Interval(50,52))
s.addRange(Interval(20,61))
self.assertEquals(1,len(s.intervals))
self.assertTrue(20 in s)
self.assertTrue(60 in s)
def testComplement(self):
s = IntervalSet()
s.addRange(Interval(10,21))
c = s.complement(1,100)
self.assertTrue(1 in c)
self.assertTrue(100 in c)
self.assertTrue(10 not in c)
self.assertTrue(20 not in c)

View File

@ -0,0 +1,195 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from antlr4.IntervalSet import IntervalSet, Interval
from antlr4.Token import Token
from antlr4.PredictionContext import PredictionContext, SingletonPredictionContext, PredictionContextFromRuleContext
from antlr4.atn.ATNConfig import ATNConfig
from antlr4.atn.ATNState import RuleStopState
from antlr4.atn.Transition import WildcardTransition, NotSetTransition, AbstractPredicateTransition, RuleTransition
class LL1Analyzer (object):
#* Special value added to the lookahead sets to indicate that we hit
# a predicate during analysis if {@code seeThruPreds==false}.
#/
HIT_PRED = Token.INVALID_TYPE
def __init__(self, atn):
self.atn = atn
#*
# Calculates the SLL(1) expected lookahead set for each outgoing transition
# of an {@link ATNState}. The returned array has one element for each
# outgoing transition in {@code s}. If the closure from transition
# <em>i</em> leads to a semantic predicate before matching a symbol, the
# element at index <em>i</em> of the result will be {@code null}.
#
# @param s the ATN state
# @return the expected symbols for each outgoing transition of {@code s}.
#/
def getDecisionLookahead(self, s):
if s is None:
return None
count = len(s.transitions)
look = [None] * count  # one lookahead set per outgoing transition
for alt in range(0, count):
look[alt] = set()
lookBusy = set()
seeThruPreds = False # fail to get lookahead upon pred
self._LOOK(s.transition(alt).target, None, PredictionContext.EMPTY, \
look[alt], lookBusy, set(), seeThruPreds, False)
# Wipe out lookahead for this alternative if we found nothing
# or we had a predicate when we !seeThruPreds
if len(look[alt])==0 or self.HIT_PRED in look[alt]:
look[alt] = None
return look
#*
# Compute set of tokens that can follow {@code s} in the ATN in the
# specified {@code ctx}.
#
# <p>If {@code ctx} is {@code null} and the end of the rule containing
# {@code s} is reached, {@link Token#EPSILON} is added to the result set.
# If {@code ctx} is not {@code null} and the end of the outermost rule is
# reached, {@link Token#EOF} is added to the result set.</p>
#
# @param s the ATN state
# @param stopState the ATN state to stop at. This can be a
# {@link BlockEndState} to detect epsilon paths through a closure.
# @param ctx the complete parser context, or {@code null} if the context
# should be ignored
#
# @return The set of tokens that can follow {@code s} in the ATN in the
# specified {@code ctx}.
#/
def LOOK(self, s, stopState=None, ctx=None):
r = IntervalSet()
seeThruPreds = True # ignore preds; get all lookahead
lookContext = PredictionContextFromRuleContext(s.atn, ctx) if ctx is not None else None
self._LOOK(s, stopState, lookContext, r, set(), set(), seeThruPreds, True)
return r
#*
# Compute set of tokens that can follow {@code s} in the ATN in the
# specified {@code ctx}.
#
# <p>If {@code ctx} is {@code null} and {@code stopState} or the end of the
# rule containing {@code s} is reached, {@link Token#EPSILON} is added to
# the result set. If {@code ctx} is not {@code null} and {@code addEOF} is
# {@code true} and {@code stopState} or the end of the outermost rule is
# reached, {@link Token#EOF} is added to the result set.</p>
#
# @param s the ATN state.
# @param stopState the ATN state to stop at. This can be a
# {@link BlockEndState} to detect epsilon paths through a closure.
# @param ctx The outer context, or {@code null} if the outer context should
# not be used.
# @param look The result lookahead set.
# @param lookBusy A set used for preventing epsilon closures in the ATN
# from causing a stack overflow. Outside code should pass
# {@code new HashSet<ATNConfig>} for this argument.
# @param calledRuleStack A set used for preventing left recursion in the
# ATN from causing a stack overflow. Outside code should pass
# {@code new BitSet()} for this argument.
# @param seeThruPreds {@code true} to treat semantic predicates as
# implicitly {@code true} and "see through them", otherwise {@code false}
# to treat semantic predicates as opaque and add {@link #HIT_PRED} to the
# result if one is encountered.
# @param addEOF Add {@link Token#EOF} to the result if the end of the
# outermost context is reached. This parameter has no effect if {@code ctx}
# is {@code null}.
#/
def _LOOK(self, s, stopState , ctx, look, lookBusy, \
calledRuleStack, seeThruPreds, addEOF):
c = ATNConfig(s, 0, ctx)
if c in lookBusy:
return
lookBusy.add(c)
if s == stopState:
if ctx is None:
look.addOne(Token.EPSILON)
return
elif ctx.isEmpty() and addEOF:
look.addOne(Token.EOF)
return
if isinstance(s, RuleStopState ):
if ctx is None:
look.addOne(Token.EPSILON)
return
elif ctx.isEmpty() and addEOF:
look.addOne(Token.EOF)
return
if ctx != PredictionContext.EMPTY:
# run thru all possible stack tops in ctx
for i in range(0, len(ctx)):
returnState = self.atn.states[ctx.getReturnState(i)]
removed = returnState.ruleIndex in calledRuleStack
try:
calledRuleStack.discard(returnState.ruleIndex)
self._LOOK(returnState, stopState, ctx.getParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
finally:
if removed:
calledRuleStack.add(returnState.ruleIndex)
return
for t in s.transitions:
if type(t) == RuleTransition:
if t.target.ruleIndex in calledRuleStack:
continue
newContext = SingletonPredictionContext.create(ctx, t.followState.stateNumber)
try:
calledRuleStack.add(t.target.ruleIndex)
self._LOOK(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
finally:
calledRuleStack.remove(t.target.ruleIndex)
elif isinstance(t, AbstractPredicateTransition ):
if seeThruPreds:
self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
else:
look.addOne(self.HIT_PRED)
elif t.isEpsilon:
self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
elif type(t) == WildcardTransition:
look.addRange( Interval(Token.MIN_USER_TOKEN_TYPE, self.atn.maxTokenType + 1) )
else:
set = t.label
if set is not None:
if isinstance(t, NotSetTransition):
set = set.complement(Token.MIN_USER_TOKEN_TYPE, self.atn.maxTokenType)
look.addSet(set)

View File

@ -0,0 +1,343 @@
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
# A lexer is a recognizer that draws input symbols from a character stream.
# Lexer grammars result in a subclass of this object. A Lexer object
# uses simplified match() and error recovery mechanisms in the interest
# of speed.
#/
from io import StringIO
from antlr4.CommonTokenFactory import CommonTokenFactory
from antlr4.Recognizer import Recognizer
from antlr4.Token import Token
from antlr4.error.Errors import IllegalStateException, LexerNoViableAltException
class TokenSource(object):
pass
class Lexer(Recognizer, TokenSource):
DEFAULT_MODE = 0
MORE = -2
SKIP = -3
DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL
HIDDEN = Token.HIDDEN_CHANNEL
MIN_CHAR_VALUE = '\u0000'
MAX_CHAR_VALUE = '\uFFFE'
def __init__(self, input):
super(Lexer, self).__init__()
self._input = input
self._factory = CommonTokenFactory.DEFAULT
self._tokenFactorySourcePair = (self, input)
self._interp = None # child classes must populate this
# The goal of all lexer rules/methods is to create a token object.
# this is an instance variable as multiple rules may collaborate to
# create a single token. nextToken will return this object after
# matching lexer rule(s). If you subclass to allow multiple token
# emissions, then set this to the last token to be matched or
# something non-null so that the auto token emit mechanism will not
# emit another token.
self._token = None
# What character index in the stream did the current token start at?
# Needed, for example, to get the text for current token. Set at
# the start of nextToken.
self._tokenStartCharIndex = -1
# The line on which the first character of the token resides#/
self._tokenStartLine = -1
# The character position of first character within the line#/
self._tokenStartColumn = -1
# Once we see EOF on char stream, next token will be EOF.
# If you have DONE : EOF ; then you see DONE EOF.
self._hitEOF = False
# The channel number for the current token#/
self._channel = Token.DEFAULT_CHANNEL
# The token type for the current token#/
self._type = Token.INVALID_TYPE
self._modeStack = []
self._mode = self.DEFAULT_MODE
# You can set the text for the current token to override what is in
# the input char buffer. Use setText() or set this instance var directly.
#/
self._text = None
def reset(self):
# wack Lexer state variables
if self._input is not None:
self._input.seek(0) # rewind the input
self._token = None
self._type = Token.INVALID_TYPE
self._channel = Token.DEFAULT_CHANNEL
self._tokenStartCharIndex = -1
self._tokenStartColumn = -1
self._tokenStartLine = -1
self._text = None
self._hitEOF = False
self._mode = Lexer.DEFAULT_MODE
self._modeStack = []
self._interp.reset()
# Return a token from this source; i.e., match a token on the char
# stream.
def nextToken(self):
if self._input is None:
raise IllegalStateException("nextToken requires a non-null input stream.")
# Mark start location in char stream so unbuffered streams are
# guaranteed to at least have the text of the current token
tokenStartMarker = self._input.mark()
try:
while True:
if self._hitEOF:
self.emitEOF()
return self._token
self._token = None
self._channel = Token.DEFAULT_CHANNEL
self._tokenStartCharIndex = self._input.index
self._tokenStartColumn = self._interp.column
self._tokenStartLine = self._interp.line
self._text = None
continueOuter = False
while True:
self._type = Token.INVALID_TYPE
ttype = self.SKIP
try:
ttype = self._interp.match(self._input, self._mode)
except LexerNoViableAltException as e:
self.notifyListeners(e) # report error
self.recover(e)
if self._input.LA(1)==Token.EOF:
self._hitEOF = True
if self._type == Token.INVALID_TYPE:
self._type = ttype
if self._type == self.SKIP:
continueOuter = True
break
if self._type!=self.MORE:
break
if continueOuter:
continue
if self._token is None:
self.emit()
return self._token
finally:
# make sure we release marker after match or
# unbuffered char stream will keep buffering
self._input.release(tokenStartMarker)
# Instruct the lexer to skip creating a token for current lexer rule
# and look for another token. nextToken() knows to keep looking when
# a lexer rule finishes with token set to SKIP_TOKEN. Recall that
# if token==null at end of any token rule, it creates one for you
# and emits it.
#/
def skip(self):
self._type = self.SKIP
def more(self):
self._type = self.MORE
def mode(self, m):
self._mode = m
def pushMode(self, m):
if self._interp.debug:
print("pushMode " + str(m))
self._modeStack.append(self._mode)
self.mode(m)
def popMode(self):
if len(self._modeStack)==0:
raise Exception("Empty Stack")
if self._interp.debug:
print("popMode back to "+ self._modeStack[:-1])
self.mode( self._modeStack.pop() )
return self._mode
# Set the char stream and reset the lexer#/
@property
def inputStream(self):
return self._input
@inputStream.setter
def inputStream(self, input):
self._input = None
self._tokenFactorySourcePair = (self, self._input)
self.reset()
self._input = input
self._tokenFactorySourcePair = (self, self._input)
@property
def sourceName(self):
return self._input.sourceName
# By default does not support multiple emits per nextToken invocation
# for efficiency reasons. Subclass and override this method, nextToken,
# and getToken (to push tokens into a list and pull from that list
# rather than a single variable as this implementation does).
#/
def emitToken(self, token):
self._token = token
# The standard method called to automatically emit a token at the
# outermost lexical rule. The token object should point into the
# char buffer start..stop. If there is a text override in 'text',
# use that to set the token's text. Override this method to emit
# custom Token objects or provide a new factory.
#/
def emit(self):
t = self._factory.create(self._tokenFactorySourcePair, self._type, self._text, self._channel, self._tokenStartCharIndex,
self.getCharIndex()-1, self._tokenStartLine, self._tokenStartColumn)
self.emitToken(t)
return t
def emitEOF(self):
cpos = self.column
lpos = self.line
eof = self._factory.create(self._tokenFactorySourcePair, Token.EOF, None, Token.DEFAULT_CHANNEL, self._input.index,
self._input.index-1, lpos, cpos)
self.emitToken(eof)
return eof
@property
def type(self):
return self._type
@type.setter
def type(self, type):
self._type = type
@property
def line(self):
return self._interp.line
@line.setter
def line(self, line):
self._interp.line = line
@property
def column(self):
return self._interp.column
@column.setter
def column(self, column):
self._interp.column = column
# What is the index of the current character of lookahead?#/
def getCharIndex(self):
return self._input.index
# Return the text matched so far for the current token or any
# text override.
@property
def text(self):
if self._text is not None:
return self._text
else:
return self._interp.getText(self._input)
# Set the complete text of this token; it wipes any previous
# changes to the text.
@text.setter
def text(self, txt):
self._text = txt
# Return a list of all Token objects in input char stream.
# Forces load of all tokens. Does not include EOF token.
#/
def getAllTokens(self):
tokens = []
t = self.nextToken()
while t.type!=Token.EOF:
tokens.append(t)
t = self.nextToken()
return tokens
def notifyListeners(self, e):
start = self._tokenStartCharIndex
stop = self._input.index
text = self._input.getText(start, stop)
msg = "token recognition error at: '" + self.getErrorDisplay(text) + "'"
listener = self.getErrorListenerDispatch()
listener.syntaxError(self, None, self._tokenStartLine, self._tokenStartColumn, msg, e)
def getErrorDisplay(self, s):
with StringIO() as buf:
for c in s:
buf.write(unicode(self.getErrorDisplayForChar(c)))
return buf.getvalue()
def getErrorDisplayForChar(self, c):
if ord(c[0])==Token.EOF:
return "<EOF>"
elif c=='\n':
return "\\n"
elif c=='\t':
return "\\t"
elif c=='\r':
return "\\r"
else:
return str(c)
def getCharErrorDisplay(self, c):
return "'" + self.getErrorDisplayForChar(c) + "'"
# Lexers can normally match any char in its vocabulary after matching
# a token, so do the easy thing and just kill a character and hope
# it all works out. You can instead use the rule invocation stack
# to do sophisticated error recovery if you are in a fragment rule.
#/
def recover(self, re):
if self._input.LA(1) != Token.EOF:
if isinstance(re, LexerNoViableAltException):
# skip a char and try again
self._interp.consume(self._input)
else:
# TODO: Do we lose character or line position information?
self._input.consume()
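# Usage sketch (illustrative only, not part of the runtime): draining a lexer
# with getAllTokens(). "lexer" is assumed to be an instance of an
# ANTLR-generated Lexer subclass already constructed with a character stream;
# the helper name below is hypothetical.
def _dump_all_tokens(lexer):
    # getAllTokens() repeatedly calls nextToken() until it sees Token.EOF
    for token in lexer.getAllTokens():
        print(str(token.type) + " " + token.text)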

View File

@ -0,0 +1,139 @@
#
# Provides an implementation of {@link TokenSource} as a wrapper around a list
# of {@link Token} objects.
#
# <p>If the final token in the list is an {@link Token#EOF} token, it will be used
# as the EOF token for every call to {@link #nextToken} after the end of the
# list is reached. Otherwise, an EOF token will be created.</p>
#
from antlr4.CommonTokenFactory import CommonTokenFactory
from antlr4.Lexer import TokenSource
from antlr4.Token import Token
class ListTokenSource(TokenSource):
# Constructs a new {@link ListTokenSource} instance from the specified
# collection of {@link Token} objects and source name.
#
# @param tokens The collection of {@link Token} objects to provide as a
# {@link TokenSource}.
# @param sourceName The name of the {@link TokenSource}. If this value is
# {@code null}, {@link #getSourceName} will attempt to infer the name from
# the next {@link Token} (or the previous token if the end of the input has
# been reached).
#
# @exception NullPointerException if {@code tokens} is {@code null}
#
def __init__(self, tokens, sourceName=None):
if tokens is None:
raise ReferenceError("tokens cannot be null")
self.tokens = tokens
self.sourceName = sourceName
# The index into {@link #tokens} of token to return by the next call to
# {@link #nextToken}. The end of the input is indicated by this value
# being greater than or equal to the number of items in {@link #tokens}.
self.pos = 0
# This field caches the EOF token for the token source.
self.eofToken = None
# This is the backing field for {@link #getTokenFactory} and
self._factory = CommonTokenFactory.DEFAULT
#
# {@inheritDoc}
#
@property
def column(self):
if self.pos < len(self.tokens):
return self.tokens[self.pos].column
elif self.eofToken is not None:
return self.eofToken.column
elif len(self.tokens) > 0:
# have to calculate the result from the line/column of the previous
# token, along with the text of the token.
lastToken = self.tokens[len(self.tokens) - 1]
tokenText = lastToken.text
if tokenText is not None:
lastNewLine = tokenText.rfind('\n')
if lastNewLine >= 0:
return len(tokenText) - lastNewLine - 1
return lastToken.column + lastToken.stopIndex - lastToken.startIndex + 1
# only reach this if tokens is empty, meaning EOF occurs at the first
# position in the input
return 0
#
# {@inheritDoc}
#
def nextToken(self):
if self.pos >= len(self.tokens):
if self.eofToken is None:
start = -1
if len(self.tokens) > 0:
previousStop = self.tokens[len(self.tokens) - 1].stopIndex
if previousStop != -1:
start = previousStop + 1
stop = max(-1, start - 1)
self.eofToken = self._factory.create((self, self.getInputStream()),
Token.EOF, "EOF", Token.DEFAULT_CHANNEL, start, stop, self.line, self.column)
return self.eofToken
t = self.tokens[self.pos]
if self.pos == len(self.tokens) - 1 and t.type == Token.EOF:
self.eofToken = t
self.pos += 1
return t
#
# {@inheritDoc}
#
@property
def line(self):
if self.pos < len(self.tokens):
return self.tokens[self.pos].line
elif self.eofToken is not None:
return self.eofToken.line
elif len(self.tokens) > 0:
# have to calculate the result from the line/column of the previous
# token, along with the text of the token.
lastToken = self.tokens[len(self.tokens) - 1]
line = lastToken.line
tokenText = lastToken.text
if tokenText is not None:
for c in tokenText:
if c == '\n':
line += 1
# if no text is available, assume the token did not contain any newline characters.
return line
# only reach this if tokens is empty, meaning EOF occurs at the first
# position in the input
return 1
#
# {@inheritDoc}
#
def getInputStream(self):
if self.pos < len(self.tokens):
return self.tokens[self.pos].getInputStream()
elif self.eofToken is not None:
return self.eofToken.getInputStream()
elif len(self.tokens) > 0:
return self.tokens[len(self.tokens) - 1].getInputStream()
else:
# no input stream information is available
return None
#
# {@inheritDoc}
#
def getSourceName(self):
if self.sourceName is not None:
return self.sourceName
inputStream = self.getInputStream()
if inputStream is not None:
return inputStream.getSourceName()
else:
return "List"

View File

@ -0,0 +1,575 @@
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import print_function
from antlr4.error.ErrorStrategy import DefaultErrorStrategy
from antlr4.Recognizer import Recognizer
from antlr4.Token import Token
from antlr4.Lexer import Lexer
from antlr4.atn.ATNDeserializer import ATNDeserializer
from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions
from antlr4.error.Errors import UnsupportedOperationException
from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher
from antlr4.tree.Tree import ParseTreeListener
class TraceListener(ParseTreeListener):
def __init__(self, parser):
self._parser = parser
def enterEveryRule(self, ctx):
print("enter " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text)
def visitTerminal(self, node):
print("consume " + str(node.symbol) + " rule " + self._parser.ruleNames[self._parser._ctx.getRuleIndex()])
def visitErrorNode(self, node):
pass
def exitEveryRule(self, ctx):
print("exit " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text)
# This is all the parsing support code, essentially; most of it is error recovery stuff.#
class Parser (Recognizer):
# This field maps from the serialized ATN string to the deserialized {@link ATN} with
# bypass alternatives.
#
# @see ATNDeserializationOptions#isGenerateRuleBypassTransitions()
#
bypassAltsAtnCache = dict()
def __init__(self, input):
super(Parser, self).__init__()
# The input stream.
self._input = None
# The error handling strategy for the parser. The default value is a new
# instance of {@link DefaultErrorStrategy}.
self._errHandler = DefaultErrorStrategy()
self._precedenceStack = list()
self._precedenceStack.append(0)
# The {@link ParserRuleContext} object for the currently executing rule.
# This is always non-null during the parsing process.
self._ctx = None
# Specifies whether or not the parser should construct a parse tree during
# the parsing process. The default value is {@code true}.
self.buildParseTrees = True
# When {@link #setTrace}{@code (true)} is called, a reference to the
# {@link TraceListener} is stored here so it can be easily removed in a
# later call to {@link #setTrace}{@code (false)}. The listener itself is
# implemented as a parser listener so this field is not directly used by
# other parser methods.
self._tracer = None
# The list of {@link ParseTreeListener} listeners registered to receive
# events during the parse.
self._parseListeners = None
# The number of syntax errors reported during parsing. This value is
# incremented each time {@link #notifyErrorListeners} is called.
self._syntaxErrors = 0
self.setInputStream(input)
# reset the parser's state#
def reset(self):
if self._input is not None:
self._input.seek(0)
self._errHandler.reset(self)
self._ctx = None
self._syntaxErrors = 0
self.setTrace(False)
self._precedenceStack = list()
self._precedenceStack.append(0)
if self._interp is not None:
self._interp.reset()
# Match current input symbol against {@code ttype}. If the symbol type
# matches, {@link ANTLRErrorStrategy#reportMatch} and {@link #consume} are
# called to complete the match process.
#
# <p>If the symbol type does not match,
# {@link ANTLRErrorStrategy#recoverInline} is called on the current error
# strategy to attempt recovery. If {@link #getBuildParseTree} is
# {@code true} and the token index of the symbol returned by
# {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
# the parse tree by calling {@link ParserRuleContext#addErrorNode}.</p>
#
# @param ttype the token type to match
# @return the matched symbol
# @throws RecognitionException if the current input symbol did not match
# {@code ttype} and the error strategy could not recover from the
# mismatched symbol
def match(self, ttype):
t = self.getCurrentToken()
if t.type==ttype:
self._errHandler.reportMatch(self)
self.consume()
else:
t = self._errHandler.recoverInline(self)
if self.buildParseTrees and t.tokenIndex==-1:
# we must have conjured up a new token during single token insertion
# if it's not the current symbol
self._ctx.addErrorNode(t)
return t
# Match current input symbol as a wildcard. If the symbol type matches
# (i.e. has a value greater than 0), {@link ANTLRErrorStrategy#reportMatch}
# and {@link #consume} are called to complete the match process.
#
# <p>If the symbol type does not match,
# {@link ANTLRErrorStrategy#recoverInline} is called on the current error
# strategy to attempt recovery. If {@link #getBuildParseTree} is
# {@code true} and the token index of the symbol returned by
# {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
# the parse tree by calling {@link ParserRuleContext#addErrorNode}.</p>
#
# @return the matched symbol
# @throws RecognitionException if the current input symbol did not match
# a wildcard and the error strategy could not recover from the mismatched
# symbol
def matchWildcard(self):
t = self.getCurrentToken()
if t.type > 0:
self._errHandler.reportMatch(self)
self.consume()
else:
t = self._errHandler.recoverInline(self)
if self.buildParseTrees and t.tokenIndex == -1:
# we must have conjured up a new token during single token insertion
# if it's not the current symbol
self._ctx.addErrorNode(t)
return t
def getParseListeners(self):
return list() if self._parseListeners is None else self._parseListeners
# Registers {@code listener} to receive events during the parsing process.
#
# <p>To support output-preserving grammar transformations (including but not
# limited to left-recursion removal, automated left-factoring, and
# optimized code generation), calls to listener methods during the parse
# may differ substantially from calls made by
# {@link ParseTreeWalker#DEFAULT} used after the parse is complete. In
# particular, rule entry and exit events may occur in a different order
# during the parse than after the parser. In addition, calls to certain
# rule entry methods may be omitted.</p>
#
# <p>With the following specific exceptions, calls to listener events are
# <em>deterministic</em>, i.e. for identical input the calls to listener
# methods will be the same.</p>
#
# <ul>
# <li>Alterations to the grammar used to generate code may change the
# behavior of the listener calls.</li>
# <li>Alterations to the command line options passed to ANTLR 4 when
# generating the parser may change the behavior of the listener calls.</li>
# <li>Changing the version of the ANTLR Tool used to generate the parser
# may change the behavior of the listener calls.</li>
# </ul>
#
# @param listener the listener to add
#
# @throws NullPointerException if {@code} listener is {@code null}
#
def addParseListener(self, listener):
if listener is None:
raise ReferenceError("listener")
if self._parseListeners is None:
self._parseListeners = []
self._parseListeners.append(listener)
#
# Remove {@code listener} from the list of parse listeners.
#
# <p>If {@code listener} is {@code null} or has not been added as a parse
# listener, this method does nothing.</p>
# @param listener the listener to remove
#
def removeParseListener(self, listener):
if self._parseListeners is not None:
self._parseListeners.remove(listener)
if len(self._parseListeners)==0:
self._parseListeners = None
# Remove all parse listeners.
def removeParseListeners(self):
self._parseListeners = None
# Notify any parse listeners of an enter rule event.
def triggerEnterRuleEvent(self):
if self._parseListeners is not None:
for listener in self._parseListeners:
listener.enterEveryRule(self._ctx)
self._ctx.enterRule(listener)
#
# Notify any parse listeners of an exit rule event.
#
# @see #addParseListener
#
def triggerExitRuleEvent(self):
if self._parseListeners is not None:
# reverse order walk of listeners
for listener in reversed(self._parseListeners):
self._ctx.exitRule(listener)
listener.exitEveryRule(self._ctx)
def getTokenFactory(self):
return self._input.tokenSource._factory
# Tell our token source and error strategy about a new way to create tokens.#
def setTokenFactory(self, factory):
self._input.tokenSource._factory = factory
# The ATN with bypass alternatives is expensive to create so we create it
# lazily.
#
# @throws UnsupportedOperationException if the current parser does not
# implement the {@link #getSerializedATN()} method.
#
def getATNWithBypassAlts(self):
serializedAtn = self.getSerializedATN()
if serializedAtn is None:
raise UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives.")
result = self.bypassAltsAtnCache.get(serializedAtn, None)
if result is None:
deserializationOptions = ATNDeserializationOptions()
deserializationOptions.generateRuleBypassTransitions = True
result = ATNDeserializer(deserializationOptions).deserialize(serializedAtn)
self.bypassAltsAtnCache[serializedAtn] = result
return result
# The preferred method of getting a tree pattern. For example, here's a
# sample use:
#
# <pre>
# ParseTree t = parser.expr();
# ParseTreePattern p = parser.compileParseTreePattern("&lt;ID&gt;+0", MyParser.RULE_expr);
# ParseTreeMatch m = p.match(t);
# String id = m.get("ID");
# </pre>
#
def compileParseTreePattern(self, pattern, patternRuleIndex, lexer = None):
if lexer is None:
if self.getTokenStream() is not None:
tokenSource = self.getTokenStream().getTokenSource()
if isinstance( tokenSource, Lexer ):
lexer = tokenSource
if lexer is None:
raise UnsupportedOperationException("Parser can't discover a lexer to use")
m = ParseTreePatternMatcher(lexer, self)
return m.compile(pattern, patternRuleIndex)
def getInputStream(self):
return self.getTokenStream()
def setInputStream(self, input):
self.setTokenStream(input)
def getTokenStream(self):
return self._input
# Set the token stream and reset the parser.#
def setTokenStream(self, input):
self._input = None
self.reset()
self._input = input
# Match needs to return the current input symbol, which gets put
# into the label for the associated token ref; e.g., x=ID.
#
def getCurrentToken(self):
return self._input.LT(1)
def notifyErrorListeners(self, msg, offendingToken = None, e = None):
if offendingToken is None:
offendingToken = self.getCurrentToken()
self._syntaxErrors += 1
line = offendingToken.line
column = offendingToken.column
listener = self.getErrorListenerDispatch()
listener.syntaxError(self, offendingToken, line, column, msg, e)
#
# Consume and return the {@linkplain #getCurrentToken current symbol}.
#
# <p>E.g., given the following input with {@code A} being the current
# lookahead symbol, this function moves the cursor to {@code B} and returns
# {@code A}.</p>
#
# <pre>
# A B
# ^
# </pre>
#
# If the parser is not in error recovery mode, the consumed symbol is added
# to the parse tree using {@link ParserRuleContext#addChild(Token)}, and
# {@link ParseTreeListener#visitTerminal} is called on any parse listeners.
# If the parser <em>is</em> in error recovery mode, the consumed symbol is
# added to the parse tree using
# {@link ParserRuleContext#addErrorNode(Token)}, and
# {@link ParseTreeListener#visitErrorNode} is called on any parse
# listeners.
#
def consume(self):
o = self.getCurrentToken()
if o.type != Token.EOF:
self.getInputStream().consume()
hasListener = self._parseListeners is not None and len(self._parseListeners)>0
if self.buildParseTrees or hasListener:
if self._errHandler.inErrorRecoveryMode(self):
node = self._ctx.addErrorNode(o)
else:
node = self._ctx.addTokenNode(o)
if hasListener:
for listener in self._parseListeners:
listener.visitTerminal(node)
return o
def addContextToParseTree(self):
# add current context to parent if we have a parent
if self._ctx.parentCtx is not None:
self._ctx.parentCtx.addChild(self._ctx)
# Always called by generated parsers upon entry to a rule. Access field
# {@link #_ctx} get the current context.
#
def enterRule(self, localctx , state , ruleIndex ):
self.state = state
self._ctx = localctx
self._ctx.start = self._input.LT(1)
if self.buildParseTrees:
self.addContextToParseTree()
if self._parseListeners is not None:
self.triggerEnterRuleEvent()
def exitRule(self):
self._ctx.stop = self._input.LT(-1)
# trigger event on _ctx, before it reverts to parent
if self._parseListeners is not None:
self.triggerExitRuleEvent()
self.state = self._ctx.invokingState
self._ctx = self._ctx.parentCtx
def enterOuterAlt(self, localctx, altNum):
# if we have new localctx, make sure we replace existing ctx
# that is previous child of parse tree
if self.buildParseTrees and self._ctx != localctx:
if self._ctx.parentCtx is not None:
self._ctx.parentCtx.removeLastChild()
self._ctx.parentCtx.addChild(localctx)
self._ctx = localctx
# Get the precedence level for the top-most precedence rule.
#
# @return The precedence level for the top-most precedence rule, or -1 if
# the parser context is not nested within a precedence rule.
#
def getPrecedence(self):
if len(self._precedenceStack)==0:
return -1
else:
return self._precedenceStack[-1]
def enterRecursionRule(self, localctx, state, ruleIndex, precedence):
self.state = state
self._precedenceStack.append(precedence)
self._ctx = localctx
self._ctx.start = self._input.LT(1)
if self._parseListeners is not None:
self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules
#
# Like {@link #enterRule} but for recursive rules.
#
def pushNewRecursionContext(self, localctx, state, ruleIndex):
previous = self._ctx
previous.parentCtx = localctx
previous.invokingState = state
previous.stop = self._input.LT(-1)
self._ctx = localctx
self._ctx.start = previous.start
if self.buildParseTrees:
self._ctx.addChild(previous)
if self._parseListeners is not None:
self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules
def unrollRecursionContexts(self, parentCtx):
self._precedenceStack.pop()
self._ctx.stop = self._input.LT(-1)
retCtx = self._ctx # save current ctx (return value)
# unroll so _ctx is as it was before call to recursive method
if self._parseListeners is not None:
while self._ctx is not parentCtx:
self.triggerExitRuleEvent()
self._ctx = self._ctx.parentCtx
else:
self._ctx = parentCtx
# hook into tree
retCtx.parentCtx = parentCtx
if self.buildParseTrees and parentCtx is not None:
# add return ctx into invoking rule's tree
parentCtx.addChild(retCtx)
def getInvokingContext(self, ruleIndex):
ctx = self._ctx
while ctx is not None:
if ctx.ruleIndex == ruleIndex:
return ctx
ctx = ctx.parentCtx
return None
def precpred(self, localctx , precedence):
return precedence >= self._precedenceStack[-1]
def inContext(self, context):
# TODO: useful in parser?
return False
#
# Checks whether or not {@code symbol} can follow the current state in the
# ATN. The behavior of this method is equivalent to the following, but is
# implemented such that the complete context-sensitive follow set does not
# need to be explicitly constructed.
#
# <pre>
# return getExpectedTokens().contains(symbol);
# </pre>
#
# @param symbol the symbol type to check
# @return {@code true} if {@code symbol} can follow the current state in
# the ATN, otherwise {@code false}.
#
def isExpectedToken(self, symbol):
atn = self._interp.atn
ctx = self._ctx
s = atn.states[self.state]
following = atn.nextTokens(s)
if symbol in following:
return True
if not Token.EPSILON in following:
return False
while ctx is not None and ctx.invokingState>=0 and Token.EPSILON in following:
invokingState = atn.states[ctx.invokingState]
rt = invokingState.transitions[0]
following = atn.nextTokens(rt.followState)
if symbol in following:
return True
ctx = ctx.parentCtx
if Token.EPSILON in following and symbol == Token.EOF:
return True
else:
return False
# Computes the set of input symbols which could follow the current parser
# state and context, as given by {@link #getState} and {@link #getContext},
# respectively.
#
# @see ATN#getExpectedTokens(int, RuleContext)
#
def getExpectedTokens(self):
return self._interp.atn.getExpectedTokens(self.state, self._ctx)
def getExpectedTokensWithinCurrentRule(self):
atn = self._interp.atn
s = atn.states[self.state]
return atn.nextTokens(s)
# Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found.#
def getRuleIndex(self, ruleName):
ruleIndex = self.getRuleIndexMap().get(ruleName, None)
if ruleIndex is not None:
return ruleIndex
else:
return -1
# Return List&lt;String&gt; of the rule names in your parser instance
# leading up to a call to the current rule. You could override if
# you want more details such as the file/line info of where
# in the ATN a rule is invoked.
#
# this is very useful for error messages.
#
def getRuleInvocationStack(self, p=None):
if p is None:
p = self._ctx
stack = list()
while p is not None:
# compute what follows who invoked us
ruleIndex = p.getRuleIndex()
if ruleIndex<0:
stack.append("n/a")
else:
stack.append(self.ruleNames[ruleIndex])
p = p.parentCtx
return stack
# For debugging and other purposes.#
def getDFAStrings(self):
return [ unicode(dfa) for dfa in self._interp.decisionToDFA]
# For debugging and other purposes.#
def dumpDFA(self):
seenOne = False
for i in range(0, len(self._interp.decisionToDFA)):
dfa = self._interp.decisionToDFA[i]
if len(dfa.states)>0:
if seenOne:
print()
print("Decision " + str(dfa.decision) + ":")
print(dfa.toString(self.literalNames, self.symbolicNames), end='')
seenOne = True
def getSourceName(self):
return self._input.sourceName
# During a parse it is sometimes useful to listen in on the rule entry and exit
# events as well as token matches. This is for quick and dirty debugging.
#
def setTrace(self, trace):
if not trace:
self.removeParseListener(self._tracer)
self._tracer = None
else:
if self._tracer is not None:
self.removeParseListener(self._tracer)
self._tracer = TraceListener(self)
self.addParseListener(self._tracer)
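# Usage sketch (illustrative only, not part of the runtime): attaching a parse
# listener to a generated parser. "parser" is assumed to be an instance of an
# ANTLR-generated Parser subclass; the listener below only prints rule indexes,
# mirroring what setTrace(True) does with rule names.
class _RuleIndexListener(ParseTreeListener):
    def enterEveryRule(self, ctx):
        print("enter rule " + str(ctx.getRuleIndex()))
    def exitEveryRule(self, ctx):
        print("exit rule " + str(ctx.getRuleIndex()))

def _attach_demo_listener(parser):
    parser.addParseListener(_RuleIndexListener())
    # parser.setTrace(True) would additionally echo token consumption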

View File

@ -0,0 +1,187 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# A parser simulator that mimics what ANTLR's generated
# parser code does. A ParserATNSimulator is used to make
# predictions via adaptivePredict but this class moves a pointer through the
# ATN to simulate parsing. ParserATNSimulator just
# makes us efficient rather than having to backtrack, for example.
#
# This properly creates parse trees even for left recursive rules.
#
# We rely on the left recursive rule invocation and special predicate
# transitions to make left recursive rules work.
#
# See TestParserInterpreter for examples.
#
from antlr4 import PredictionContextCache
from antlr4.dfa.DFA import DFA
from antlr4.Parser import Parser
from antlr4.ParserRuleContext import InterpreterRuleContext
from antlr4.Token import Token
from antlr4.atn.ATNState import StarLoopEntryState, ATNState, LoopEndState
from antlr4.atn.ParserATNSimulator import ParserATNSimulator
from antlr4.atn.Transition import Transition
from antlr4.error.Errors import RecognitionException, UnsupportedOperationException, FailedPredicateException
class ParserInterpreter(Parser):
def __init__(self, grammarFileName, tokenNames, ruleNames, atn, input):
super(ParserInterpreter, self).__init__(input)
self.grammarFileName = grammarFileName
self.atn = atn
self.tokenNames = tokenNames
self.ruleNames = ruleNames
self.decisionToDFA = [ DFA(state) for state in atn.decisionToState ]
self.sharedContextCache = PredictionContextCache()
self._parentContextStack = list()
# identify the ATN states where pushNewRecursionContext must be called
self.pushRecursionContextStates = set()
for state in atn.states:
if not isinstance(state, StarLoopEntryState):
continue
if state.precedenceRuleDecision:
self.pushRecursionContextStates.add(state.stateNumber)
# get atn simulator that knows how to do predictions
self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)
# Begin parsing at startRuleIndex#
def parse(self, startRuleIndex):
startRuleStartState = self.atn.ruleToStartState[startRuleIndex]
rootContext = InterpreterRuleContext(None, ATNState.INVALID_STATE_NUMBER, startRuleIndex)
if startRuleStartState.isPrecedenceRule:
self.enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0)
else:
self.enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex)
while True:
p = self.getATNState()
if p.stateType==ATNState.RULE_STOP :
# pop; return from rule
if len(self._ctx)==0:
if startRuleStartState.isPrecedenceRule:
result = self._ctx
parentContext = self._parentContextStack.pop()
self.unrollRecursionContexts(parentContext[0])
return result
else:
self.exitRule()
return rootContext
self.visitRuleStopState(p)
else:
try:
self.visitState(p)
except RecognitionException as e:
self.state = self.atn.ruleToStopState[p.ruleIndex].stateNumber
self._ctx.exception = e
self._errHandler.reportError(self, e)
self._errHandler.recover(self, e)
def enterRecursionRule(self, localctx, state, ruleIndex, precedence):
self._parentContextStack.append((self._ctx, localctx.invokingState))
super(ParserInterpreter, self).enterRecursionRule(localctx, state, ruleIndex, precedence)
def getATNState(self):
return self.atn.states[self.state]
def visitState(self, p):
edge = 0
if len(p.transitions) > 1:
self._errHandler.sync(self)
edge = self._interp.adaptivePredict(self._input, p.decision, self._ctx)
else:
edge = 1
transition = p.transitions[edge - 1]
tt = transition.serializationType
if tt==Transition.EPSILON:
if p.stateNumber in self.pushRecursionContextStates and not isinstance(transition.target, LoopEndState):
t = self._parentContextStack[-1]
ctx = InterpreterRuleContext(t[0], t[1], self._ctx.ruleIndex)
self.pushNewRecursionContext(ctx, self.atn.ruleToStartState[p.ruleIndex].stateNumber, self._ctx.ruleIndex)
elif tt==Transition.ATOM:
self.match(transition.label)
elif tt in [ Transition.RANGE, Transition.SET, Transition.NOT_SET]:
if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, 0xFFFF):
self._errHandler.recoverInline(self)
self.matchWildcard()
elif tt==Transition.WILDCARD:
self.matchWildcard()
elif tt==Transition.RULE:
ruleStartState = transition.target
ruleIndex = ruleStartState.ruleIndex
ctx = InterpreterRuleContext(self._ctx, p.stateNumber, ruleIndex)
if ruleStartState.isPrecedenceRule:
self.enterRecursionRule(ctx, ruleStartState.stateNumber, ruleIndex, transition.precedence)
else:
self.enterRule(ctx, transition.target.stateNumber, ruleIndex)
elif tt==Transition.PREDICATE:
if not self.sempred(self._ctx, transition.ruleIndex, transition.predIndex):
raise FailedPredicateException(self)
elif tt==Transition.ACTION:
self.action(self._ctx, transition.ruleIndex, transition.actionIndex)
elif tt==Transition.PRECEDENCE:
if not self.precpred(self._ctx, transition.precedence):
msg = "precpred(_ctx, " + str(transition.precedence) + ")"
raise FailedPredicateException(self, msg)
else:
raise UnsupportedOperationException("Unrecognized ATN transition type.")
self.state = transition.target.stateNumber
def visitRuleStopState(self, p):
ruleStartState = self.atn.ruleToStartState[p.ruleIndex]
if ruleStartState.isPrecedenceRule:
parentContext = self._parentContextStack.pop()
self.unrollRecursionContexts(parentContext[0])
self.state = parentContext[1]
else:
self.exitRule()
ruleTransition = self.atn.states[self.state].transitions[0]
self.state = ruleTransition.followState.stateNumber
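# Usage sketch (illustrative only, not part of the runtime): driving a
# ParserInterpreter. The caller is assumed to supply an already-deserialized
# ATN plus the grammar's token and rule names, and a token stream to parse.
def _interpret(grammarFileName, tokenNames, ruleNames, atn, tokens, startRuleIndex):
    interp = ParserInterpreter(grammarFileName, tokenNames, ruleNames, atn, tokens)
    # parse() walks the ATN, building InterpreterRuleContext nodes, and
    # returns the root context for the requested start rule
    return interp.parse(startRuleIndex)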

View File

@ -0,0 +1,188 @@
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#* A rule invocation record for parsing.
#
# Contains all of the information about the current rule not stored in the
# RuleContext. It handles the parse tree children list, any ATN state
# tracing, and the default values available for rule invocations:
# start, stop, rule index, current alt number, current
# ATN state.
#
# Subclasses made for each rule and grammar track the parameters,
# return values, locals, and labels specific to that rule. These
# are the objects that are returned from rules.
#
# Note text is not an actual field of a rule return value; it is computed
# from start and stop using the input stream's toString() method. I
# could add a ctor to this so that we can pass in and store the input
# stream, but I'm not sure we want to do that. It would seem to be undefined
# to get the .text property anyway if the rule matches tokens from multiple
# input streams.
#
# I do not use getters for fields of objects that are used simply to
# group values such as this aggregate. The getters/setters are there to
# satisfy the superclass interface.
from antlr4.RuleContext import RuleContext
from antlr4.tree.Tree import TerminalNodeImpl, ErrorNodeImpl, TerminalNode, INVALID_INTERVAL
class ParserRuleContext(RuleContext):
def __init__(self, parent = None, invokingStateNumber = None ):
super(ParserRuleContext, self).__init__(parent, invokingStateNumber)
#* If we are debugging or building a parse tree for a visitor,
# we need to track all of the tokens and rule invocations associated
# with this rule's context. This is empty for parsing w/o tree constr.
# operation because we don't need to track the details about
# how we parse this rule.
#/
self.children = None
self.start = None
self.stop = None
# The exception that forced this rule to return. If the rule successfully
# completed, this is {@code null}.
self.exception = None
#* COPY a ctx (I'm deliberately not using copy constructor)#/
def copyFrom(self, ctx):
# from RuleContext
self.parentCtx = ctx.parentCtx
self.invokingState = ctx.invokingState
self.children = None
self.start = ctx.start
self.stop = ctx.stop
# Double dispatch methods for listeners
def enterRule(self, listener):
pass
def exitRule(self, listener):
pass
#* Does not set parent link; other add methods do that#/
def addChild(self, child):
if self.children is None:
self.children = []
self.children.append(child)
return child
#* Used by enterOuterAlt to toss out a RuleContext previously added as
# we entered a rule. If we have # label, we will need to remove
# generic ruleContext object.
#/
def removeLastChild(self):
if self.children is not None:
del self.children[len(self.children)-1]
def addTokenNode(self, token):
node = TerminalNodeImpl(token)
self.addChild(node)
node.parentCtx = self
return node
def addErrorNode(self, badToken):
node = ErrorNodeImpl(badToken)
self.addChild(node)
node.parentCtx = self
return node
def getChild(self, i, ttype = None):
if ttype is None:
return self.children[i] if len(self.children)>i else None
else:
for child in self.getChildren():
if not isinstance(child, ttype):
continue
if i==0:
return child
i -= 1
return None
def getChildren(self, predicate = None):
if self.children is not None:
for child in self.children:
if predicate is not None and not predicate(child):
continue
yield child
def getToken(self, ttype, i):
for child in self.getChildren():
if not isinstance(child, TerminalNode):
continue
if child.symbol.type != ttype:
continue
if i==0:
return child
i -= 1
return None
def getTokens(self, ttype ):
if self.getChildren() is None:
return []
tokens = []
for child in self.getChildren():
if not isinstance(child, TerminalNode):
continue
if child.symbol.type != ttype:
continue
tokens.append(child)
return tokens
def getTypedRuleContext(self, ctxType, i):
return self.getChild(i, ctxType)
def getTypedRuleContexts(self, ctxType):
children = self.getChildren()
if children is None:
return []
contexts = []
for child in children:
if not isinstance(child, ctxType):
continue
contexts.append(child)
return contexts
def getChildCount(self):
return len(self.children) if self.children else 0
def getSourceInterval(self):
if self.start is None or self.stop is None:
return INVALID_INTERVAL
else:
return (self.start.tokenIndex, self.stop.tokenIndex)
RuleContext.EMPTY = ParserRuleContext()
class InterpreterRuleContext(ParserRuleContext):
def __init__(self, parent, invokingStateNumber, ruleIndex):
super(InterpreterRuleContext, self).__init__(parent, invokingStateNumber)
self.ruleIndex = ruleIndex
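# Usage sketch (illustrative only, not part of the runtime): collecting the
# text of every terminal child of a context. Works on any ParserRuleContext
# produced with buildParseTrees enabled.
def _terminal_texts(ctx):
    texts = []
    for child in ctx.getChildren():
        if isinstance(child, TerminalNode):
            texts.append(child.symbol.text)
    return texts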

View File

@ -0,0 +1,660 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from io import StringIO
from antlr4.RuleContext import RuleContext
from antlr4.atn.ATNState import ATNState
class PredictionContext(object):
# Represents {@code $} in local context prediction, which means wildcard.
# {@code *+x = *}.
#/
EMPTY = None
# Represents {@code $} in an array in full context mode, when {@code $}
# doesn't mean wildcard: {@code $ + x = [$,x]}. Here,
# {@code $} = {@link #EMPTY_RETURN_STATE}.
#/
EMPTY_RETURN_STATE = 0x7FFFFFFF
globalNodeCount = 1
id = globalNodeCount
# Stores the computed hash code of this {@link PredictionContext}. The hash
# code is computed in parts to match the following reference algorithm.
#
# <pre>
# private int referenceHashCode() {
# int hash = {@link MurmurHash#initialize MurmurHash.initialize}({@link #INITIAL_HASH});
#
# for (int i = 0; i &lt; {@link #size()}; i++) {
# hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getParent getParent}(i));
# }
#
# for (int i = 0; i &lt; {@link #size()}; i++) {
# hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getReturnState getReturnState}(i));
# }
#
# hash = {@link MurmurHash#finish MurmurHash.finish}(hash, 2 * {@link #size()});
# return hash;
# }
# </pre>
#/
def __init__(self, cachedHashCode):
self.cachedHashCode = cachedHashCode
# This means only the {@link #EMPTY} context is in set.
def isEmpty(self):
return self is self.EMPTY
def hasEmptyPath(self):
return self.getReturnState(len(self) - 1) == self.EMPTY_RETURN_STATE
def __hash__(self):
return self.cachedHashCode
def __str__(self):
return unicode(self)
def calculateHashCode(parent, returnState):
return hash( str(parent) + str(returnState))
def calculateEmptyHashCode():
return hash("")
# Used to cache {@link PredictionContext} objects. It is used for the shared
# context cache associated with contexts in DFA states. This cache
# can be used for both lexers and parsers.
class PredictionContextCache(object):
def __init__(self):
self.cache = dict()
# Add a context to the cache and return it. If the context already exists,
# return that one instead and do not add a new context to the cache.
# Protect shared cache from unsafe thread access.
#
def add(self, ctx):
if ctx==PredictionContext.EMPTY:
return PredictionContext.EMPTY
existing = self.cache.get(ctx, None)
if existing is not None:
return existing
self.cache[ctx] = ctx
return ctx
def get(self, ctx):
return self.cache.get(ctx, None)
def __len__(self):
return len(self.cache)
class SingletonPredictionContext(PredictionContext):
@staticmethod
def create(parent , returnState ):
if returnState == PredictionContext.EMPTY_RETURN_STATE and parent is None:
# someone can pass in the bits of an array ctx that mean $
return SingletonPredictionContext.EMPTY
else:
return SingletonPredictionContext(parent, returnState)
def __init__(self, parent, returnState):
assert returnState!=ATNState.INVALID_STATE_NUMBER
hashCode = calculateHashCode(parent, returnState) if parent is not None else calculateEmptyHashCode()
super(SingletonPredictionContext, self).__init__(hashCode)
self.parentCtx = parent
self.returnState = returnState
def __len__(self):
return 1
def getParent(self, index):
assert index == 0
return self.parentCtx
def getReturnState(self, index):
assert index == 0
return self.returnState
def __eq__(self, other):
if self is other:
return True
elif other is None:
return False
elif not isinstance(other, SingletonPredictionContext):
return False
elif hash(self) != hash(other):
return False # can't be same if hash is different
else:
return self.returnState == other.returnState and self.parentCtx==other.parentCtx
def __hash__(self):
return self.cachedHashCode
def __unicode__(self):
up = "" if self.parentCtx is None else unicode(self.parentCtx)
if len(up)==0:
if self.returnState == self.EMPTY_RETURN_STATE:
return u"$"
else:
return unicode(self.returnState)
else:
return unicode(self.returnState) + u" " + up
class EmptyPredictionContext(SingletonPredictionContext):
def __init__(self):
super(EmptyPredictionContext, self).__init__(None, self.EMPTY_RETURN_STATE)
def isEmpty(self):
return True
def getParent(self, index):
return None
def getReturnState(self, index):
return self.returnState
def __eq__(self, other):
return self is other
def __unicode__(self):
return "$"
PredictionContext.EMPTY = EmptyPredictionContext()
class ArrayPredictionContext(PredictionContext):
# Parent can be null only if full ctx mode and we make an array
# from {@link #EMPTY} and non-empty. We merge {@link #EMPTY} by using null parent and
# returnState == {@link #EMPTY_RETURN_STATE}.
def __init__(self, parents, returnStates):
super(ArrayPredictionContext, self).__init__(calculateHashCode(parents, returnStates))
assert parents is not None and len(parents)>0
assert returnStates is not None and len(returnStates)>0
self.parents = parents
self.returnStates = returnStates
def isEmpty(self):
# since EMPTY_RETURN_STATE can only appear in the last position, we
# don't need to verify that size==1
return self.returnStates[0]==PredictionContext.EMPTY_RETURN_STATE
def __len__(self):
return len(self.returnStates)
def getParent(self, index):
return self.parents[index]
def getReturnState(self, index):
return self.returnStates[index]
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, ArrayPredictionContext):
return False
elif hash(self) != hash(other):
return False # can't be same if hash is different
else:
return self.returnStates==other.returnStates and self.parents==other.parents
def __unicode__(self):
if self.isEmpty():
return "[]"
with StringIO() as buf:
buf.write(u"[")
for i in range(0,len(self.returnStates)):
if i>0:
buf.write(u", ")
if self.returnStates[i]==PredictionContext.EMPTY_RETURN_STATE:
buf.write(u"$")
continue
buf.write(unicode(self.returnStates[i]))
if self.parents[i] is not None:
buf.write(u' ')
buf.write(unicode(self.parents[i]))
else:
buf.write(u"null")
buf.write(u"]")
return buf.getvalue()
# Convert a {@link RuleContext} tree to a {@link PredictionContext} graph.
# Return {@link #EMPTY} if {@code outerContext} is empty or null.
#/
def PredictionContextFromRuleContext(atn, outerContext=None):
if outerContext is None:
outerContext = RuleContext.EMPTY
# if we are in RuleContext of start rule, s, then PredictionContext
# is EMPTY. Nobody called us. (if we are empty, return empty)
if outerContext.parentCtx is None or outerContext is RuleContext.EMPTY:
return PredictionContext.EMPTY
# If we have a parent, convert it to a PredictionContext graph
parent = PredictionContextFromRuleContext(atn, outerContext.parentCtx)
state = atn.states[outerContext.invokingState]
transition = state.transitions[0]
return SingletonPredictionContext.create(parent, transition.followState.stateNumber)
def calculateListsHashCode(parents, returnStates ):
with StringIO() as s:
for parent in parents:
s.write(unicode(parent))
for returnState in returnStates:
s.write(unicode(returnState))
return hash(s.getvalue())
def merge(a, b, rootIsWildcard, mergeCache):
assert a is not None and b is not None # must be empty context, never null
# share same graph if both same
if a==b:
return a
if isinstance(a, SingletonPredictionContext) and isinstance(b, SingletonPredictionContext):
return mergeSingletons(a, b, rootIsWildcard, mergeCache)
# At least one of a or b is array
# If one is $ and rootIsWildcard, return $ as * wildcard
if rootIsWildcard:
if isinstance( a, EmptyPredictionContext ):
return a
if isinstance( b, EmptyPredictionContext ):
return b
# convert singleton so both are arrays to normalize
if isinstance( a, SingletonPredictionContext ):
a = ArrayPredictionContext([a.parentCtx], [a.returnState])
if isinstance( b, SingletonPredictionContext):
b = ArrayPredictionContext([b.parentCtx], [b.returnState])
return mergeArrays(a, b, rootIsWildcard, mergeCache)
#
# Merge two {@link SingletonPredictionContext} instances.
#
# <p>Stack tops equal, parents merge is same; return left graph.<br>
# <embed src="images/SingletonMerge_SameRootSamePar.svg" type="image/svg+xml"/></p>
#
# <p>Same stack top, parents differ; merge parents giving array node, then
# remainders of those graphs. A new root node is created to point to the
# merged parents.<br>
# <embed src="images/SingletonMerge_SameRootDiffPar.svg" type="image/svg+xml"/></p>
#
# <p>Different stack tops pointing to same parent. Make array node for the
# root where both element in the root point to the same (original)
# parent.<br>
# <embed src="images/SingletonMerge_DiffRootSamePar.svg" type="image/svg+xml"/></p>
#
# <p>Different stack tops pointing to different parents. Make array node for
# the root where each element points to the corresponding original
# parent.<br>
# <embed src="images/SingletonMerge_DiffRootDiffPar.svg" type="image/svg+xml"/></p>
#
# @param a the first {@link SingletonPredictionContext}
# @param b the second {@link SingletonPredictionContext}
# @param rootIsWildcard {@code true} if this is a local-context merge,
# otherwise false to indicate a full-context merge
# @param mergeCache
#/
def mergeSingletons(a, b, rootIsWildcard, mergeCache):
if mergeCache is not None:
previous = mergeCache.get(a,b)
if previous is not None:
return previous
previous = mergeCache.get(b,a)
if previous is not None:
return previous
rootMerge = mergeRoot(a, b, rootIsWildcard)
if rootMerge is not None:
if mergeCache is not None:
mergeCache.put(a, b, rootMerge)
return rootMerge
if a.returnState==b.returnState:
parent = merge(a.parentCtx, b.parentCtx, rootIsWildcard, mergeCache)
# if parent is same as existing a or b parent or reduced to a parent, return it
if parent == a.parentCtx:
return a # ax + bx = ax, if a=b
if parent == b.parentCtx:
return b # ax + bx = bx, if a=b
# else: ax + ay = a'[x,y]
# merge parents x and y, giving array node with x,y then remainders
# of those graphs. dup a, a' points at merged array
# new joined parent so create new singleton pointing to it, a'
a_ = SingletonPredictionContext.create(parent, a.returnState)
if mergeCache is not None:
mergeCache.put(a, b, a_)
return a_
else: # a != b payloads differ
# see if we can collapse parents due to $+x parents if local ctx
singleParent = None
if a is b or (a.parentCtx is not None and a.parentCtx==b.parentCtx): # ax + bx = [a,b]x
singleParent = a.parentCtx
if singleParent is not None: # parents are same
# sort payloads and use same parent
payloads = [ a.returnState, b.returnState ]
if a.returnState > b.returnState:
payloads[0] = b.returnState
payloads[1] = a.returnState
parents = [singleParent, singleParent]
a_ = ArrayPredictionContext(parents, payloads)
if mergeCache is not None:
mergeCache.put(a, b, a_)
return a_
# parents differ and can't merge them. Just pack together
# into array; can't merge.
# ax + by = [ax,by]
payloads = [ a.returnState, b.returnState ]
parents = [ a.parentCtx, b.parentCtx ]
if a.returnState > b.returnState: # sort by payload
payloads[0] = b.returnState
payloads[1] = a.returnState
parents = [ b.parentCtx, a.parentCtx ]
a_ = ArrayPredictionContext(parents, payloads)
if mergeCache is not None:
mergeCache.put(a, b, a_)
return a_
#
# Handle case where at least one of {@code a} or {@code b} is
# {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used
# to represent {@link #EMPTY}.
#
# <h2>Local-Context Merges</h2>
#
# <p>These local-context merge operations are used when {@code rootIsWildcard}
# is true.</p>
#
# <p>{@link #EMPTY} is superset of any graph; return {@link #EMPTY}.<br>
# <embed src="images/LocalMerge_EmptyRoot.svg" type="image/svg+xml"/></p>
#
# <p>{@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is
# {@code #EMPTY}; return left graph.<br>
# <embed src="images/LocalMerge_EmptyParent.svg" type="image/svg+xml"/></p>
#
# <p>Special case of last merge if local context.<br>
# <embed src="images/LocalMerge_DiffRoots.svg" type="image/svg+xml"/></p>
#
# <h2>Full-Context Merges</h2>
#
# <p>These full-context merge operations are used when {@code rootIsWildcard}
# is false.</p>
#
# <p><embed src="images/FullMerge_EmptyRoots.svg" type="image/svg+xml"/></p>
#
# <p>Must keep all contexts; {@link #EMPTY} in array is a special value (and
# null parent).<br>
# <embed src="images/FullMerge_EmptyRoot.svg" type="image/svg+xml"/></p>
#
# <p><embed src="images/FullMerge_SameRoot.svg" type="image/svg+xml"/></p>
#
# @param a the first {@link SingletonPredictionContext}
# @param b the second {@link SingletonPredictionContext}
# @param rootIsWildcard {@code true} if this is a local-context merge,
# otherwise false to indicate a full-context merge
#/
def mergeRoot(a, b, rootIsWildcard):
if rootIsWildcard:
if a == PredictionContext.EMPTY:
return PredictionContext.EMPTY # * + b = *
if b == PredictionContext.EMPTY:
return PredictionContext.EMPTY # a + * = *
else:
if a == PredictionContext.EMPTY and b == PredictionContext.EMPTY:
return PredictionContext.EMPTY # $ + $ = $
elif a == PredictionContext.EMPTY: # $ + x = [$,x]
payloads = [ b.returnState, PredictionContext.EMPTY_RETURN_STATE ]
parents = [ b.parentCtx, None ]
return ArrayPredictionContext(parents, payloads)
elif b == PredictionContext.EMPTY: # x + $ = [$,x] ($ is always first if present)
payloads = [ a.returnState, PredictionContext.EMPTY_RETURN_STATE ]
parents = [ a.parentCtx, None ]
return ArrayPredictionContext(parents, payloads)
return None
#
# Merge two {@link ArrayPredictionContext} instances.
#
# <p>Different tops, different parents.<br>
# <embed src="images/ArrayMerge_DiffTopDiffPar.svg" type="image/svg+xml"/></p>
#
# <p>Shared top, same parents.<br>
# <embed src="images/ArrayMerge_ShareTopSamePar.svg" type="image/svg+xml"/></p>
#
# <p>Shared top, different parents.<br>
# <embed src="images/ArrayMerge_ShareTopDiffPar.svg" type="image/svg+xml"/></p>
#
# <p>Shared top, all shared parents.<br>
# <embed src="images/ArrayMerge_ShareTopSharePar.svg" type="image/svg+xml"/></p>
#
# <p>Equal tops, merge parents and reduce top to
# {@link SingletonPredictionContext}.<br>
# <embed src="images/ArrayMerge_EqualTop.svg" type="image/svg+xml"/></p>
#/
def mergeArrays(a, b, rootIsWildcard, mergeCache):
if mergeCache is not None:
previous = mergeCache.get(a,b)
if previous is not None:
return previous
previous = mergeCache.get(b,a)
if previous is not None:
return previous
# merge sorted payloads a + b => M
i = 0; # walks a
j = 0; # walks b
k = 0; # walks target M array
mergedReturnStates = [None] * (len(a.returnStates) + len(b.returnStates))
mergedParents = [None] * len(mergedReturnStates)
# walk and merge to yield mergedParents, mergedReturnStates
while i<len(a.returnStates) and j<len(b.returnStates):
a_parent = a.parents[i]
b_parent = b.parents[j]
if a.returnStates[i]==b.returnStates[j]:
# same payload (stack tops are equal), must yield merged singleton
payload = a.returnStates[i]
# $+$ = $
bothDollars = payload == PredictionContext.EMPTY_RETURN_STATE and \
a_parent is None and b_parent is None
ax_ax = (a_parent is not None and b_parent is not None) and a_parent==b_parent # ax+ax -> ax
if bothDollars or ax_ax:
mergedParents[k] = a_parent # choose left
mergedReturnStates[k] = payload
else: # ax+ay -> a'[x,y]
mergedParent = merge(a_parent, b_parent, rootIsWildcard, mergeCache)
mergedParents[k] = mergedParent
mergedReturnStates[k] = payload
i += 1 # hop over left one as usual
j += 1 # but also skip one in right side since we merge
elif a.returnStates[i]<b.returnStates[j]: # copy a[i] to M
mergedParents[k] = a_parent
mergedReturnStates[k] = a.returnStates[i]
i += 1
else: # b > a, copy b[j] to M
mergedParents[k] = b_parent
mergedReturnStates[k] = b.returnStates[j]
j += 1
k += 1
# copy over any payloads remaining in either array
if i < len(a.returnStates):
for p in range(i, len(a.returnStates)):
mergedParents[k] = a.parents[p]
mergedReturnStates[k] = a.returnStates[p]
k += 1
else:
for p in range(j, len(b.returnStates)):
mergedParents[k] = b.parents[p]
mergedReturnStates[k] = b.returnStates[p]
k += 1
# trim merged if we combined a few that had same stack tops
if k < len(mergedParents): # write index < last position; trim
if k == 1: # for just one merged element, return singleton top
a_ = SingletonPredictionContext.create(mergedParents[0], mergedReturnStates[0])
if mergeCache is not None:
mergeCache.put(a,b,a_)
return a_
mergedParents = mergedParents[0:k]
mergedReturnStates = mergedReturnStates[0:k]
M = ArrayPredictionContext(mergedParents, mergedReturnStates)
# if we created same array as a or b, return that instead
# TODO: track whether this is possible above during merge sort for speed
if M==a:
if mergeCache is not None:
mergeCache.put(a,b,a)
return a
if M==b:
if mergeCache is not None:
mergeCache.put(a,b,b)
return b
combineCommonParents(mergedParents)
if mergeCache is not None:
mergeCache.put(a,b,M)
return M
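# Illustrative walk of the merge above (comments only, not executable as-is): merging an
# array context whose returnStates are [1, 9] with one whose returnStates are [5, 9]
# interleaves the sorted payloads into [1, 5, 9]; the two entries sharing return state 9
# collapse into a single slot whose parent is merge(a.parents[1], b.parents[1]), and
# combineCommonParents() then unifies any remaining equal parent graphs by identity.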
#
# Make pass over all <em>M</em> {@code parents}; merge any {@code equals()}
# ones.
#/
def combineCommonParents(parents):
uniqueParents = dict()
for p in range(0, len(parents)):
parent = parents[p]
if uniqueParents.get(parent, None) is None:
uniqueParents[parent] = parent
for p in range(0, len(parents)):
parents[p] = uniqueParents[parents[p]]
def getCachedPredictionContext(context, contextCache, visited):
if context.isEmpty():
return context
existing = visited.get(context)
if existing is not None:
return existing
existing = contextCache.get(context)
if existing is not None:
visited[context] = existing
return existing
changed = False
parents = [None] * len(context)
for i in range(0, len(parents)):
parent = getCachedPredictionContext(context.getParent(i), contextCache, visited)
if changed or parent is not context.getParent(i):
if not changed:
parents = [None] * len(context)
for j in range(0, len(context)):
parents[j] = context.getParent(j)
changed = True
parents[i] = parent
if not changed:
contextCache.add(context)
visited[context] = context
return context
updated = None
if len(parents) == 0:
updated = PredictionContext.EMPTY
elif len(parents) == 1:
updated = SingletonPredictionContext.create(parents[0], context.getReturnState(0))
else:
updated = ArrayPredictionContext(parents, context.returnStates)
contextCache.add(updated)
visited[updated] = updated
visited[context] = updated
return updated
# # extra structures, but cut/paste/morphed works, so leave it.
# # seems to do a breadth-first walk
# public static List<PredictionContext> getAllNodes(PredictionContext context) {
# Map<PredictionContext, PredictionContext> visited =
# new IdentityHashMap<PredictionContext, PredictionContext>();
# Deque<PredictionContext> workList = new ArrayDeque<PredictionContext>();
# workList.add(context);
# visited.put(context, context);
# List<PredictionContext> nodes = new ArrayList<PredictionContext>();
# while (!workList.isEmpty()) {
# PredictionContext current = workList.pop();
# nodes.add(current);
# for (int i = 0; i < current.size(); i++) {
# PredictionContext parent = current.getParent(i);
# if ( parent!=null && visited.put(parent, parent) == null) {
# workList.push(parent);
# }
# }
# }
# return nodes;
# }
# ter's recursive version of Sam's getAllNodes()
def getAllContextNodes(context, nodes=None, visited=None):
if nodes is None:
nodes = list()
return getAllContextNodes(context, nodes, visited)
elif visited is None:
visited = dict()
return getAllContextNodes(context, nodes, visited)
else:
if context is None or visited.get(context, None) is not None:
return nodes
visited[context] = context
nodes.append(context)
for i in range(0, len(context)):
getAllContextNodes(context.getParent(i), nodes, visited)
return nodes

View File

@@ -0,0 +1,168 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
from __builtin__ import unicode
from antlr4.Token import Token
from antlr4.error.ErrorListener import ProxyErrorListener, ConsoleErrorListener
class Recognizer(object):
tokenTypeMapCache = dict()
ruleIndexMapCache = dict()
def __init__(self):
self._listeners = [ ConsoleErrorListener.INSTANCE ]
self._interp = None
self._stateNumber = -1
def extractVersion(self, version):
pos = version.find(".")
major = version[0:pos]
version = version[pos+1:]
pos = version.find(".")
if pos==-1:
pos = version.find("-")
if pos==-1:
pos = len(version)
minor = version[0:pos]
return major, minor
def checkVersion(self, toolVersion):
runtimeVersion = "4.5.2"
rvmajor, rvminor = self.extractVersion(runtimeVersion)
tvmajor, tvminor = self.extractVersion(toolVersion)
if rvmajor!=tvmajor or rvminor!=tvminor:
print("ANTLR runtime and generated code versions disagree: "+runtimeVersion+"!="+toolVersion)
def addErrorListener(self, listener):
self._listeners.append(listener)
def removeErrorListener(self, listener):
self._listeners.remove(listener)
def removeErrorListeners(self):
self._listeners = []
def getTokenTypeMap(self):
tokenNames = self.getTokenNames()
if tokenNames is None:
from antlr4.error.Errors import UnsupportedOperationException
raise UnsupportedOperationException("The current recognizer does not provide a list of token names.")
result = self.tokenTypeMapCache.get(tokenNames, None)
if result is None:
result = dict(zip(tokenNames, range(0, len(tokenNames))))
result["EOF"] = Token.EOF
self.tokenTypeMapCache[tokenNames] = result
return result
# Get a map from rule names to rule indexes.
#
# <p>Used for XPath and tree pattern compilation.</p>
#
def getRuleIndexMap(self):
ruleNames = self.getRuleNames()
if ruleNames is None:
from antlr4.error.Errors import UnsupportedOperationException
raise UnsupportedOperationException("The current recognizer does not provide a list of rule names.")
result = self.ruleIndexMapCache.get(ruleNames, None)
if result is None:
result = dict(zip(ruleNames, range(0, len(ruleNames))))
self.ruleIndexMapCache[ruleNames] = result
return result
def getTokenType(self, tokenName):
ttype = self.getTokenTypeMap().get(tokenName, None)
if ttype is not None:
return ttype
else:
return Token.INVALID_TYPE
# What is the error header, normally line/character position information?
def getErrorHeader(self, e):
line = e.getOffendingToken().line
column = e.getOffendingToken().column
return u"line " + unicode(line) + u":" + unicode(column)
# How should a token be displayed in an error message? The default
# is to display just the text, but during development you might
# want to have a lot of information spit out. Override in that case
# to use t.toString() (which, for CommonToken, dumps everything about
# the token). This is better than forcing you to override a method in
# your token objects because you don't have to go modify your lexer
# so that it creates a new Java type.
#
# @deprecated This method is not called by the ANTLR 4 Runtime. Specific
# implementations of {@link ANTLRErrorStrategy} may provide a similar
# feature when necessary. For example, see
# {@link DefaultErrorStrategy#getTokenErrorDisplay}.
#
def getTokenErrorDisplay(self, t):
if t is None:
return u"<no token>"
s = t.text
if s is None:
if t.type==Token.EOF:
s = u"<EOF>"
else:
s = u"<" + unicode(t.type) + u">"
s = s.replace(u"\n",u"\\n")
s = s.replace(u"\r",u"\\r")
s = s.replace(u"\t",u"\\t")
return u"'" + s + u"'"
def getErrorListenerDispatch(self):
return ProxyErrorListener(self._listeners)
# subclass needs to override these if there are sempreds or actions
# that the ATN interp needs to execute
def sempred(self, localctx, ruleIndex, actionIndex):
return True
def precpred(self, localctx , precedence):
return True
@property
def state(self):
return self._stateNumber
# Indicate that the recognizer has changed internal state that is
# consistent with the ATN state passed in. This way we always know
# where we are in the ATN as the parser goes along. The rule
# context objects form a stack that lets us see the stack of
# invoking rules. Combine this and we have complete ATN
# configuration information.
@state.setter
def state(self, atnState):
self._stateNumber = atnState
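# Minimal usage sketch (illustrative; a concrete generated recognizer would also
# provide getTokenNames()/getRuleNames()):
#
#   r = Recognizer()
#   r.extractVersion(u"4.5.2")         # -> (u"4", u"5")
#   r.extractVersion(u"4.5-SNAPSHOT")  # -> (u"4", u"5")
#   r.checkVersion(u"4.5")             # silent: major/minor agree with the runtime version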

View File

@@ -0,0 +1,234 @@
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
# A rule context is a record of a single rule invocation. It knows
# which context invoked it, if any. If there is no parent context, then
# naturally the invoking state is not valid. The parent link
# provides a chain upwards from the current rule invocation to the root
# of the invocation tree, forming a stack. We actually carry no
# information about the rule associated with this context (except
# when parsing). We keep only the state number of the invoking state from
# the ATN submachine that invoked this. Contrast this with the s
# pointer inside ParserRuleContext that tracks the current state
# being "executed" for the current rule.
#
# The parent contexts are useful for computing lookahead sets and
# getting error information.
#
# These objects are used during parsing and prediction.
# For the special case of parsers, we use the subclass
# ParserRuleContext.
#
# @see ParserRuleContext
#/
from io import StringIO
from antlr4.tree.Tree import RuleNode, INVALID_INTERVAL
from antlr4.tree.Trees import Trees
class RuleContext(RuleNode):
EMPTY = None
def __init__(self, parent=None, invokingState=-1):
super(RuleContext, self).__init__()
# What context invoked this rule?
self.parentCtx = parent
# What state invoked the rule associated with this context?
# The "return address" is the followState of invokingState
# If parent is null, this should be -1.
self.invokingState = invokingState
def depth(self):
n = 0
p = self
while p is not None:
p = p.parentCtx
n += 1
return n
# A context is empty if there is no invoking state, meaning nobody has
# invoked the current context.
def isEmpty(self):
return self.invokingState == -1
# satisfy the ParseTree / SyntaxTree interface
def getSourceInterval(self):
return INVALID_INTERVAL
def getRuleContext(self):
return self
def getPayload(self):
return self
# Return the combined text of all child nodes. This method only considers
# tokens which have been added to the parse tree.
# <p>
# Since tokens on hidden channels (e.g. whitespace or comments) are not
# added to the parse trees, they will not appear in the output of this
# method.
#/
def getText(self):
if self.getChildCount() == 0:
return u""
with StringIO() as builder:
for child in self.getChildren():
builder.write(child.getText())
return builder.getvalue()
def getRuleIndex(self):
return -1
def getChild(self, i):
return None
def getChildCount(self):
return 0
def getChildren(self):
for c in []:
yield c
def accept(self, visitor):
return visitor.visitChildren(self)
# # Call this method to view a parse tree in a dialog box visually.#/
# public Future<JDialog> inspect(@Nullable Parser parser) {
# List<String> ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null;
# return inspect(ruleNames);
# }
#
# public Future<JDialog> inspect(@Nullable List<String> ruleNames) {
# TreeViewer viewer = new TreeViewer(ruleNames, this);
# return viewer.open();
# }
#
# # Save this tree in a postscript file#/
# public void save(@Nullable Parser parser, String fileName)
# throws IOException, PrintException
# {
# List<String> ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null;
# save(ruleNames, fileName);
# }
#
# # Save this tree in a postscript file using a particular font name and size#/
# public void save(@Nullable Parser parser, String fileName,
# String fontName, int fontSize)
# throws IOException
# {
# List<String> ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null;
# save(ruleNames, fileName, fontName, fontSize);
# }
#
# # Save this tree in a postscript file#/
# public void save(@Nullable List<String> ruleNames, String fileName)
# throws IOException, PrintException
# {
# Trees.writePS(this, ruleNames, fileName);
# }
#
# # Save this tree in a postscript file using a particular font name and size#/
# public void save(@Nullable List<String> ruleNames, String fileName,
# String fontName, int fontSize)
# throws IOException
# {
# Trees.writePS(this, ruleNames, fileName, fontName, fontSize);
# }
#
# # Print out a whole tree, not just a node, in LISP format
# # (root child1 .. childN). Print just a node if this is a leaf.
# # We have to know the recognizer so we can get rule names.
# #/
# @Override
# public String toStringTree(@Nullable Parser recog) {
# return Trees.toStringTree(this, recog);
# }
#
# Print out a whole tree, not just a node, in LISP format
# (root child1 .. childN). Print just a node if this is a leaf.
#
def toStringTree(self, ruleNames=None, recog=None):
return Trees.toStringTree(self, ruleNames=ruleNames, recog=recog)
# }
#
# @Override
# public String toStringTree() {
# return toStringTree((List<String>)null);
# }
#
def __unicode__(self):
return self.toString(None, None)
# @Override
# public String toString() {
# return toString((List<String>)null, (RuleContext)null);
# }
#
# public final String toString(@Nullable Recognizer<?,?> recog) {
# return toString(recog, ParserRuleContext.EMPTY);
# }
#
# public final String toString(@Nullable List<String> ruleNames) {
# return toString(ruleNames, null);
# }
#
# // recog null unless ParserRuleContext, in which case we use subclass toString(...)
# public String toString(@Nullable Recognizer<?,?> recog, @Nullable RuleContext stop) {
# String[] ruleNames = recog != null ? recog.getRuleNames() : null;
# List<String> ruleNamesList = ruleNames != null ? Arrays.asList(ruleNames) : null;
# return toString(ruleNamesList, stop);
# }
def toString(self, ruleNames, stop):
with StringIO() as buf:
p = self
buf.write(u"[")
while p is not None and p is not stop:
if ruleNames is None:
if not p.isEmpty():
buf.write(unicode(p.invokingState))
else:
ri = p.getRuleIndex()
ruleName = ruleNames[ri] if ri >= 0 and ri < len(ruleNames) else unicode(ri)
buf.write(ruleName)
if p.parentCtx is not None and (ruleNames is not None or not p.parentCtx.isEmpty()):
buf.write(u" ")
p = p.parentCtx
buf.write(u"]")
return buf.getvalue()
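# Illustrative sketch of the invocation-stack behaviour (not part of the runtime):
#
#   root = RuleContext()                            # invokingState == -1, isEmpty() is True
#   child = RuleContext(parent=root, invokingState=5)
#   child.depth()                                   # -> 2
#   child.toString(None, None)                      # -> u"[5]" (the empty root contributes nothing)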

View File

@@ -0,0 +1,48 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# This is an InputStream that is loaded from stdin all at once
# when you construct the object.
#
import codecs
import sys
from antlr4.InputStream import InputStream
class StdinStream(InputStream):
def __init__(self, encoding='ascii'):
bytes = sys.stdin.read()
data = codecs.decode(bytes, encoding)
super(type(self), self).__init__(data)
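# Usage sketch (illustrative; "MyGrammarLexer" stands in for any generated lexer):
#
#   stream = StdinStream(encoding='utf-8')
#   lexer = MyGrammarLexer(stream)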

View File

@@ -0,0 +1,184 @@
#[The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# A token has properties: text, type, line, character position in the line
# (so we can ignore tabs), token channel, index, and source from which
# we obtained this token.
from io import StringIO
class Token (object):
INVALID_TYPE = 0
# During lookahead operations, this "token" signifies we hit rule end ATN state
# and did not follow it despite needing to.
EPSILON = -2
MIN_USER_TOKEN_TYPE = 1
EOF = -1
# All tokens go to the parser (unless skip() is called in that rule)
# on a particular "channel". The parser tunes to a particular channel
# so that whitespace etc... can go to the parser on a "hidden" channel.
DEFAULT_CHANNEL = 0
# Anything on different channel than DEFAULT_CHANNEL is not parsed
# by parser.
HIDDEN_CHANNEL = 1
def __init__(self):
self.source = None
self.type = None # token type of the token
self.channel = None # The parser ignores everything not on DEFAULT_CHANNEL
self.start = None # optional; return -1 if not implemented.
self.stop = None # optional; return -1 if not implemented.
self.tokenIndex = None # from 0..n-1 of the token object in the input stream
self.line = None # line=1..n of the 1st character
self.column = None # beginning of the line at which it occurs, 0..n-1
self._text = None # text of the token.
@property
def text(self):
return self._text
# Explicitly set the text for this token. If {@code text} is not
# {@code null}, then {@link #getText} will return this value rather than
# extracting the text from the input.
#
# @param text The explicit text of the token, or {@code null} if the text
# should be obtained from the input along with the start and stop indexes
# of the token.
@text.setter
def text(self, text):
self._text = text
def getTokenSource(self):
return self.source[0]
def getInputStream(self):
return self.source[1]
def __str__(self):
return unicode(self)
class CommonToken(Token):
# An empty {@link Pair} which is used as the default value of
# {@link #source} for tokens that do not have a source.
EMPTY_SOURCE = (None, None)
def __init__(self, source = EMPTY_SOURCE, type = None, channel=Token.DEFAULT_CHANNEL, start=-1, stop=-1):
super(CommonToken, self).__init__()
self.source = source
self.type = type
self.channel = channel
self.start = start
self.stop = stop
self.tokenIndex = -1
if source[0] is not None:
self.line = source[0].line
self.column = source[0].column
else:
self.column = -1
# Constructs a new {@link CommonToken} as a copy of another {@link Token}.
#
# <p>
# If {@code oldToken} is also a {@link CommonToken} instance, the newly
# constructed token will share a reference to the {@link #text} field and
# the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will
# be assigned the result of calling {@link #getText}, and {@link #source}
# will be constructed from the result of {@link Token#getTokenSource} and
# {@link Token#getInputStream}.</p>
#
# @param oldToken The token to copy.
#
def clone(self):
t = CommonToken(self.source, self.type, self.channel, self.start, self.stop)
t.tokenIndex = self.tokenIndex
t.line = self.line
t.column = self.column
t.text = self.text
return t
@property
def text(self):
if self._text is not None:
return self._text
input = self.getInputStream()
if input is None:
return None
n = input.size
if self.start < n and self.stop < n:
return input.getText(self.start, self.stop)
else:
return u"<EOF>"
@text.setter
def text(self, text):
self._text = text
def __unicode__(self):
with StringIO() as buf:
buf.write(u"[@")
buf.write(unicode(self.tokenIndex))
buf.write(u",")
buf.write(unicode(self.start))
buf.write(u":")
buf.write(unicode(self.stop))
buf.write(u"='")
txt = self.text
if txt is not None:
txt = txt.replace(u"\n",u"\\n")
txt = txt.replace(u"\r",u"\\r")
txt = txt.replace(u"\t",u"\\t")
else:
txt = u"<no text>"
buf.write(txt)
buf.write(u"',<")
buf.write(unicode(self.type))
buf.write(u">")
if self.channel > 0:
buf.write(u",channel=")
buf.write(unicode(self.channel))
buf.write(u",")
buf.write(unicode(self.line))
buf.write(u":")
buf.write(unicode(self.column))
buf.write(u"]")
return buf.getvalue()
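# Illustrative sketch of the text/formatting behaviour (not part of the runtime):
#
#   t = CommonToken(type=5, start=0, stop=4)
#   t.text = u"hello"        # explicit text overrides extraction from the input stream
#   t.line, t.column = 1, 0
#   unicode(t)               # -> u"[@-1,0:4='hello',<5>,1:0]"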

View File

@@ -0,0 +1,64 @@
#[The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
from io import StringIO
def str_collection(val, begin, end):
with StringIO() as buf:
buf.write(begin)
first = True
for item in val:
if not first:
buf.write(u', ')
buf.write(unicode(item))
first = False
buf.write(end)
return buf.getvalue()
def str_list(val):
return str_collection(val, u'[', u']')
def str_set(val):
return str_collection(val, u'{', u'}')
def escapeWhitespace(s, escapeSpaces):
with StringIO() as buf:
for c in s:
if c==' ' and escapeSpaces:
buf.write(u'\u00B7')
elif c=='\t':
buf.write(u"\\t")
elif c=='\n':
buf.write(u"\\n")
elif c=='\r':
buf.write(u"\\r")
else:
buf.write(unicode(c))
return buf.getvalue()
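# Illustrative examples (not part of the runtime):
#
#   str_list([1, 2, 3])                 # -> u'[1, 2, 3]'
#   str_set(set([u'a']))                # -> u'{a}'
#   escapeWhitespace(u"a\tb", False)    # -> u'a\\tb'
#   escapeWhitespace(u"a b", True)      # -> u'a\u00b7b' (space rendered as a middle dot)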

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,147 @@
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from antlr4.IntervalSet import IntervalSet
from antlr4.Token import Token
class ATN(object):
INVALID_ALT_NUMBER = 0
# Used for runtime deserialization of ATNs from strings#/
def __init__(self, grammarType , maxTokenType ):
# The type of the ATN.
self.grammarType = grammarType
# The maximum value for any symbol recognized by a transition in the ATN.
self.maxTokenType = maxTokenType
self.states = []
# Each subrule/rule is a decision point and we must track them so we
# can go back later and build DFA predictors for them. This includes
# all the rules, subrules, optional blocks, ()+, ()* etc...
self.decisionToState = []
# Maps from rule index to starting state number.
self.ruleToStartState = []
# Maps from rule index to stop state number.
self.ruleToStopState = None
self.modeNameToStartState = dict()
# For lexer ATNs, this maps the rule index to the resulting token type.
# For parser ATNs, this maps the rule index to the generated bypass token
# type if the
# {@link ATNDeserializationOptions#isGenerateRuleBypassTransitions}
# deserialization option was specified; otherwise, this is {@code null}.
self.ruleToTokenType = None
# For lexer ATNs, this is an array of {@link LexerAction} objects which may
# be referenced by action transitions in the ATN.
self.lexerActions = None
self.modeToStartState = []
# Compute the set of valid tokens that can occur starting in state {@code s}.
# If {@code ctx} is null, the set of tokens will not include what can follow
# the rule surrounding {@code s}. In other words, the set will be
# restricted to tokens reachable staying within {@code s}'s rule.
def nextTokensInContext(self, s, ctx):
from antlr4.LL1Analyzer import LL1Analyzer
anal = LL1Analyzer(self)
return anal.LOOK(s, ctx=ctx)
# Compute the set of valid tokens that can occur starting in {@code s} and
# staying in same rule. {@link Token#EPSILON} is in set if we reach end of
# rule.
def nextTokensNoContext(self, s):
if s.nextTokenWithinRule is not None:
return s.nextTokenWithinRule
s.nextTokenWithinRule = self.nextTokensInContext(s, None)
s.nextTokenWithinRule.readonly = True
return s.nextTokenWithinRule
def nextTokens(self, s, ctx = None):
if ctx is None:
return self.nextTokensNoContext(s)
else:
return self.nextTokensInContext(s, ctx)
def addState(self, state):
if state is not None:
state.atn = self
state.stateNumber = len(self.states)
self.states.append(state)
def removeState(self, state):
self.states[state.stateNumber] = None # just free mem, don't shift states in list
def defineDecisionState(self, s):
self.decisionToState.append(s)
s.decision = len(self.decisionToState)-1
return s.decision
def getDecisionState(self, decision):
if len(self.decisionToState)==0:
return None
else:
return self.decisionToState[decision]
# Computes the set of input symbols which could follow ATN state number
# {@code stateNumber} in the specified full {@code context}. This method
# considers the complete parser context, but does not evaluate semantic
# predicates (i.e. all predicates encountered during the calculation are
# assumed true). If a path in the ATN exists from the starting state to the
# {@link RuleStopState} of the outermost context without matching any
# symbols, {@link Token#EOF} is added to the returned set.
#
# <p>If {@code context} is {@code null}, it is treated as
# {@link ParserRuleContext#EMPTY}.</p>
#
# @param stateNumber the ATN state number
# @param context the full parse context
# @return The set of potentially valid input symbols which could follow the
# specified state in the specified context.
# @throws IllegalArgumentException if the ATN does not contain a state with
# number {@code stateNumber}
#/
def getExpectedTokens(self, stateNumber, ctx ):
if stateNumber < 0 or stateNumber >= len(self.states):
raise Exception("Invalid state number.")
s = self.states[stateNumber]
following = self.nextTokens(s)
if Token.EPSILON not in following:
return following
expected = IntervalSet()
expected.addSet(following)
expected.removeOne(Token.EPSILON)
while (ctx != None and ctx.invokingState >= 0 and Token.EPSILON in following):
invokingState = self.states[ctx.invokingState]
rt = invokingState.transitions[0]
following = self.nextTokens(rt.followState)
expected.addSet(following)
expected.removeOne(Token.EPSILON)
ctx = ctx.parentCtx
if Token.EPSILON in following:
expected.addOne(Token.EOF)
return expected
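# Usage sketch (illustrative): this is how error reporting typically asks the ATN what
# could legally follow the current state; a Parser instance with the usual _interp and
# _ctx fields is assumed:
#
#   expected = parser._interp.atn.getExpectedTokens(parser.state, parser._ctx)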

View File

@@ -0,0 +1,154 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
# A tuple: (ATN state, predicted alt, syntactic, semantic context).
# The syntactic context is a graph-structured stack node whose
# path(s) to the root is the rule invocation(s)
# chain used to arrive at the state. The semantic context is
# the tree of semantic predicates encountered before reaching
# an ATN state.
#/
from io import StringIO
from antlr4.atn.ATNState import DecisionState
from antlr4.atn.SemanticContext import SemanticContext
class ATNConfig(object):
def __init__(self, state=None, alt=None, context=None, semantic=None, config=None):
if config is not None:
if state is None:
state = config.state
if alt is None:
alt = config.alt
if context is None:
context = config.context
if semantic is None:
semantic = config.semanticContext
if semantic is None:
semantic = SemanticContext.NONE
# The ATN state associated with this configuration#/
self.state = state
# What alt (or lexer rule) is predicted by this configuration#/
self.alt = alt
# The stack of invoking states leading to the rule/states associated
# with this config. We track only those contexts pushed during
# execution of the ATN simulator.
self.context = context
self.semanticContext = semantic
# We cannot execute predicates dependent upon local context unless
# we know for sure we are in the correct context. Because there is
# no way to do this efficiently, we simply cannot evaluate
# dependent predicates unless we are in the rule that initially
# invokes the ATN simulator.
#
# closure() tracks the depth of how far we dip into the
# outer context: depth &gt; 0. Note that it may not be totally
# accurate depth since I don't ever decrement. TODO: make it a boolean then
self.reachesIntoOuterContext = 0 if config is None else config.reachesIntoOuterContext
self.precedenceFilterSuppressed = False if config is None else config.precedenceFilterSuppressed
# An ATN configuration is equal to another if both have
# the same state, they predict the same alternative, and
# syntactic/semantic contexts are the same.
#/
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, ATNConfig):
return False
else:
return self.state.stateNumber==other.state.stateNumber \
and self.alt==other.alt \
and ((self.context is other.context) or (self.context==other.context)) \
and self.semanticContext==other.semanticContext \
and self.precedenceFilterSuppressed==other.precedenceFilterSuppressed
def __hash__(self):
return hash( str(self.state.stateNumber) + "/" +
str(self.alt) + "/" +
str(self.context) + "/" +
str(self.semanticContext) )
def __str__(self):
return unicode(self)
def __unicode__(self):
with StringIO() as buf:
buf.write(u"(")
buf.write(unicode(self.state))
buf.write(u",")
buf.write(unicode(self.alt))
if self.context is not None:
buf.write(u",[")
buf.write(unicode(self.context))
buf.write(u"]")
if self.semanticContext is not None and self.semanticContext is not SemanticContext.NONE:
buf.write(u",")
buf.write(unicode(self.semanticContext))
if self.reachesIntoOuterContext>0:
buf.write(u",up=")
buf.write(unicode(self.reachesIntoOuterContext))
buf.write(u')')
return buf.getvalue()
class LexerATNConfig(ATNConfig):
def __init__(self, state, alt=None, context=None, semantic=SemanticContext.NONE, lexerActionExecutor=None, config=None):
super(LexerATNConfig, self).__init__(state=state, alt=alt, context=context, semantic=semantic, config=config)
if config is not None:
if lexerActionExecutor is None:
lexerActionExecutor = config.lexerActionExecutor
# This is the backing field for {@link #getLexerActionExecutor}.
self.lexerActionExecutor = lexerActionExecutor
self.passedThroughNonGreedyDecision = False if config is None else self.checkNonGreedyDecision(config, state)
def __hash__(self):
return hash(str(self.state.stateNumber) + str(self.alt) + str(self.context) \
+ str(self.semanticContext) + str(1 if self.passedThroughNonGreedyDecision else 0) \
+ str(self.lexerActionExecutor))
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, LexerATNConfig):
return False
if self.passedThroughNonGreedyDecision != other.passedThroughNonGreedyDecision:
return False
if self.lexerActionExecutor is not other.lexerActionExecutor:
return False
return super(LexerATNConfig, self).__eq__(other)
def checkNonGreedyDecision(self, source, target):
return source.passedThroughNonGreedyDecision \
or isinstance(target, DecisionState) and target.nonGreedy
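# Illustrative note (not part of the runtime; s stands for some ATNState and ctx for some
# PredictionContext): equality deliberately ignores reachesIntoOuterContext, so two
# configs that differ only in that depth counter compare (and hash) the same:
#
#   c1 = ATNConfig(state=s, alt=1, context=ctx)
#   c2 = ATNConfig(state=s, alt=1, context=ctx)
#   c2.reachesIntoOuterContext = 3
#   c1 == c2   # -> True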

View File

@@ -0,0 +1,239 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Specialized {@link Set}{@code <}{@link ATNConfig}{@code >} that can track
# info about the set, with support for combining similar configurations using a
# graph-structured stack.
#/
from io import StringIO
from antlr4.PredictionContext import merge
from antlr4.Utils import str_list
from antlr4.atn.ATN import ATN
from antlr4.atn.SemanticContext import SemanticContext
from antlr4.error.Errors import UnsupportedOperationException, IllegalStateException
class ATNConfigSet(object):
#
# The reason that we need this is because we don't want the hash map to use
# the standard hash code and equals. We need all configurations with the same
# {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively doubles
# the number of objects associated with ATNConfigs. The other solution is to
# use a hash table that lets us specify the equals/hashcode operation.
def __init__(self, fullCtx=True):
# All configs but hashed by (s, i, _, pi) not including context. Wiped out
# when we go readonly as this set becomes a DFA state.
self.configLookup = set()
# Indicates that this configuration set is part of a full context
# LL prediction. It will be used to determine how to merge $. With SLL
# it's a wildcard whereas it is not for LL context merge.
self.fullCtx = fullCtx
# Indicates that the set of configurations is read-only. Do not
# allow any code to manipulate the set; DFA states will point at
# the sets and they must not change. This does not protect the other
# fields; in particular, conflictingAlts is set after
# we've made this readonly.
self.readonly = False
# Track the elements as they are added to the set; supports get(i)#/
self.configs = []
# TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation
# TODO: can we track conflicts as they are added to save scanning configs later?
self.uniqueAlt = 0
self.conflictingAlts = None
# Used in parser and lexer. In lexer, it indicates we hit a pred
# while computing a closure operation. Don't make a DFA state from this.
self.hasSemanticContext = False
self.dipsIntoOuterContext = False
self.cachedHashCode = -1
def __iter__(self):
return self.configs.__iter__()
# Adding a new config means merging contexts with existing configs for
# {@code (s, i, pi, _)}, where {@code s} is the
# {@link ATNConfig#state}, {@code i} is the {@link ATNConfig#alt}, and
# {@code pi} is the {@link ATNConfig#semanticContext}. We use
# {@code (s,i,pi)} as key.
#
# <p>This method updates {@link #dipsIntoOuterContext} and
# {@link #hasSemanticContext} when necessary.</p>
#/
def add(self, config, mergeCache=None):
if self.readonly:
raise Exception("This set is readonly")
if config.semanticContext is not SemanticContext.NONE:
self.hasSemanticContext = True
if config.reachesIntoOuterContext > 0:
self.dipsIntoOuterContext = True
existing = self.getOrAdd(config)
if existing is config:
self.cachedHashCode = -1
self.configs.append(config) # track order here
return True
# a previous (s,i,pi,_), merge with it and save result
rootIsWildcard = not self.fullCtx
merged = merge(existing.context, config.context, rootIsWildcard, mergeCache)
# no need to check for existing.context, config.context in cache
# since only way to create new graphs is "call rule" and here. We
# cache at both places.
existing.reachesIntoOuterContext = max(existing.reachesIntoOuterContext, config.reachesIntoOuterContext)
# make sure to preserve the precedence filter suppression during the merge
if config.precedenceFilterSuppressed:
existing.precedenceFilterSuppressed = True
existing.context = merged # replace context; no need to alt mapping
return True
def getOrAdd(self, config):
for c in self.configLookup:
if c==config:
return c
self.configLookup.add(config)
return config
def getStates(self):
states = set()
for c in self.configs:
states.add(c.state)
return states
def getPredicates(self):
preds = list()
for c in self.configs:
if c.semanticContext!=SemanticContext.NONE:
preds.append(c.semanticContext)
return preds
def get(self, i):
return self.configs[i]
def optimizeConfigs(self, interpreter):
if self.readonly:
raise IllegalStateException("This set is readonly")
if len(self.configLookup)==0:
return
for config in self.configs:
config.context = interpreter.getCachedContext(config.context)
def addAll(self, coll):
for c in coll:
self.add(c)
return False
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, ATNConfigSet):
return False
same = self.configs is not None and \
self.configs==other.configs and \
self.fullCtx == other.fullCtx and \
self.uniqueAlt == other.uniqueAlt and \
self.conflictingAlts == other.conflictingAlts and \
self.hasSemanticContext == other.hasSemanticContext and \
self.dipsIntoOuterContext == other.dipsIntoOuterContext
return same
def __hash__(self):
if self.readonly:
if self.cachedHashCode == -1:
self.cachedHashCode = self.hashConfigs()
return self.cachedHashCode
return self.hashConfigs()
def hashConfigs(self):
with StringIO() as buf:
for cfg in self.configs:
buf.write(unicode(cfg))
return hash(buf.getvalue())
def __len__(self):
return len(self.configs)
def isEmpty(self):
return len(self.configs)==0
def __contains__(self, item):
if self.configLookup is None:
raise UnsupportedOperationException("This method is not implemented for readonly sets.")
return item in self.configLookup
def containsFast(self, obj):
if self.configLookup is None:
raise UnsupportedOperationException("This method is not implemented for readonly sets.")
return obj in self.configLookup
def clear(self):
if self.readonly:
raise IllegalStateException("This set is readonly")
del self.configs[:] # Python 2 lists have no clear()
self.cachedHashCode = -1
self.configLookup.clear()
def setReadonly(self, readonly):
self.readonly = readonly
self.configLookup = None # can't mod, no need for lookup cache
def __str__(self):
return unicode(self)
def __unicode__(self):
with StringIO() as buf:
buf.write(str_list(self.configs))
if self.hasSemanticContext:
buf.write(u",hasSemanticContext=")
buf.write(unicode(self.hasSemanticContext))
if self.uniqueAlt!=ATN.INVALID_ALT_NUMBER:
buf.write(u",uniqueAlt=")
buf.write(unicode(self.uniqueAlt))
if self.conflictingAlts is not None:
buf.write(u",conflictingAlts=")
buf.write(unicode(self.conflictingAlts))
if self.dipsIntoOuterContext:
buf.write(u",dipsIntoOuterContext")
return buf.getvalue()
class OrderedATNConfigSet(ATNConfigSet):
def __init__(self):
super(OrderedATNConfigSet, self).__init__()
# self.configLookup = set()
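# Illustrative note (not part of the runtime; cfg stands for any ATNConfig): once a set
# becomes a DFA state it is frozen; setReadonly(True) drops the lookup cache and any
# further add() raises:
#
#   configs = ATNConfigSet(fullCtx=False)
#   configs.setReadonly(True)
#   configs.add(cfg)        # raises Exception("This set is readonly")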

View File

@@ -0,0 +1,46 @@
#[The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
class ATNDeserializationOptions(object):
defaultOptions = None
def __init__(self, copyFrom = None):
self.readOnly = False
self.verifyATN = True if copyFrom is None else copyFrom.verifyATN
self.generateRuleBypassTransitions = False if copyFrom is None else copyFrom.generateRuleBypassTransitions
def __setattr__(self, key, value):
if key!="readOnly" and self.readOnly:
raise Exception("The object is read only.")
super(type(self), self).__setattr__(key,value)
ATNDeserializationOptions.defaultOptions = ATNDeserializationOptions()
ATNDeserializationOptions.defaultOptions.readOnly = True
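# Illustrative sketch (not part of the runtime): per-call options remain mutable, but the
# shared defaults are frozen by the readOnly guard above:
#
#   opts = ATNDeserializationOptions()
#   opts.verifyATN = False                                      # fine, instance is mutable
#   ATNDeserializationOptions.defaultOptions.verifyATN = False  # raises Exception("The object is read only.")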

View File

@@ -0,0 +1,542 @@
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from uuid import UUID
from antlr4.atn.ATN import ATN
from antlr4.atn.ATNType import ATNType
from antlr4.atn.ATNState import *
from antlr4.atn.Transition import *
from antlr4.atn.LexerAction import *
from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions
# This is the earliest supported serialized UUID.
BASE_SERIALIZED_UUID = UUID("AADB8D7E-AEEF-4415-AD2B-8204D6CF042E")
# This list contains all of the currently supported UUIDs, ordered by when
# the feature first appeared in this branch.
SUPPORTED_UUIDS = [ BASE_SERIALIZED_UUID ]
SERIALIZED_VERSION = 3
# This is the current serialized UUID.
SERIALIZED_UUID = BASE_SERIALIZED_UUID
class ATNDeserializer (object):
def __init__(self, options = None):
if options is None:
options = ATNDeserializationOptions.defaultOptions
self.deserializationOptions = options
self.edgeFactories = None
self.stateFactories = None
self.actionFactories = None
# Determines if a particular serialized representation of an ATN supports
# a particular feature, identified by the {@link UUID} used for serializing
# the ATN at the time the feature was first introduced.
#
# @param feature The {@link UUID} marking the first time the feature was
# supported in the serialized ATN.
# @param actualUuid The {@link UUID} of the actual serialized ATN which is
# currently being deserialized.
# @return {@code true} if the {@code actualUuid} value represents a
# serialized ATN at or after the feature identified by {@code feature} was
# introduced; otherwise, {@code false}.
def isFeatureSupported(self, feature, actualUuid):
idx1 = SUPPORTED_UUIDS.index(feature)
if idx1<0:
return False
idx2 = SUPPORTED_UUIDS.index(actualUuid)
return idx2 >= idx1
def deserialize(self, data):
self.reset(data)
self.checkVersion()
self.checkUUID()
atn = self.readATN()
self.readStates(atn)
self.readRules(atn)
self.readModes(atn)
sets = self.readSets(atn)
self.readEdges(atn, sets)
self.readDecisions(atn)
self.readLexerActions(atn)
self.markPrecedenceDecisions(atn)
self.verifyATN(atn)
if self.deserializationOptions.generateRuleBypassTransitions \
and atn.grammarType == ATNType.PARSER:
self.generateRuleBypassTransitions(atn)
# re-verify after modification
self.verifyATN(atn)
return atn
def reset(self, data):
def adjust(c):
v = ord(c)
return v-2 if v>1 else -1
temp = [ adjust(c) for c in data ]
# don't adjust the first value since that's the version number
temp[0] = ord(data[0])
self.data = temp
self.pos = 0
def checkVersion(self):
version = self.readInt()
if version != SERIALIZED_VERSION:
raise Exception("Could not deserialize ATN with version " + str(version) + " (expected " + str(SERIALIZED_VERSION) + ").")
def checkUUID(self):
uuid = self.readUUID()
if not uuid in SUPPORTED_UUIDS:
raise Exception("Could not deserialize ATN with UUID: " + str(uuid) + \
" (expected " + str(SERIALIZED_UUID) + " or a legacy UUID).", uuid, SERIALIZED_UUID)
self.uuid = uuid
def readATN(self):
grammarType = self.readInt()
maxTokenType = self.readInt()
return ATN(grammarType, maxTokenType)
def readStates(self, atn):
loopBackStateNumbers = []
endStateNumbers = []
nstates = self.readInt()
for i in range(0, nstates):
stype = self.readInt()
# ignore bad type of states
if stype==ATNState.INVALID_TYPE:
atn.addState(None)
continue
ruleIndex = self.readInt()
if ruleIndex == 0xFFFF:
ruleIndex = -1
s = self.stateFactory(stype, ruleIndex)
if stype == ATNState.LOOP_END: # special case
loopBackStateNumber = self.readInt()
loopBackStateNumbers.append((s, loopBackStateNumber))
elif isinstance(s, BlockStartState):
endStateNumber = self.readInt()
endStateNumbers.append((s, endStateNumber))
atn.addState(s)
# delay the assignment of loop back and end states until we know all the state instances have been initialized
for pair in loopBackStateNumbers:
pair[0].loopBackState = atn.states[pair[1]]
for pair in endStateNumbers:
pair[0].endState = atn.states[pair[1]]
numNonGreedyStates = self.readInt()
for i in range(0, numNonGreedyStates):
stateNumber = self.readInt()
atn.states[stateNumber].nonGreedy = True
numPrecedenceStates = self.readInt()
for i in range(0, numPrecedenceStates):
stateNumber = self.readInt()
atn.states[stateNumber].isPrecedenceRule = True
def readRules(self, atn):
nrules = self.readInt()
if atn.grammarType == ATNType.LEXER:
atn.ruleToTokenType = [0] * nrules
atn.ruleToStartState = [0] * nrules
for i in range(0, nrules):
s = self.readInt()
startState = atn.states[s]
atn.ruleToStartState[i] = startState
if atn.grammarType == ATNType.LEXER:
tokenType = self.readInt()
if tokenType == 0xFFFF:
tokenType = Token.EOF
atn.ruleToTokenType[i] = tokenType
atn.ruleToStopState = [0] * nrules
for state in atn.states:
if not isinstance(state, RuleStopState):
continue
atn.ruleToStopState[state.ruleIndex] = state
atn.ruleToStartState[state.ruleIndex].stopState = state
def readModes(self, atn):
nmodes = self.readInt()
for i in range(0, nmodes):
s = self.readInt()
atn.modeToStartState.append(atn.states[s])
def readSets(self, atn):
sets = []
m = self.readInt()
for i in range(0, m):
iset = IntervalSet()
sets.append(iset)
n = self.readInt()
containsEof = self.readInt()
if containsEof!=0:
iset.addOne(-1)
for j in range(0, n):
i1 = self.readInt()
i2 = self.readInt()
iset.addRange(Interval(i1, i2 + 1)) # range upper limit is exclusive
return sets
def readEdges(self, atn, sets):
nedges = self.readInt()
for i in range(0, nedges):
src = self.readInt()
trg = self.readInt()
ttype = self.readInt()
arg1 = self.readInt()
arg2 = self.readInt()
arg3 = self.readInt()
trans = self.edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets)
srcState = atn.states[src]
srcState.addTransition(trans)
# edges for rule stop states can be derived, so they aren't serialized
for state in atn.states:
for i in range(0, len(state.transitions)):
t = state.transitions[i]
if not isinstance(t, RuleTransition):
continue
outermostPrecedenceReturn = -1
if atn.ruleToStartState[t.target.ruleIndex].isPrecedenceRule:
if t.precedence == 0:
outermostPrecedenceReturn = t.target.ruleIndex
trans = EpsilonTransition(t.followState, outermostPrecedenceReturn)
atn.ruleToStopState[t.target.ruleIndex].addTransition(trans)
for state in atn.states:
if isinstance(state, BlockStartState):
# we need to know the end state to set its start state
if state.endState is None:
raise Exception("IllegalState")
# block end states can only be associated to a single block start state
if state.endState.startState is not None:
raise Exception("IllegalState")
state.endState.startState = state
elif isinstance(state, PlusLoopbackState):
for i in range(0, len(state.transitions)):
target = state.transitions[i].target
if isinstance(target, PlusBlockStartState):
target.loopBackState = state
elif isinstance(state, StarLoopbackState):
for i in range(0, len(state.transitions)):
target = state.transitions[i].target
if isinstance(target, StarLoopEntryState):
target.loopBackState = state
def readDecisions(self, atn):
ndecisions = self.readInt()
for i in range(0, ndecisions):
s = self.readInt()
decState = atn.states[s]
atn.decisionToState.append(decState)
decState.decision = i
def readLexerActions(self, atn):
if atn.grammarType == ATNType.LEXER:
count = self.readInt()
atn.lexerActions = [ None ] * count
for i in range(0, count):
actionType = self.readInt()
data1 = self.readInt()
if data1 == 0xFFFF:
data1 = -1
data2 = self.readInt()
if data2 == 0xFFFF:
data2 = -1
lexerAction = self.lexerActionFactory(actionType, data1, data2)
atn.lexerActions[i] = lexerAction
def generateRuleBypassTransitions(self, atn):
count = len(atn.ruleToStartState)
atn.ruleToTokenType = [ 0 ] * count
for i in range(0, count):
atn.ruleToTokenType[i] = atn.maxTokenType + i + 1
for i in range(0, count):
self.generateRuleBypassTransition(atn, i)
def generateRuleBypassTransition(self, atn, idx):
bypassStart = BasicBlockStartState()
bypassStart.ruleIndex = idx
atn.addState(bypassStart)
bypassStop = BlockEndState()
bypassStop.ruleIndex = idx
atn.addState(bypassStop)
bypassStart.endState = bypassStop
atn.defineDecisionState(bypassStart)
bypassStop.startState = bypassStart
excludeTransition = None
if atn.ruleToStartState[idx].isPrecedenceRule:
# wrap from the beginning of the rule to the StarLoopEntryState
endState = None
for state in atn.states:
if self.stateIsEndStateFor(state, idx):
endState = state
excludeTransition = state.loopBackState.transitions[0]
break
if excludeTransition is None:
raise Exception("Couldn't identify final state of the precedence rule prefix section.")
else:
endState = atn.ruleToStopState[idx]
# all non-excluded transitions that currently target end state need to target blockEnd instead
for state in atn.states:
for transition in state.transitions:
if transition == excludeTransition:
continue
if transition.target == endState:
transition.target = bypassStop
# all transitions leaving the rule start state need to leave blockStart instead
ruleToStartState = atn.ruleToStartState[idx]
count = len(ruleToStartState.transitions)
        while count > 0:
            bypassStart.addTransition(ruleToStartState.transitions[count-1])
            del ruleToStartState.transitions[-1]
            count -= 1
# link the new states
atn.ruleToStartState[idx].addTransition(EpsilonTransition(bypassStart))
bypassStop.addTransition(EpsilonTransition(endState))
matchState = BasicState()
atn.addState(matchState)
matchState.addTransition(AtomTransition(bypassStop, atn.ruleToTokenType[idx]))
bypassStart.addTransition(EpsilonTransition(matchState))
def stateIsEndStateFor(self, state, idx):
if state.ruleIndex != idx:
return None
if not isinstance(state, StarLoopEntryState):
return None
maybeLoopEndState = state.transitions[len(state.transitions) - 1].target
if not isinstance(maybeLoopEndState, LoopEndState):
return None
if maybeLoopEndState.epsilonOnlyTransitions and \
isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
return state
else:
return None
#
# Analyze the {@link StarLoopEntryState} states in the specified ATN to set
# the {@link StarLoopEntryState#precedenceRuleDecision} field to the
# correct value.
#
# @param atn The ATN.
#
def markPrecedenceDecisions(self, atn):
for state in atn.states:
if not isinstance(state, StarLoopEntryState):
continue
# We analyze the ATN to determine if this ATN decision state is the
# decision for the closure block that determines whether a
# precedence rule should continue or complete.
#
if atn.ruleToStartState[state.ruleIndex].isPrecedenceRule:
maybeLoopEndState = state.transitions[len(state.transitions) - 1].target
if isinstance(maybeLoopEndState, LoopEndState):
if maybeLoopEndState.epsilonOnlyTransitions and \
isinstance(maybeLoopEndState.transitions[0].target, RuleStopState):
state.precedenceRuleDecision = True
def verifyATN(self, atn):
if not self.deserializationOptions.verifyATN:
return
# verify assumptions
for state in atn.states:
if state is None:
continue
self.checkCondition(state.epsilonOnlyTransitions or len(state.transitions) <= 1)
if isinstance(state, PlusBlockStartState):
self.checkCondition(state.loopBackState is not None)
if isinstance(state, StarLoopEntryState):
self.checkCondition(state.loopBackState is not None)
self.checkCondition(len(state.transitions) == 2)
if isinstance(state.transitions[0].target, StarBlockStartState):
self.checkCondition(isinstance(state.transitions[1].target, LoopEndState))
self.checkCondition(not state.nonGreedy)
elif isinstance(state.transitions[0].target, LoopEndState):
self.checkCondition(isinstance(state.transitions[1].target, StarBlockStartState))
self.checkCondition(state.nonGreedy)
else:
raise Exception("IllegalState")
if isinstance(state, StarLoopbackState):
self.checkCondition(len(state.transitions) == 1)
self.checkCondition(isinstance(state.transitions[0].target, StarLoopEntryState))
if isinstance(state, LoopEndState):
self.checkCondition(state.loopBackState is not None)
if isinstance(state, RuleStartState):
self.checkCondition(state.stopState is not None)
if isinstance(state, BlockStartState):
self.checkCondition(state.endState is not None)
if isinstance(state, BlockEndState):
self.checkCondition(state.startState is not None)
if isinstance(state, DecisionState):
self.checkCondition(len(state.transitions) <= 1 or state.decision >= 0)
else:
self.checkCondition(len(state.transitions) <= 1 or isinstance(state, RuleStopState))
def checkCondition(self, condition, message=None):
if not condition:
if message is None:
message = "IllegalState"
raise Exception(message)
def readInt(self):
i = self.data[self.pos]
self.pos += 1
return i
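    # Wider values are stored as little-endian 16-bit words: for example,
    # readInt32() over the two words 0x5678 then 0x1234 reconstructs 0x12345678,
    # and readUUID() folds eight such words into a single 128-bit UUID.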
def readInt32(self):
low = self.readInt()
high = self.readInt()
return low | (high << 16)
def readLong(self):
low = self.readInt32()
high = self.readInt32()
return (low & 0x00000000FFFFFFFF) | (high << 32)
def readUUID(self):
low = self.readLong()
high = self.readLong()
allBits = (low & 0xFFFFFFFFFFFFFFFF) | (high << 64)
return UUID(int=allBits)
def edgeFactory(self, atn, type, src, trg, arg1, arg2, arg3, sets):
target = atn.states[trg]
if self.edgeFactories is None:
ef = [None] * 11
ef[0] = lambda args : None
ef[Transition.EPSILON] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
EpsilonTransition(target)
ef[Transition.RANGE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
RangeTransition(target, Token.EOF, arg2) if arg3 != 0 else RangeTransition(target, arg1, arg2)
ef[Transition.RULE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
RuleTransition(atn.states[arg1], arg2, arg3, target)
ef[Transition.PREDICATE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
PredicateTransition(target, arg1, arg2, arg3 != 0)
ef[Transition.PRECEDENCE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
PrecedencePredicateTransition(target, arg1)
ef[Transition.ATOM] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
AtomTransition(target, Token.EOF) if arg3 != 0 else AtomTransition(target, arg1)
ef[Transition.ACTION] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
ActionTransition(target, arg1, arg2, arg3 != 0)
ef[Transition.SET] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
SetTransition(target, sets[arg1])
ef[Transition.NOT_SET] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
NotSetTransition(target, sets[arg1])
ef[Transition.WILDCARD] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \
WildcardTransition(target)
self.edgeFactories = ef
        if type >= len(self.edgeFactories) or self.edgeFactories[type] is None:
raise Exception("The specified transition type: " + str(type) + " is not valid.")
else:
return self.edgeFactories[type](atn, src, trg, arg1, arg2, arg3, sets, target)
def stateFactory(self, type, ruleIndex):
if self.stateFactories is None:
sf = [None] * 13
sf[ATNState.INVALID_TYPE] = lambda : None
sf[ATNState.BASIC] = lambda : BasicState()
sf[ATNState.RULE_START] = lambda : RuleStartState()
sf[ATNState.BLOCK_START] = lambda : BasicBlockStartState()
sf[ATNState.PLUS_BLOCK_START] = lambda : PlusBlockStartState()
sf[ATNState.STAR_BLOCK_START] = lambda : StarBlockStartState()
sf[ATNState.TOKEN_START] = lambda : TokensStartState()
sf[ATNState.RULE_STOP] = lambda : RuleStopState()
sf[ATNState.BLOCK_END] = lambda : BlockEndState()
sf[ATNState.STAR_LOOP_BACK] = lambda : StarLoopbackState()
sf[ATNState.STAR_LOOP_ENTRY] = lambda : StarLoopEntryState()
sf[ATNState.PLUS_LOOP_BACK] = lambda : PlusLoopbackState()
sf[ATNState.LOOP_END] = lambda : LoopEndState()
self.stateFactories = sf
        if type >= len(self.stateFactories) or self.stateFactories[type] is None:
raise Exception("The specified state type " + str(type) + " is not valid.")
else:
s = self.stateFactories[type]()
if s is not None:
s.ruleIndex = ruleIndex
return s
def lexerActionFactory(self, type, data1, data2):
if self.actionFactories is None:
af = [ None ] * 8
af[LexerActionType.CHANNEL] = lambda data1, data2: LexerChannelAction(data1)
af[LexerActionType.CUSTOM] = lambda data1, data2: LexerCustomAction(data1, data2)
af[LexerActionType.MODE] = lambda data1, data2: LexerModeAction(data1)
af[LexerActionType.MORE] = lambda data1, data2: LexerMoreAction.INSTANCE
af[LexerActionType.POP_MODE] = lambda data1, data2: LexerPopModeAction.INSTANCE
af[LexerActionType.PUSH_MODE] = lambda data1, data2: LexerPushModeAction(data1)
af[LexerActionType.SKIP] = lambda data1, data2: LexerSkipAction.INSTANCE
af[LexerActionType.TYPE] = lambda data1, data2: LexerTypeAction(data1)
self.actionFactories = af
        if type >= len(self.actionFactories) or self.actionFactories[type] is None:
raise Exception("The specified lexer action type " + str(type) + " is not valid.")
else:
return self.actionFactories[type](data1, data2)
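# A minimal usage sketch, mirroring what ANTLR-generated Python2 recognizers do
# (serializedATN() stands for the generated function returning the encoded ATN
# string; it is not defined in this module):
#
#     from antlr4.atn.ATNDeserializer import ATNDeserializer
#     atn = ATNDeserializer().deserialize(serializedATN())
#     # atn.states, atn.ruleToStartState and atn.decisionToState are now populated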

View File

@@ -0,0 +1,70 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from antlr4.PredictionContext import getCachedPredictionContext
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.dfa.DFAState import DFAState
class ATNSimulator(object):
# Must distinguish between missing edge and edge we know leads nowhere#/
ERROR = DFAState(0x7FFFFFFF, ATNConfigSet())
# The context cache maps all PredictionContext objects that are ==
# to a single cached copy. This cache is shared across all contexts
# in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet
# to use only cached nodes/graphs in addDFAState(). We don't want to
# fill this during closure() since there are lots of contexts that
# pop up but are not used ever again. It also greatly slows down closure().
#
# <p>This cache makes a huge difference in memory and a little bit in speed.
# For the Java grammar on java.*, it dropped the memory requirements
# at the end from 25M to 16M. We don't store any of the full context
# graphs in the DFA because they are limited to local context only,
# but apparently there's a lot of repetition there as well. We optimize
# the config contexts before storing the config set in the DFA states
# by literally rebuilding them with cached subgraphs only.</p>
#
# <p>I tried a cache for use during closure operations, that was
# whacked after each adaptivePredict(). It cost a little bit
# more time I think and doesn't save on the overall footprint
# so it's not worth the complexity.</p>
#/
def __init__(self, atn, sharedContextCache):
self.atn = atn
self.sharedContextCache = sharedContextCache
def getCachedContext(self, context):
if self.sharedContextCache is None:
return context
visited = dict()
return getCachedPredictionContext(context, self.sharedContextCache, visited)
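# A rough sketch of how a simulator shares the context cache (the lexer, atn and
# decisionToDFA variables are placeholders for objects built elsewhere):
#
#     from antlr4.PredictionContext import PredictionContextCache
#     cache = PredictionContextCache()
#     interp = LexerATNSimulator(lexer, atn, decisionToDFA, cache)
#     ctx = interp.getCachedContext(someContext)  # canonical, cache-backed copy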

View File

@@ -0,0 +1,283 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The following images show the relation of states and
# {@link ATNState#transitions} for various grammar constructs.
#
# <ul>
#
# <li>Solid edges marked with an &#0949; indicate a required
# {@link EpsilonTransition}.</li>
#
# <li>Dashed edges indicate locations where any transition derived from
# {@link Transition} might appear.</li>
#
# <li>Dashed nodes are place holders for either a sequence of linked
# {@link BasicState} states or the inclusion of a block representing a nested
# construct in one of the forms below.</li>
#
# <li>Nodes showing multiple outgoing alternatives with a {@code ...} support
# any number of alternatives (one or more). Nodes without the {@code ...} only
# support the exact number of alternatives shown in the diagram.</li>
#
# </ul>
#
# <h2>Basic Blocks</h2>
#
# <h3>Rule</h3>
#
# <embed src="images/Rule.svg" type="image/svg+xml"/>
#
# <h3>Block of 1 or more alternatives</h3>
#
# <embed src="images/Block.svg" type="image/svg+xml"/>
#
# <h2>Greedy Loops</h2>
#
# <h3>Greedy Closure: {@code (...)*}</h3>
#
# <embed src="images/ClosureGreedy.svg" type="image/svg+xml"/>
#
# <h3>Greedy Positive Closure: {@code (...)+}</h3>
#
# <embed src="images/PositiveClosureGreedy.svg" type="image/svg+xml"/>
#
# <h3>Greedy Optional: {@code (...)?}</h3>
#
# <embed src="images/OptionalGreedy.svg" type="image/svg+xml"/>
#
# <h2>Non-Greedy Loops</h2>
#
# <h3>Non-Greedy Closure: {@code (...)*?}</h3>
#
# <embed src="images/ClosureNonGreedy.svg" type="image/svg+xml"/>
#
# <h3>Non-Greedy Positive Closure: {@code (...)+?}</h3>
#
# <embed src="images/PositiveClosureNonGreedy.svg" type="image/svg+xml"/>
#
# <h3>Non-Greedy Optional: {@code (...)??}</h3>
#
# <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/>
#
INITIAL_NUM_TRANSITIONS = 4
class ATNState(object):
# constants for serialization
INVALID_TYPE = 0
BASIC = 1
RULE_START = 2
BLOCK_START = 3
PLUS_BLOCK_START = 4
STAR_BLOCK_START = 5
TOKEN_START = 6
RULE_STOP = 7
BLOCK_END = 8
STAR_LOOP_BACK = 9
STAR_LOOP_ENTRY = 10
PLUS_LOOP_BACK = 11
LOOP_END = 12
serializationNames = [
"INVALID",
"BASIC",
"RULE_START",
"BLOCK_START",
"PLUS_BLOCK_START",
"STAR_BLOCK_START",
"TOKEN_START",
"RULE_STOP",
"BLOCK_END",
"STAR_LOOP_BACK",
"STAR_LOOP_ENTRY",
"PLUS_LOOP_BACK",
"LOOP_END" ]
INVALID_STATE_NUMBER = -1
def __init__(self):
# Which ATN are we in?
self.atn = None
self.stateNumber = ATNState.INVALID_STATE_NUMBER
self.stateType = None
self.ruleIndex = 0 # at runtime, we don't have Rule objects
self.epsilonOnlyTransitions = False
# Track the transitions emanating from this ATN state.
self.transitions = []
# Used to cache lookahead during parsing, not used during construction
self.nextTokenWithinRule = None
def __hash__(self):
return self.stateNumber
def __eq__(self, other):
if isinstance(other, ATNState):
return self.stateNumber==other.stateNumber
else:
return False
def onlyHasEpsilonTransitions(self):
return self.epsilonOnlyTransitions
def isNonGreedyExitState(self):
return False
def __str__(self):
return unicode(self)
def __unicode__(self):
return unicode(self.stateNumber)
def addTransition(self, trans, index=-1):
if len(self.transitions)==0:
self.epsilonOnlyTransitions = trans.isEpsilon
elif self.epsilonOnlyTransitions != trans.isEpsilon:
self.epsilonOnlyTransitions = False
# TODO System.err.format(Locale.getDefault(), "ATN state %d has both epsilon and non-epsilon transitions.\n", stateNumber);
if index==-1:
self.transitions.append(trans)
else:
self.transitions.insert(index, trans)
class BasicState(ATNState):
def __init__(self):
super(BasicState, self).__init__()
self.stateType = self.BASIC
class DecisionState(ATNState):
def __init__(self):
super(DecisionState, self).__init__()
self.decision = -1
self.nonGreedy = False
# The start of a regular {@code (...)} block.
class BlockStartState(DecisionState):
def __init__(self):
super(BlockStartState, self).__init__()
self.endState = None
class BasicBlockStartState(BlockStartState):
def __init__(self):
super(BasicBlockStartState, self).__init__()
self.stateType = self.BLOCK_START
# Terminal node of a simple {@code (a|b|c)} block.
class BlockEndState(ATNState):
def __init__(self):
super(BlockEndState, self).__init__()
self.stateType = self.BLOCK_END
self.startState = None
# The last node in the ATN for a rule, unless that rule is the start symbol.
# In that case, there is one transition to EOF. Later, we might encode
# references to all calls to this rule to compute FOLLOW sets for
# error handling.
#
class RuleStopState(ATNState):
def __init__(self):
super(RuleStopState, self).__init__()
self.stateType = self.RULE_STOP
class RuleStartState(ATNState):
def __init__(self):
super(RuleStartState, self).__init__()
self.stateType = self.RULE_START
self.stopState = None
self.isPrecedenceRule = False
# Decision state for {@code A+} and {@code (A|B)+}. It has two transitions:
# one to the loop back to start of the block and one to exit.
#
class PlusLoopbackState(DecisionState):
def __init__(self):
super(PlusLoopbackState, self).__init__()
self.stateType = self.PLUS_LOOP_BACK
# Start of {@code (A|B|...)+} loop. Technically a decision state, but
# we don't use it for code generation; somebody might need it, so I'm defining
# it for completeness. In reality, the {@link PlusLoopbackState} node is the
# real decision-making node for {@code A+}.
#
class PlusBlockStartState(BlockStartState):
def __init__(self):
super(PlusBlockStartState, self).__init__()
self.stateType = self.PLUS_BLOCK_START
self.loopBackState = None
# The block that begins a closure loop.
class StarBlockStartState(BlockStartState):
def __init__(self):
super(StarBlockStartState, self).__init__()
self.stateType = self.STAR_BLOCK_START
class StarLoopbackState(ATNState):
def __init__(self):
super(StarLoopbackState, self).__init__()
self.stateType = self.STAR_LOOP_BACK
class StarLoopEntryState(DecisionState):
def __init__(self):
super(StarLoopEntryState, self).__init__()
self.stateType = self.STAR_LOOP_ENTRY
self.loopBackState = None
# Indicates whether this state can benefit from a precedence DFA during SLL decision making.
self.precedenceRuleDecision = None
# Mark the end of a * or + loop.
class LoopEndState(ATNState):
def __init__(self):
super(LoopEndState, self).__init__()
self.stateType = self.LOOP_END
self.loopBackState = None
# The Tokens rule start state linking to each lexer rule start state
class TokensStartState(DecisionState):
def __init__(self):
super(TokensStartState, self).__init__()
self.stateType = self.TOKEN_START
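# A small self-contained sketch (not part of the runtime) showing how states and
# transitions are wired together; EpsilonTransition is imported lazily from
# antlr4.atn.Transition to avoid any circular import at module load time.
if __name__ == '__main__':
    from antlr4.atn.Transition import EpsilonTransition
    start = RuleStartState()
    stop = RuleStopState()
    start.stateNumber, stop.stateNumber = 0, 1
    start.stopState = stop
    start.addTransition(EpsilonTransition(stop))
    print(start.epsilonOnlyTransitions)  # True: the only outgoing edge is epsilon
    print(len(start.transitions))        # 1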

View File

@@ -0,0 +1,37 @@
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
# Represents the type of recognizer an ATN applies to.
class ATNType(object):
LEXER = 0
PARSER = 1

View File

@@ -0,0 +1,588 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
# When we hit an accept state in either the DFA or the ATN, we
# have to notify the character stream to start buffering characters
# via {@link IntStream#mark} and record the current state. The current sim state
# includes the current index into the input, the current line,
# and current character position in that line. Note that the Lexer is
# tracking the starting line and character position of the token. These
# variables track the "state" of the simulator when it hits an accept state.
#
# <p>We track these variables separately for the DFA and ATN simulation
# because the DFA simulation often has to fail over to the ATN
# simulation. If the ATN simulation fails, we need the DFA to fall
# back to its previously accepted state, if any. If the ATN succeeds,
# then the ATN does the accept and the DFA simulator that invoked it
# can simply return the predicted token type.</p>
#/
from antlr4 import Lexer
from antlr4.PredictionContext import SingletonPredictionContext, PredictionContext
from antlr4.Token import Token
from antlr4.atn.ATN import ATN
from antlr4.atn.ATNConfig import LexerATNConfig
from antlr4.atn.ATNSimulator import ATNSimulator
from antlr4.atn.ATNConfigSet import OrderedATNConfigSet
from antlr4.atn.ATNState import RuleStopState
from antlr4.atn.LexerActionExecutor import LexerActionExecutor
from antlr4.atn.Transition import Transition
from antlr4.dfa.DFAState import DFAState
from antlr4.error.Errors import LexerNoViableAltException, UnsupportedOperationException
class SimState(object):
def __init__(self):
self.reset()
def reset(self):
self.index = -1
self.line = 0
self.column = -1
self.dfaState = None
class LexerATNSimulator(ATNSimulator):
debug = False
dfa_debug = False
MIN_DFA_EDGE = 0
MAX_DFA_EDGE = 127 # forces unicode to stay in ATN
ERROR = None
match_calls = 0
def __init__(self, recog, atn, decisionToDFA, sharedContextCache):
super(LexerATNSimulator, self).__init__(atn, sharedContextCache)
self.decisionToDFA = decisionToDFA
self.recog = recog
# The current token's starting index into the character stream.
# Shared across DFA to ATN simulation in case the ATN fails and the
# DFA did not have a previous accept state. In this case, we use the
# ATN-generated exception object.
self.startIndex = -1
# line number 1..n within the input#/
self.line = 1
# The index of the character relative to the beginning of the line 0..n-1#/
self.column = 0
from antlr4.Lexer import Lexer
self.mode = Lexer.DEFAULT_MODE
# Used during DFA/ATN exec to record the most recent accept configuration info
self.prevAccept = SimState()
def copyState(self, simulator ):
self.column = simulator.column
self.line = simulator.line
self.mode = simulator.mode
self.startIndex = simulator.startIndex
def match(self, input , mode):
self.match_calls += 1
self.mode = mode
mark = input.mark()
try:
self.startIndex = input.index
self.prevAccept.reset()
dfa = self.decisionToDFA[mode]
if dfa.s0 is None:
return self.matchATN(input)
else:
return self.execATN(input, dfa.s0)
finally:
input.release(mark)
def reset(self):
self.prevAccept.reset()
self.startIndex = -1
self.line = 1
self.column = 0
self.mode = Lexer.DEFAULT_MODE
def matchATN(self, input):
startState = self.atn.modeToStartState[self.mode]
if self.debug:
print("matchATN mode " + str(self.mode) + " start: " + str(startState))
old_mode = self.mode
s0_closure = self.computeStartState(input, startState)
suppressEdge = s0_closure.hasSemanticContext
s0_closure.hasSemanticContext = False
next = self.addDFAState(s0_closure)
if not suppressEdge:
self.decisionToDFA[self.mode].s0 = next
predict = self.execATN(input, next)
if self.debug:
print("DFA after matchATN: " + str(self.decisionToDFA[old_mode].toLexerString()))
return predict
def execATN(self, input, ds0):
if self.debug:
print("start state closure=" + str(ds0.configs))
if ds0.isAcceptState:
# allow zero-length tokens
self.captureSimState(self.prevAccept, input, ds0)
t = input.LA(1)
s = ds0 # s is current/from DFA state
while True: # while more work
if self.debug:
print("execATN loop starting closure: %s\n", s.configs)
# As we move src->trg, src->trg, we keep track of the previous trg to
# avoid looking up the DFA state again, which is expensive.
# If the previous target was already part of the DFA, we might
# be able to avoid doing a reach operation upon t. If s!=null,
# it means that semantic predicates didn't prevent us from
# creating a DFA state. Once we know s!=null, we check to see if
# the DFA state has an edge already for t. If so, we can just reuse
            # its configuration set; there's no point in re-computing it.
# This is kind of like doing DFA simulation within the ATN
# simulation because DFA simulation is really just a way to avoid
# computing reach/closure sets. Technically, once we know that
# we have a previously added DFA state, we could jump over to
# the DFA simulator. But, that would mean popping back and forth
# a lot and making things more complicated algorithmically.
# This optimization makes a lot of sense for loops within DFA.
# A character will take us back to an existing DFA state
# that already has lots of edges out of it. e.g., .* in comments.
# print("Target for:" + str(s) + " and:" + str(t))
target = self.getExistingTargetState(s, t)
# print("Existing:" + str(target))
if target is None:
target = self.computeTargetState(input, s, t)
# print("Computed:" + str(target))
if target == self.ERROR:
break
# If this is a consumable input element, make sure to consume before
# capturing the accept state so the input index, line, and char
# position accurately reflect the state of the interpreter at the
# end of the token.
if t != Token.EOF:
self.consume(input)
if target.isAcceptState:
self.captureSimState(self.prevAccept, input, target)
if t == Token.EOF:
break
t = input.LA(1)
s = target # flip; current DFA target becomes new src/from state
return self.failOrAccept(self.prevAccept, input, s.configs, t)
# Get an existing target state for an edge in the DFA. If the target state
# for the edge has not yet been computed or is otherwise not available,
# this method returns {@code null}.
#
# @param s The current DFA state
# @param t The next input symbol
# @return The existing target DFA state for the given input symbol
# {@code t}, or {@code null} if the target state for this edge is not
# already cached
def getExistingTargetState(self, s, t):
if s.edges is None or t < self.MIN_DFA_EDGE or t > self.MAX_DFA_EDGE:
return None
target = s.edges[t - self.MIN_DFA_EDGE]
if self.debug and target is not None:
print("reuse state "+s.stateNumber+ " edge to "+target.stateNumber)
return target
# Compute a target state for an edge in the DFA, and attempt to add the
# computed state and corresponding edge to the DFA.
#
# @param input The input stream
# @param s The current DFA state
# @param t The next input symbol
#
# @return The computed target DFA state for the given input symbol
# {@code t}. If {@code t} does not lead to a valid DFA state, this method
# returns {@link #ERROR}.
def computeTargetState(self, input, s, t):
reach = OrderedATNConfigSet()
# if we don't find an existing DFA state
# Fill reach starting from closure, following t transitions
self.getReachableConfigSet(input, s.configs, reach, t)
if len(reach)==0: # we got nowhere on t from s
if not reach.hasSemanticContext:
# we got nowhere on t, don't throw out this knowledge; it'd
# cause a failover from DFA later.
            self.addDFAEdge(s, t, self.ERROR)
# stop when we can't match any more char
return self.ERROR
# Add an edge from s to target DFA found/created for reach
return self.addDFAEdge(s, t, cfgs=reach)
def failOrAccept(self, prevAccept , input, reach, t):
if self.prevAccept.dfaState is not None:
lexerActionExecutor = prevAccept.dfaState.lexerActionExecutor
self.accept(input, lexerActionExecutor, self.startIndex, prevAccept.index, prevAccept.line, prevAccept.column)
return prevAccept.dfaState.prediction
else:
# if no accept and EOF is first char, return EOF
if t==Token.EOF and input.index==self.startIndex:
return Token.EOF
raise LexerNoViableAltException(self.recog, input, self.startIndex, reach)
# Given a starting configuration set, figure out all ATN configurations
# we can reach upon input {@code t}. Parameter {@code reach} is a return
# parameter.
def getReachableConfigSet(self, input, closure, reach, t):
# this is used to skip processing for configs which have a lower priority
# than a config that already reached an accept state for the same rule
skipAlt = ATN.INVALID_ALT_NUMBER
for cfg in closure:
currentAltReachedAcceptState = ( cfg.alt == skipAlt )
if currentAltReachedAcceptState and cfg.passedThroughNonGreedyDecision:
continue
if self.debug:
print("testing %s at %s\n", self.getTokenName(t), cfg.toString(self.recog, True))
for trans in cfg.state.transitions: # for each transition
target = self.getReachableTarget(trans, t)
if target is not None:
lexerActionExecutor = cfg.lexerActionExecutor
if lexerActionExecutor is not None:
lexerActionExecutor = lexerActionExecutor.fixOffsetBeforeMatch(input.index - self.startIndex)
treatEofAsEpsilon = (t == Token.EOF)
config = LexerATNConfig(state=target, lexerActionExecutor=lexerActionExecutor, config=cfg)
if self.closure(input, config, reach, currentAltReachedAcceptState, True, treatEofAsEpsilon):
# any remaining configs for this alt have a lower priority than
# the one that just reached an accept state.
skipAlt = cfg.alt
def accept(self, input, lexerActionExecutor, startIndex, index, line, charPos):
if self.debug:
print("ACTION %s\n", lexerActionExecutor)
# seek to after last char in token
input.seek(index)
self.line = line
self.column = charPos
if lexerActionExecutor is not None and self.recog is not None:
lexerActionExecutor.execute(self.recog, input, startIndex)
def getReachableTarget(self, trans, t):
if trans.matches(t, 0, 0xFFFE):
return trans.target
else:
return None
def computeStartState(self, input, p):
initialContext = PredictionContext.EMPTY
configs = OrderedATNConfigSet()
for i in range(0,len(p.transitions)):
target = p.transitions[i].target
c = LexerATNConfig(state=target, alt=i+1, context=initialContext)
self.closure(input, c, configs, False, False, False)
return configs
# Since the alternatives within any lexer decision are ordered by
# preference, this method stops pursuing the closure as soon as an accept
# state is reached. After the first accept state is reached by depth-first
# search from {@code config}, all other (potentially reachable) states for
# this rule would have a lower priority.
#
# @return {@code true} if an accept state is reached, otherwise
# {@code false}.
def closure(self, input, config, configs, currentAltReachedAcceptState,
speculative, treatEofAsEpsilon):
if self.debug:
print("closure("+config.toString(self.recog, True)+")")
if isinstance( config.state, RuleStopState ):
if self.debug:
if self.recog is not None:
print("closure at %s rule stop %s\n", self.recog.getRuleNames()[config.state.ruleIndex], config)
else:
print("closure at rule stop %s\n", config)
if config.context is None or config.context.hasEmptyPath():
if config.context is None or config.context.isEmpty():
configs.add(config)
return True
else:
configs.add(LexerATNConfig(state=config.state, config=config, context=PredictionContext.EMPTY))
currentAltReachedAcceptState = True
if config.context is not None and not config.context.isEmpty():
for i in range(0,len(config.context)):
if config.context.getReturnState(i) != PredictionContext.EMPTY_RETURN_STATE:
newContext = config.context.getParent(i) # "pop" return state
returnState = self.atn.states[config.context.getReturnState(i)]
c = LexerATNConfig(state=returnState, config=config, context=newContext)
currentAltReachedAcceptState = self.closure(input, c, configs,
currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
return currentAltReachedAcceptState
# optimization
if not config.state.epsilonOnlyTransitions:
if not currentAltReachedAcceptState or not config.passedThroughNonGreedyDecision:
configs.add(config)
for t in config.state.transitions:
c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
if c is not None:
currentAltReachedAcceptState = self.closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
return currentAltReachedAcceptState
# side-effect: can alter configs.hasSemanticContext
def getEpsilonTarget(self, input, config, t, configs, speculative, treatEofAsEpsilon):
c = None
if t.serializationType==Transition.RULE:
newContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber)
c = LexerATNConfig(state=t.target, config=config, context=newContext)
elif t.serializationType==Transition.PRECEDENCE:
raise UnsupportedOperationException("Precedence predicates are not supported in lexers.")
elif t.serializationType==Transition.PREDICATE:
# Track traversing semantic predicates. If we traverse,
# we cannot add a DFA state for this "reach" computation
# because the DFA would not test the predicate again in the
# future. Rather than creating collections of semantic predicates
# like v3 and testing them on prediction, v4 will test them on the
# fly all the time using the ATN not the DFA. This is slower but
# semantically it's not used that often. One of the key elements to
# this predicate mechanism is not adding DFA states that see
# predicates immediately afterwards in the ATN. For example,
# a : ID {p1}? | ID {p2}? ;
# should create the start state for rule 'a' (to save start state
# competition), but should not create target of ID state. The
# collection of ATN states the following ID references includes
# states reached by traversing predicates. Since this is when we
            # test them, we cannot cache the DFA state target of ID.
if self.debug:
print("EVAL rule "+ str(t.ruleIndex) + ":" + str(t.predIndex))
configs.hasSemanticContext = True
if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative):
c = LexerATNConfig(state=t.target, config=config)
elif t.serializationType==Transition.ACTION:
if config.context is None or config.context.hasEmptyPath():
# execute actions anywhere in the start rule for a token.
#
# TODO: if the entry rule is invoked recursively, some
# actions may be executed during the recursive call. The
# problem can appear when hasEmptyPath() is true but
# isEmpty() is false. In this case, the config needs to be
# split into two contexts - one with just the empty path
# and another with everything but the empty path.
# Unfortunately, the current algorithm does not allow
# getEpsilonTarget to return two configurations, so
# additional modifications are needed before we can support
# the split operation.
lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor,
self.atn.lexerActions[t.actionIndex])
c = LexerATNConfig(state=t.target, config=config, lexerActionExecutor=lexerActionExecutor)
else:
# ignore actions in referenced rules
c = LexerATNConfig(state=t.target, config=config)
elif t.serializationType==Transition.EPSILON:
c = LexerATNConfig(state=t.target, config=config)
elif t.serializationType in [ Transition.ATOM, Transition.RANGE, Transition.SET ]:
if treatEofAsEpsilon:
if t.matches(Token.EOF, 0, 0xFFFF):
c = LexerATNConfig(state=t.target, config=config)
return c
# Evaluate a predicate specified in the lexer.
#
# <p>If {@code speculative} is {@code true}, this method was called before
# {@link #consume} for the matched character. This method should call
# {@link #consume} before evaluating the predicate to ensure position
# sensitive values, including {@link Lexer#getText}, {@link Lexer#getLine},
# and {@link Lexer#getcolumn}, properly reflect the current
# lexer state. This method should restore {@code input} and the simulator
# to the original state before returning (i.e. undo the actions made by the
    # call to {@link #consume}).</p>
#
# @param input The input stream.
# @param ruleIndex The rule containing the predicate.
# @param predIndex The index of the predicate within the rule.
# @param speculative {@code true} if the current index in {@code input} is
# one character before the predicate's location.
#
# @return {@code true} if the specified predicate evaluates to
# {@code true}.
#/
def evaluatePredicate(self, input, ruleIndex, predIndex, speculative):
# assume true if no recognizer was provided
if self.recog is None:
return True
if not speculative:
return self.recog.sempred(None, ruleIndex, predIndex)
savedcolumn = self.column
savedLine = self.line
index = input.index
marker = input.mark()
try:
self.consume(input)
return self.recog.sempred(None, ruleIndex, predIndex)
finally:
self.column = savedcolumn
self.line = savedLine
input.seek(index)
input.release(marker)
def captureSimState(self, settings, input, dfaState):
settings.index = input.index
settings.line = self.line
settings.column = self.column
settings.dfaState = dfaState
def addDFAEdge(self, from_, tk, to=None, cfgs=None):
if to is None and cfgs is not None:
# leading to this call, ATNConfigSet.hasSemanticContext is used as a
# marker indicating dynamic predicate evaluation makes this edge
# dependent on the specific input sequence, so the static edge in the
# DFA should be omitted. The target DFAState is still created since
# execATN has the ability to resynchronize with the DFA state cache
# following the predicate evaluation step.
#
# TJP notes: next time through the DFA, we see a pred again and eval.
# If that gets us to a previously created (but dangling) DFA
# state, we can continue in pure DFA mode from there.
#/
suppressEdge = cfgs.hasSemanticContext
cfgs.hasSemanticContext = False
to = self.addDFAState(cfgs)
if suppressEdge:
return to
# add the edge
if tk < self.MIN_DFA_EDGE or tk > self.MAX_DFA_EDGE:
# Only track edges within the DFA bounds
return to
if self.debug:
print("EDGE " + str(from_) + " -> " + str(to) + " upon "+ chr(tk))
if from_.edges is None:
# make room for tokens 1..n and -1 masquerading as index 0
from_.edges = [ None ] * (self.MAX_DFA_EDGE - self.MIN_DFA_EDGE + 1)
from_.edges[tk - self.MIN_DFA_EDGE] = to # connect
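        # e.g. with MIN_DFA_EDGE == 0, the character 'a' (code point 97) lands in
        # from_.edges[97]; code points above MAX_DFA_EDGE never get a cached edge
        # and are re-resolved through full ATN simulation on every match.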
return to
# Add a new DFA state if there isn't one with this set of
# configurations already. This method also detects the first
# configuration containing an ATN rule stop state. Later, when
# traversing the DFA, we will know which rule to accept.
def addDFAState(self, configs):
# the lexer evaluates predicates on-the-fly; by this point configs
# should not contain any configurations with unevaluated predicates.
assert not configs.hasSemanticContext
proposed = DFAState(configs=configs)
firstConfigWithRuleStopState = None
for c in configs:
if isinstance(c.state, RuleStopState):
firstConfigWithRuleStopState = c
break
if firstConfigWithRuleStopState is not None:
proposed.isAcceptState = True
proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor
proposed.prediction = self.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex]
dfa = self.decisionToDFA[self.mode]
existing = dfa.states.get(proposed, None)
if existing is not None:
return existing
newState = proposed
newState.stateNumber = len(dfa.states)
configs.setReadonly(True)
newState.configs = configs
dfa.states[newState] = newState
return newState
def getDFA(self, mode):
return self.decisionToDFA[mode]
# Get the text matched so far for the current token.
def getText(self, input):
# index is first lookahead char, don't include.
return input.getText(self.startIndex, input.index-1)
def consume(self, input):
curChar = input.LA(1)
if curChar==ord('\n'):
self.line += 1
self.column = 0
else:
self.column += 1
input.consume()
def getTokenName(self, t):
if t==-1:
return "EOF"
else:
return "'" + chr(t) + "'"

View File

@@ -0,0 +1,316 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
class LexerActionType(object):
CHANNEL = 0 #The type of a {@link LexerChannelAction} action.
CUSTOM = 1 #The type of a {@link LexerCustomAction} action.
MODE = 2 #The type of a {@link LexerModeAction} action.
MORE = 3 #The type of a {@link LexerMoreAction} action.
POP_MODE = 4 #The type of a {@link LexerPopModeAction} action.
PUSH_MODE = 5 #The type of a {@link LexerPushModeAction} action.
SKIP = 6 #The type of a {@link LexerSkipAction} action.
TYPE = 7 #The type of a {@link LexerTypeAction} action.
class LexerAction(object):
def __init__(self, action):
self.actionType = action
self.isPositionDependent = False
def __hash__(self):
return hash(str(self.actionType))
def __eq__(self, other):
return self is other
def __str__(self):
return unicode(self)
def __unicode__(self):
return unicode(super(LexerAction, self))
#
# Implements the {@code skip} lexer action by calling {@link Lexer#skip}.
#
# <p>The {@code skip} command does not have any parameters, so this action is
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
class LexerSkipAction(LexerAction ):
# Provides a singleton instance of this parameterless lexer action.
INSTANCE = None
def __init__(self):
super(LexerSkipAction, self).__init__(LexerActionType.SKIP)
def execute(self, lexer):
lexer.skip()
def __unicode__(self):
return u"skip"
LexerSkipAction.INSTANCE = LexerSkipAction()
# Implements the {@code type} lexer action by calling {@link Lexer#setType}
# with the assigned type.
class LexerTypeAction(LexerAction):
def __init__(self, type):
super(LexerTypeAction, self).__init__(LexerActionType.TYPE)
self.type = type
def execute(self, lexer):
lexer.type = self.type
def __hash__(self):
return hash(str(self.actionType) + str(self.type))
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, LexerTypeAction):
return False
else:
return self.type == other.type
def __unicode__(self):
return u"type(" + unicode(self.type) + u")"
# Implements the {@code pushMode} lexer action by calling
# {@link Lexer#pushMode} with the assigned mode.
class LexerPushModeAction(LexerAction):
def __init__(self, mode):
super(LexerPushModeAction, self).__init__(LexerActionType.PUSH_MODE)
self.mode = mode
# <p>This action is implemented by calling {@link Lexer#pushMode} with the
# value provided by {@link #getMode}.</p>
def execute(self, lexer):
lexer.pushMode(self.mode)
def __hash__(self):
return hash(str(self.actionType) + str(self.mode))
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, LexerPushModeAction):
return False
else:
return self.mode == other.mode
def __unicode__(self):
return u"pushMode(" + unicode(self.mode) + u")"
# Implements the {@code popMode} lexer action by calling {@link Lexer#popMode}.
#
# <p>The {@code popMode} command does not have any parameters, so this action is
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
class LexerPopModeAction(LexerAction):
INSTANCE = None
def __init__(self):
super(LexerPopModeAction, self).__init__(LexerActionType.POP_MODE)
# <p>This action is implemented by calling {@link Lexer#popMode}.</p>
def execute(self, lexer):
lexer.popMode()
def __unicode__(self):
return "popMode"
LexerPopModeAction.INSTANCE = LexerPopModeAction()
# Implements the {@code more} lexer action by calling {@link Lexer#more}.
#
# <p>The {@code more} command does not have any parameters, so this action is
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
class LexerMoreAction(LexerAction):
INSTANCE = None
def __init__(self):
super(LexerMoreAction, self).__init__(LexerActionType.MORE)
    # <p>This action is implemented by calling {@link Lexer#more}.</p>
def execute(self, lexer):
lexer.more()
def __unicode__(self):
return "more"
LexerMoreAction.INSTANCE = LexerMoreAction()
# Implements the {@code mode} lexer action by calling {@link Lexer#mode} with
# the assigned mode.
class LexerModeAction(LexerAction):
def __init__(self, mode):
super(LexerModeAction, self).__init__(LexerActionType.MODE)
self.mode = mode
# <p>This action is implemented by calling {@link Lexer#mode} with the
# value provided by {@link #getMode}.</p>
def execute(self, lexer):
lexer.mode(self.mode)
def __hash__(self):
return hash(str(self.actionType) + str(self.mode))
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, LexerModeAction):
return False
else:
return self.mode == other.mode
def __unicode__(self):
return u"mode(" + unicode(self.mode) + u")"
# Executes a custom lexer action by calling {@link Recognizer#action} with the
# rule and action indexes assigned to the custom action. The implementation of
# a custom action is added to the generated code for the lexer in an override
# of {@link Recognizer#action} when the grammar is compiled.
#
# <p>This class may represent embedded actions created with the <code>{...}</code>
# syntax in ANTLR 4, as well as actions created for lexer commands where the
# command argument could not be evaluated when the grammar was compiled.</p>
class LexerCustomAction(LexerAction):
# Constructs a custom lexer action with the specified rule and action
# indexes.
#
# @param ruleIndex The rule index to use for calls to
# {@link Recognizer#action}.
# @param actionIndex The action index to use for calls to
# {@link Recognizer#action}.
#/
def __init__(self, ruleIndex, actionIndex):
super(LexerCustomAction, self).__init__(LexerActionType.CUSTOM)
self.ruleIndex = ruleIndex
self.actionIndex = actionIndex
self.isPositionDependent = True
# <p>Custom actions are implemented by calling {@link Lexer#action} with the
# appropriate rule and action indexes.</p>
def execute(self, lexer):
lexer.action(None, self.ruleIndex, self.actionIndex)
def __hash__(self):
return hash(str(self.actionType) + str(self.ruleIndex) + str(self.actionIndex))
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, LexerCustomAction):
return False
else:
return self.ruleIndex == other.ruleIndex and self.actionIndex == other.actionIndex
# Implements the {@code channel} lexer action by calling
# {@link Lexer#setChannel} with the assigned channel.
class LexerChannelAction(LexerAction):
# Constructs a new {@code channel} action with the specified channel value.
# @param channel The channel value to pass to {@link Lexer#setChannel}.
def __init__(self, channel):
super(LexerChannelAction, self).__init__(LexerActionType.CHANNEL)
self.channel = channel
# <p>This action is implemented by calling {@link Lexer#setChannel} with the
# value provided by {@link #getChannel}.</p>
def execute(self, lexer):
lexer._channel = self.channel
def __hash__(self):
return hash(str(self.actionType) + str(self.channel))
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, LexerChannelAction):
return False
else:
return self.channel == other.channel
def __unicode__(self):
return u"channel(" + unicode(self.channel) + u")"
# This implementation of {@link LexerAction} is used for tracking input offsets
# for position-dependent actions within a {@link LexerActionExecutor}.
#
# <p>This action is not serialized as part of the ATN, and is only required for
# position-dependent lexer actions which appear at a location other than the
# end of a rule. For more information about DFA optimizations employed for
# lexer actions, see {@link LexerActionExecutor#append} and
# {@link LexerActionExecutor#fixOffsetBeforeMatch}.</p>
class LexerIndexedCustomAction(LexerAction):
# Constructs a new indexed custom action by associating a character offset
# with a {@link LexerAction}.
#
# <p>Note: This class is only required for lexer actions for which
# {@link LexerAction#isPositionDependent} returns {@code true}.</p>
#
# @param offset The offset into the input {@link CharStream}, relative to
# the token start index, at which the specified lexer action should be
# executed.
# @param action The lexer action to execute at a particular offset in the
# input {@link CharStream}.
def __init__(self, offset, action):
super(LexerIndexedCustomAction, self).__init__(action.actionType)
self.offset = offset
self.action = action
self.isPositionDependent = True
# <p>This method calls {@link #execute} on the result of {@link #getAction}
# using the provided {@code lexer}.</p>
def execute(self, lexer):
# assume the input stream position was properly set by the calling code
self.action.execute(lexer)
def __hash__(self):
return hash(str(self.actionType) + str(self.offset) + str(self.action))
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, LexerIndexedCustomAction):
return False
else:
return self.offset == other.offset and self.action == other.action
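# A small self-contained check (not part of the runtime) illustrating the equality
# rules above: parameterized actions compare by value, while parameterless actions
# fall back to identity, which is why they expose shared INSTANCE singletons.
if __name__ == '__main__':
    print(LexerChannelAction(2) == LexerChannelAction(2))       # True: same channel value
    print(LexerModeAction(1) == LexerPushModeAction(1))         # False: different action types
    print(LexerSkipAction() == LexerSkipAction())                # False: identity comparison
    print(LexerSkipAction.INSTANCE is LexerSkipAction.INSTANCE)  # True: shared singleton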

View File

@@ -0,0 +1,160 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
# Represents an executor for a sequence of lexer actions which traversed during
# the matching operation of a lexer rule (token).
#
# <p>The executor tracks position information for position-dependent lexer actions
# efficiently, ensuring that actions appearing only at the end of the rule do
# not cause bloating of the {@link DFA} created for the lexer.</p>
from antlr4.atn.LexerAction import LexerIndexedCustomAction
class LexerActionExecutor(object):
def __init__(self, lexerActions=list()):
self.lexerActions = lexerActions
# Caches the result of {@link #hashCode} since the hash code is an element
# of the performance-critical {@link LexerATNConfig#hashCode} operation.
self.hashCode = hash("".join([str(la) for la in lexerActions]))
# Creates a {@link LexerActionExecutor} which executes the actions for
# the input {@code lexerActionExecutor} followed by a specified
# {@code lexerAction}.
#
# @param lexerActionExecutor The executor for actions already traversed by
# the lexer while matching a token within a particular
# {@link LexerATNConfig}. If this is {@code null}, the method behaves as
# though it were an empty executor.
# @param lexerAction The lexer action to execute after the actions
# specified in {@code lexerActionExecutor}.
#
# @return A {@link LexerActionExecutor} for executing the combined actions
# of {@code lexerActionExecutor} and {@code lexerAction}.
@staticmethod
def append(lexerActionExecutor, lexerAction):
if lexerActionExecutor is None:
return LexerActionExecutor([ lexerAction ])
lexerActions = lexerActionExecutor.lexerActions + [ lexerAction ]
return LexerActionExecutor(lexerActions)
# Creates a {@link LexerActionExecutor} which encodes the current offset
# for position-dependent lexer actions.
#
# <p>Normally, when the executor encounters lexer actions where
# {@link LexerAction#isPositionDependent} returns {@code true}, it calls
# {@link IntStream#seek} on the input {@link CharStream} to set the input
# position to the <em>end</em> of the current token. This behavior provides
# for efficient DFA representation of lexer actions which appear at the end
# of a lexer rule, even when the lexer rule matches a variable number of
# characters.</p>
#
# <p>Prior to traversing a match transition in the ATN, the current offset
# from the token start index is assigned to all position-dependent lexer
# actions which have not already been assigned a fixed offset. By storing
# the offsets relative to the token start index, the DFA representation of
# lexer actions which appear in the middle of tokens remains efficient due
# to sharing among tokens of the same length, regardless of their absolute
# position in the input stream.</p>
#
# <p>If the current executor already has offsets assigned to all
# position-dependent lexer actions, the method returns {@code this}.</p>
#
# @param offset The current offset to assign to all position-dependent
# lexer actions which do not already have offsets assigned.
#
# @return A {@link LexerActionExecutor} which stores input stream offsets
# for all position-dependent lexer actions.
#/
def fixOffsetBeforeMatch(self, offset):
updatedLexerActions = None
for i in range(0, len(self.lexerActions)):
if self.lexerActions[i].isPositionDependent and not isinstance(self.lexerActions[i], LexerIndexedCustomAction):
if updatedLexerActions is None:
updatedLexerActions = [ la for la in self.lexerActions ]
updatedLexerActions[i] = LexerIndexedCustomAction(offset, self.lexerActions[i])
if updatedLexerActions is None:
return self
else:
return LexerActionExecutor(updatedLexerActions)
# Execute the actions encapsulated by this executor within the context of a
# particular {@link Lexer}.
#
# <p>This method calls {@link IntStream#seek} to set the position of the
# {@code input} {@link CharStream} prior to calling
# {@link LexerAction#execute} on a position-dependent action. Before the
# method returns, the input position will be restored to the same position
# it was in when the method was invoked.</p>
#
# @param lexer The lexer instance.
# @param input The input stream which is the source for the current token.
# When this method is called, the current {@link IntStream#index} for
# {@code input} should be the start of the following token, i.e. 1
# character past the end of the current token.
# @param startIndex The token start index. This value may be passed to
# {@link IntStream#seek} to set the {@code input} position to the beginning
# of the token.
#/
def execute(self, lexer, input, startIndex):
requiresSeek = False
stopIndex = input.index
try:
for lexerAction in self.lexerActions:
if isinstance(lexerAction, LexerIndexedCustomAction):
offset = lexerAction.offset
input.seek(startIndex + offset)
lexerAction = lexerAction.action
requiresSeek = (startIndex + offset) != stopIndex
elif lexerAction.isPositionDependent:
input.seek(stopIndex)
requiresSeek = False
lexerAction.execute(lexer)
finally:
if requiresSeek:
input.seek(stopIndex)
def __hash__(self):
return self.hashCode
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, LexerActionExecutor):
return False
else:
return self.hashCode == other.hashCode \
and self.lexerActions == other.lexerActions
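# A minimal usage sketch (not part of the runtime, added for clarity): append
# tolerates a None executor while accumulating actions, and fixOffsetBeforeMatch
# returns the executor unchanged when it holds no unanchored position-dependent
# actions (channel actions are not position-dependent).
if __name__ == '__main__':
    from antlr4.atn.LexerAction import LexerChannelAction
    executor = LexerActionExecutor.append(None, LexerChannelAction(2))
    executor = LexerActionExecutor.append(executor, LexerChannelAction(3))
    assert len(executor.lexerActions) == 2
    assert executor.fixOffsetBeforeMatch(5) is executor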

File diff suppressed because it is too large

View File

@@ -0,0 +1,544 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# This enumeration defines the prediction modes available in ANTLR 4 along with
# utility methods for analyzing configuration sets for conflicts and/or
# ambiguities.
from antlr4.atn.ATN import ATN
from antlr4.atn.ATNConfig import ATNConfig
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.ATNState import RuleStopState
from antlr4.atn.SemanticContext import SemanticContext
class PredictionMode(object):
#
# The SLL(*) prediction mode. This prediction mode ignores the current
# parser context when making predictions. This is the fastest prediction
# mode, and provides correct results for many grammars. This prediction
# mode is more powerful than the prediction mode provided by ANTLR 3, but
# may result in syntax errors for grammar and input combinations which are
# not SLL.
#
# <p>
# When using this prediction mode, the parser will either return a correct
# parse tree (i.e. the same parse tree that would be returned with the
# {@link #LL} prediction mode), or it will report a syntax error. If a
# syntax error is encountered when using the {@link #SLL} prediction mode,
# it may be due to either an actual syntax error in the input or indicate
# that the particular combination of grammar and input requires the more
# powerful {@link #LL} prediction abilities to complete successfully.</p>
#
# <p>
# This prediction mode does not provide any guarantees for prediction
# behavior for syntactically-incorrect inputs.</p>
#
SLL = 0
#
# The LL(*) prediction mode. This prediction mode allows the current parser
# context to be used for resolving SLL conflicts that occur during
# prediction. This is the fastest prediction mode that guarantees correct
# parse results for all combinations of grammars with syntactically correct
# inputs.
#
# <p>
# When using this prediction mode, the parser will make correct decisions
# for all syntactically-correct grammar and input combinations. However, in
# cases where the grammar is truly ambiguous this prediction mode might not
# report a precise answer for <em>exactly which</em> alternatives are
# ambiguous.</p>
#
# <p>
# This prediction mode does not provide any guarantees for prediction
# behavior for syntactically-incorrect inputs.</p>
#
LL = 1
#
# The LL(*) prediction mode with exact ambiguity detection. In addition to
# the correctness guarantees provided by the {@link #LL} prediction mode,
# this prediction mode instructs the prediction algorithm to determine the
# complete and exact set of ambiguous alternatives for every ambiguous
# decision encountered while parsing.
#
# <p>
# This prediction mode may be used for diagnosing ambiguities during
# grammar development. Due to the performance overhead of calculating sets
# of ambiguous alternatives, this prediction mode should be avoided when
# the exact results are not necessary.</p>
#
# <p>
# This prediction mode does not provide any guarantees for prediction
# behavior for syntactically-incorrect inputs.</p>
#
LL_EXACT_AMBIG_DETECTION = 2
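# A hypothetical usage sketch (MyLexer, MyParser and startRule are assumed
# generated names, not part of this module): callers normally select a
# prediction mode on the parser's ATN simulator before parsing, e.g.
#
#   parser = MyParser(CommonTokenStream(MyLexer(InputStream(u"..."))))
#   parser._interp.predictionMode = PredictionMode.SLL
#   tree = parser.startRule()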
#
# Computes the SLL prediction termination condition.
#
# <p>
# This method computes the SLL prediction termination condition for both of
# the following cases.</p>
#
# <ul>
# <li>The usual SLL+LL fallback upon SLL conflict</li>
# <li>Pure SLL without LL fallback</li>
# </ul>
#
# <p><strong>COMBINED SLL+LL PARSING</strong></p>
#
# <p>When LL-fallback is enabled upon SLL conflict, correct predictions are
# ensured regardless of how the termination condition is computed by this
# method. Due to the substantially higher cost of LL prediction, the
# prediction should only fall back to LL when the additional lookahead
# cannot lead to a unique SLL prediction.</p>
#
# <p>Assuming combined SLL+LL parsing, an SLL configuration set with only
# conflicting subsets should fall back to full LL, even if the
# configuration sets don't resolve to the same alternative (e.g.
# {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting
# configuration, SLL could continue with the hopes that more lookahead will
# resolve via one of those non-conflicting configurations.</p>
#
# <p>Here's the prediction termination rule then: SLL (for SLL+LL parsing)
# stops when it sees only conflicting configuration subsets. In contrast,
# full LL keeps going when there is uncertainty.</p>
#
# <p><strong>HEURISTIC</strong></p>
#
# <p>As a heuristic, we stop prediction when we see any conflicting subset
# unless we see a state that only has one alternative associated with it.
# The single-alt-state thing lets prediction continue upon rules like
# (otherwise, it would admit defeat too soon):</p>
#
# <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p>
#
# <p>When the ATN simulation reaches the state before {@code ';'}, it has a
# DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
# {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
# processing this node because alternative two has another way to continue,
# via {@code [6|2|[]]}.</p>
#
# <p>It also lets us continue for this rule:</p>
#
# <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p>
#
# <p>After matching input A, we reach the stop state for rule A, state 1.
# State 8 is the state right before B. Clearly alternatives 1 and 2
# conflict and no amount of further lookahead will separate the two.
# However, alternative 3 will be able to continue and so we do not stop
# working on this state. In the previous example, we're concerned with
# states associated with the conflicting alternatives. Here alt 3 is not
# associated with the conflicting configs, but since we can continue
# looking for input reasonably, don't declare the state done.</p>
#
# <p><strong>PURE SLL PARSING</strong></p>
#
# <p>To handle pure SLL parsing, all we have to do is make sure that we
# combine stack contexts for configurations that differ only by semantic
# predicate. From there, we can do the usual SLL termination heuristic.</p>
#
# <p><strong>PREDICATES IN SLL+LL PARSING</strong></p>
#
# <p>SLL decisions don't evaluate predicates until after they reach DFA stop
# states because they need to create the DFA cache that works in all
# semantic situations. In contrast, full LL evaluates predicates collected
# during start state computation so it can ignore predicates thereafter.
# This means that SLL termination detection can totally ignore semantic
# predicates.</p>
#
# <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
# semantic predicate contexts so we might see two configurations like the
# following.</p>
#
# <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p>
#
# <p>Before testing these configurations against others, we have to merge
# {@code x} and {@code x'} (without modifying the existing configurations).
# For example, we test {@code (x+x')==x''} when looking for conflicts in
# the following configurations.</p>
#
# <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p>
#
# <p>If the configuration set has predicates (as indicated by
# {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
# the configurations to strip out all of the predicates so that a standard
# {@link ATNConfigSet} will merge everything ignoring predicates.</p>
#
@classmethod
def hasSLLConflictTerminatingPrediction(cls, mode, configs):
# Configs in rule stop states indicate reaching the end of the decision
# rule (local context) or end of start rule (full context). If all
# configs meet this condition, then none of the configurations is able
# to match additional input so we terminate prediction.
#
if cls.allConfigsInRuleStopStates(configs):
return True
# pure SLL mode parsing
if mode == PredictionMode.SLL:
# Don't bother with combining configs from different semantic
# contexts if we can fail over to full LL; costs more time
# since we'll often fail over anyway.
if configs.hasSemanticContext:
# dup configs, tossing out semantic predicates
dup = ATNConfigSet()
for c in configs:
c = ATNConfig(c,SemanticContext.NONE)
dup.add(c)
configs = dup
# now we have combined contexts for configs with dissimilar preds
# pure SLL or combined SLL+LL mode parsing
altsets = cls.getConflictingAltSubsets(configs)
return cls.hasConflictingAltSet(altsets) and not cls.hasStateAssociatedWithOneAlt(configs)
# Checks if any configuration in {@code configs} is in a
# {@link RuleStopState}. Configurations meeting this condition have reached
# the end of the decision rule (local context) or end of start rule (full
# context).
#
# @param configs the configuration set to test
# @return {@code true} if any configuration in {@code configs} is in a
# {@link RuleStopState}, otherwise {@code false}
@classmethod
def hasConfigInRuleStopState(cls, configs):
for c in configs:
if isinstance(c.state, RuleStopState):
return True
return False
# Checks if all configurations in {@code configs} are in a
# {@link RuleStopState}. Configurations meeting this condition have reached
# the end of the decision rule (local context) or end of start rule (full
# context).
#
# @param configs the configuration set to test
# @return {@code true} if all configurations in {@code configs} are in a
# {@link RuleStopState}, otherwise {@code false}
@classmethod
def allConfigsInRuleStopStates(cls, configs):
for config in configs:
if not isinstance(config.state, RuleStopState):
return False
return True
#
# Full LL prediction termination.
#
# <p>Can we stop looking ahead during ATN simulation or is there some
# uncertainty as to which alternative we will ultimately pick, after
# consuming more input? Even if there are partial conflicts, we might know
# that everything is going to resolve to the same minimum alternative. That
# means we can stop since no more lookahead will change that fact. On the
# other hand, there might be multiple conflicts that resolve to different
# minimums. That means we need more look ahead to decide which of those
# alternatives we should predict.</p>
#
# <p>The basic idea is to split the set of configurations {@code C}, into
# conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
# non-conflicting configurations. Two configurations conflict if they have
# identical {@link ATNConfig#state} and {@link ATNConfig#context} values
# but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
# and {@code (s, j, ctx, _)} for {@code i!=j}.</p>
#
# <p>Reduce these configuration subsets to the set of possible alternatives.
# You can compute the alternative subsets in one pass as follows:</p>
#
# <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
# {@code C} holding {@code s} and {@code ctx} fixed.</p>
#
# <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p>
#
# <pre>
# map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
# alt and not pred
# </pre>
#
# <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p>
#
# <p>If {@code |A_s,ctx|=1} then there is no conflict associated with
# {@code s} and {@code ctx}.</p>
#
# <p>Reduce the subsets to singletons by choosing a minimum of each subset. If
# the union of these alternative subsets is a singleton, then no amount of
# more lookahead will help us. We will always pick that alternative. If,
# however, there is more than one alternative, then we are uncertain which
# alternative to predict and must continue looking for resolution. We may
# or may not discover an ambiguity in the future, even if there are no
# conflicting subsets this round.</p>
#
# <p>The biggest sin is to terminate early because it means we've made a
# decision but were uncertain as to the eventual outcome. We haven't used
# enough lookahead. On the other hand, announcing a conflict too late is no
# big deal; you will still have the conflict. It's just inefficient. It
# might even look until the end of file.</p>
#
# <p>No special consideration for semantic predicates is required because
# predicates are evaluated on-the-fly for full LL prediction, ensuring that
# no configuration contains a semantic context during the termination
# check.</p>
#
# <p><strong>CONFLICTING CONFIGS</strong></p>
#
# <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
# when {@code i!=j} but {@code x=x'}. Because we merge all
# {@code (s, i, _)} configurations together, that means that there are at
# most {@code n} configurations associated with state {@code s} for
# {@code n} possible alternatives in the decision. The merged stacks
# complicate the comparison of configuration contexts {@code x} and
# {@code x'}. Sam checks to see if one is a subset of the other by calling
# merge and checking to see if the merged result is either {@code x} or
# {@code x'}. If the {@code x} associated with lowest alternative {@code i}
# is the superset, then {@code i} is the only possible prediction since the
# others resolve to {@code min(i)} as well. However, if {@code x} is
# associated with {@code j>i} then at least one stack configuration for
# {@code j} is not in conflict with alternative {@code i}. The algorithm
# should keep going, looking for more lookahead due to the uncertainty.</p>
#
# <p>For simplicity, I'm doing an equality check between {@code x} and
# {@code x'} that lets the algorithm continue to consume lookahead longer
# than necessary. The reason I like the equality is of course the
# simplicity but also because that is the test you need to detect the
# alternatives that are actually in conflict.</p>
#
# <p><strong>CONTINUE/STOP RULE</strong></p>
#
# <p>Continue if union of resolved alternative sets from non-conflicting and
# conflicting alternative subsets has more than one alternative. We are
# uncertain about which alternative to predict.</p>
#
# <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
# alternatives are still in the running for the amount of input we've
# consumed at this point. The conflicting sets let us strip away
# configurations that won't lead to more states because we resolve
# conflicts to the configuration with a minimum alternate for the
# conflicting set.</p>
#
# <p><strong>CASES</strong></p>
#
# <ul>
#
# <li>no conflicts and more than 1 alternative in set =&gt; continue</li>
#
# <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
# {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
# {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
# {@code {1,3}} =&gt; continue
# </li>
#
# <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
# {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
# {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
# {@code {1}} =&gt; stop and predict 1</li>
#
# <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
# {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
# {@code {1}} = {@code {1}} =&gt; stop and predict 1, can announce
# ambiguity {@code {1,2}}</li>
#
# <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
# {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
# {@code {2}} = {@code {1,2}} =&gt; continue</li>
#
# <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
# {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
# {@code {3}} = {@code {1,3}} =&gt; continue</li>
#
# </ul>
#
# <p><strong>EXACT AMBIGUITY DETECTION</strong></p>
#
# <p>If all states report the same conflicting set of alternatives, then we
# know we have the exact ambiguity set.</p>
#
# <p><code>|A_<em>i</em>|&gt;1</code> and
# <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p>
#
# <p>In other words, we continue examining lookahead until all {@code A_i}
# have more than one alternative and all {@code A_i} are the same. If
# {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
# because the resolved set is {@code {1}}. To determine what the real
# ambiguity is, we have to know whether the ambiguity is between one and
# two or one and three so we keep going. We can only stop prediction when
# we need exact ambiguity detection when the sets look like
# {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p>
#
@classmethod
def resolvesToJustOneViableAlt(cls, altsets):
return cls.getSingleViableAlt(altsets)
#
# Determines if every alternative subset in {@code altsets} contains more
# than one alternative.
#
# @param altsets a collection of alternative subsets
# @return {@code true} if every {@link BitSet} in {@code altsets} has
# {@link BitSet#cardinality cardinality} &gt; 1, otherwise {@code false}
#
@classmethod
def allSubsetsConflict(cls, altsets):
return not cls.hasNonConflictingAltSet(altsets)
#
# Determines if any single alternative subset in {@code altsets} contains
# exactly one alternative.
#
# @param altsets a collection of alternative subsets
# @return {@code true} if {@code altsets} contains a {@link BitSet} with
# {@link BitSet#cardinality cardinality} 1, otherwise {@code false}
#
@classmethod
def hasNonConflictingAltSet(cls, altsets):
for alts in altsets:
if len(alts)==1:
return True
return False
#
# Determines if any single alternative subset in {@code altsets} contains
# more than one alternative.
#
# @param altsets a collection of alternative subsets
# @return {@code true} if {@code altsets} contains a {@link BitSet} with
# {@link BitSet#cardinality cardinality} &gt; 1, otherwise {@code false}
#
@classmethod
def hasConflictingAltSet(cls, altsets):
for alts in altsets:
if len(alts)>1:
return True
return False
#
# Determines if every alternative subset in {@code altsets} is equivalent.
#
# @param altsets a collection of alternative subsets
# @return {@code true} if every member of {@code altsets} is equal to the
# others, otherwise {@code false}
#
@classmethod
def allSubsetsEqual(cls, altsets):
first = None
for alts in altsets:
if first is None:
first = alts
elif not alts==first:
return False
return True
#
# Returns the unique alternative predicted by all alternative subsets in
# {@code altsets}. If no such alternative exists, this method returns
# {@link ATN#INVALID_ALT_NUMBER}.
#
# @param altsets a collection of alternative subsets
#
@classmethod
def getUniqueAlt(cls, altsets):
all = cls.getAlts(altsets)
if len(all)==1:
return next(iter(all))  # 'all' is a set, so take its single element rather than indexing
else:
return ATN.INVALID_ALT_NUMBER
# Gets the complete set of represented alternatives for a collection of
# alternative subsets. This method returns the union of each {@link BitSet}
# in {@code altsets}.
#
# @param altsets a collection of alternative subsets
# @return the set of represented alternatives in {@code altsets}
#
@classmethod
def getAlts(cls, altsets):
all = set()
for alts in altsets:
all = all | alts
return all
#
# This function gets the conflicting alt subsets from a configuration set.
# For each configuration {@code c} in {@code configs}:
#
# <pre>
# map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
# alt and not pred
# </pre>
#
@classmethod
def getConflictingAltSubsets(cls, configs):
configToAlts = dict()
for c in configs:
s = str(c.state.stateNumber) + "/" + str(c.context)
alts = configToAlts.get(s, None)
if alts is None:
alts = set()
configToAlts[s] = alts
alts.add(c.alt)
return configToAlts.values()
#
# Get a map from state to alt subset from a configuration set. For each
# configuration {@code c} in {@code configs}:
#
# <pre>
# map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
# </pre>
#
@classmethod
def getStateToAltMap(cls, configs):
m = dict()
for c in configs:
alts = m.get(c.state, None)
if alts is None:
alts = set()
m[c.state] = alts
alts.add(c.alt)
return m
@classmethod
def hasStateAssociatedWithOneAlt(cls, configs):
x = cls.getStateToAltMap(configs)
for alts in x.values():
if len(alts)==1:
return True
return False
@classmethod
def getSingleViableAlt(cls, altsets):
viableAlts = set()
for alts in altsets:
minAlt = min(alts)
viableAlts.add(minAlt)
if len(viableAlts)>1 : # more than 1 viable alt
return ATN.INVALID_ALT_NUMBER
return min(viableAlts)
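# A minimal self-check sketch (not part of the runtime, added for clarity): the
# subset helpers operate on plain collections of alt sets, so they can be
# exercised directly with ordinary Python sets.
if __name__ == '__main__':
    altsets = [set([1, 2]), set([1, 2]), set([3])]
    assert PredictionMode.hasConflictingAltSet(altsets)      # {1,2} holds more than one alt
    assert PredictionMode.hasNonConflictingAltSet(altsets)   # {3} holds exactly one alt
    assert not PredictionMode.allSubsetsEqual(altsets)
    assert PredictionMode.getAlts(altsets) == set([1, 2, 3])
    assert PredictionMode.getUniqueAlt([set([7]), set([7])]) == 7
    assert PredictionMode.resolvesToJustOneViableAlt([set([1, 2]), set([1, 3])]) == 1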

View File

@@ -0,0 +1,360 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# A tree structure used to record the semantic context in which
# an ATN configuration is valid. It's either a single predicate,
# a conjunction {@code p1&&p2}, or a sum of products {@code p1||p2}.
#
# <p>I have scoped the {@link AND}, {@link OR}, and {@link Predicate} subclasses of
# {@link SemanticContext} within the scope of this outer class.</p>
#
from io import StringIO
class SemanticContext(object):
#
# The default {@link SemanticContext}, which is semantically equivalent to
# a predicate of the form {@code {true}?}.
#
NONE = None
#
# For context independent predicates, we evaluate them without a local
# context (i.e., null context). That way, we can evaluate them without
# having to create proper rule-specific context during prediction (as
# opposed to the parser, which creates them naturally). In a practical
# sense, this avoids a cast exception from RuleContext to myruleContext.
#
# <p>For context dependent predicates, we must pass in a local context so that
# references such as $arg evaluate properly as _localctx.arg. We only
# capture context dependent predicates in the context in which we begin
# prediction, so we pass in the outer context here in case of context
# dependent predicate evaluation.</p>
#
def eval(self, parser, outerContext):
pass
#
# Evaluate the precedence predicates for the context and reduce the result.
#
# @param parser The parser instance.
# @param outerContext The current parser context object.
# @return The simplified semantic context after precedence predicates are
# evaluated, which will be one of the following values.
# <ul>
# <li>{@link #NONE}: if the predicate simplifies to {@code true} after
# precedence predicates are evaluated.</li>
# <li>{@code null}: if the predicate simplifies to {@code false} after
# precedence predicates are evaluated.</li>
# <li>{@code this}: if the semantic context is not changed as a result of
# precedence predicate evaluation.</li>
# <li>A non-{@code null} {@link SemanticContext}: the new simplified
# semantic context after precedence predicates are evaluated.</li>
# </ul>
#
def evalPrecedence(self, parser, outerContext):
return self
def __str__(self):
return unicode(self)
def __unicode__(self):
return unicode(super(SemanticContext, self))
def andContext(a, b):
if a is None or a is SemanticContext.NONE:
return b
if b is None or b is SemanticContext.NONE:
return a
result = AND(a, b)
if len(result.opnds) == 1:
return result.opnds[0]
else:
return result
def orContext(a, b):
if a is None:
return b
if b is None:
return a
if a is SemanticContext.NONE or b is SemanticContext.NONE:
return SemanticContext.NONE
result = OR(a, b)
if len(result.opnds) == 1:
return result.opnds[0]
else:
return result
def filterPrecedencePredicates(collection):
    # keep only the precedence predicates from the collection
    return [ context for context in collection if isinstance(context, PrecedencePredicate) ]
class Predicate(SemanticContext):
def __init__(self, ruleIndex=-1, predIndex=-1, isCtxDependent=False):
self.ruleIndex = ruleIndex
self.predIndex = predIndex
self.isCtxDependent = isCtxDependent # e.g., $i ref in pred
def eval(self, parser, outerContext):
localctx = outerContext if self.isCtxDependent else None
return parser.sempred(localctx, self.ruleIndex, self.predIndex)
def __hash__(self):
with StringIO() as buf:
buf.write(unicode(self.ruleIndex))
buf.write(u"/")
buf.write(unicode(self.predIndex))
buf.write(u"/")
buf.write(unicode(self.isCtxDependent))
return hash(buf.getvalue())
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, Predicate):
return False
return self.ruleIndex == other.ruleIndex and \
self.predIndex == other.predIndex and \
self.isCtxDependent == other.isCtxDependent
def __unicode__(self):
return u"{" + unicode(self.ruleIndex) + u":" + unicode(self.predIndex) + u"}?"
class PrecedencePredicate(SemanticContext):
def __init__(self, precedence=0):
self.precedence = precedence
def eval(self, parser, outerContext):
return parser.precpred(outerContext, self.precedence)
def evalPrecedence(self, parser, outerContext):
if parser.precpred(outerContext, self.precedence):
return SemanticContext.NONE
else:
return None
def __cmp__(self, other):
return self.precedence - other.precedence
def __hash__(self):
return 31
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, PrecedencePredicate):
return False
else:
return self.precedence == other.precedence
# A semantic context which is true whenever none of the contained contexts
# is false.
#
class AND(SemanticContext):
def __init__(self, a, b):
operands = set()
if isinstance( a, AND):
for o in a.opnds:
operands.add(o)
else:
operands.add(a)
if isinstance( b, AND):
for o in b.opnds:
operands.add(o)
else:
operands.add(b)
precedencePredicates = filterPrecedencePredicates(operands)
if len(precedencePredicates)>0:
# interested in the transition with the lowest precedence
reduced = min(precedencePredicates)
operands.add(reduced)
self.opnds = [ o for o in operands ]
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, AND):
return False
else:
return self.opnds == other.opnds
def __hash__(self):
return hash(str(self.opnds)+ "/AND")
#
# {@inheritDoc}
#
# <p>
# The evaluation of predicates by this context is short-circuiting, but
# unordered.</p>
#
def eval(self, parser, outerContext):
for opnd in self.opnds:
if not opnd.eval(parser, outerContext):
return False
return True
def evalPrecedence(self, parser, outerContext):
differs = False
operands = []
for context in self.opnds:
evaluated = context.evalPrecedence(parser, outerContext)
differs |= evaluated is not context
if evaluated is None:
# The AND context is false if any element is false
return None
elif evaluated is not SemanticContext.NONE:
# Reduce the result by skipping true elements
operands.append(evaluated)
if not differs:
return self
if len(operands)==0:
# all elements were true, so the AND context is true
return SemanticContext.NONE
result = None
for o in operands:
result = o if result is None else andContext(result, o)
return result
def __unicode__(self):
with StringIO() as buf:
first = True
for o in self.opnds:
if not first:
buf.write(u"&&")
buf.write(unicode(o))
first = False
return buf.getvalue()
#
# A semantic context which is true whenever at least one of the contained
# contexts is true.
#
class OR (SemanticContext):
def __init__(self, a, b):
operands = set()
if isinstance( a, OR):
for o in a.opnds:
operands.add(o)
else:
operands.add(a)
if isinstance( b, OR):
for o in b.opnds:
operands.add(o)
else:
operands.add(b)
precedencePredicates = filterPrecedencePredicates(operands)
if len(precedencePredicates)>0:
# interested in the transition with the highest precedence
s = sorted(precedencePredicates)
reduced = s[len(s)-1]
operands.add(reduced)
self.opnds = [ o for o in operands ]
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, OR):
return False
else:
return self.opnds == other.opnds
def __hash__(self):
return hash(str(self.opnds)+"/OR")
# <p>
# The evaluation of predicates by this context is short-circuiting, but
# unordered.</p>
#
def eval(self, parser, outerContext):
for opnd in self.opnds:
if opnd.eval(parser, outerContext):
return True
return False
def evalPrecedence(self, parser, outerContext):
differs = False
operands = []
for context in self.opnds:
evaluated = context.evalPrecedence(parser, outerContext)
differs |= evaluated is not context
if evaluated is SemanticContext.NONE:
# The OR context is true if any element is true
return SemanticContext.NONE
elif evaluated is not None:
# Reduce the result by skipping false elements
operands.append(evaluated)
if not differs:
return self
if len(operands)==0:
# all elements were false, so the OR context is false
return None
result = None
for o in operands:
result = o if result is None else orContext(result, o)
return result
def __unicode__(self):
with StringIO() as buf:
first = True
for o in self.opnds:
if not first:
buf.write(u"||")
buf.write(unicode(o))
first = False
return buf.getvalue()
SemanticContext.NONE = Predicate()
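# A minimal illustration (not part of the runtime, added for clarity): NONE acts
# as the identity for andContext and as the absorbing element for orContext, so
# trivially-true predicates never bloat the resulting context tree.
if __name__ == '__main__':
    p = Predicate(ruleIndex=0, predIndex=1)
    assert andContext(SemanticContext.NONE, p) is p
    assert andContext(None, p) is p
    assert orContext(SemanticContext.NONE, p) is SemanticContext.NONE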

View File

@@ -0,0 +1,279 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# An ATN transition between any two ATN states. Subclasses define
# atom, set, epsilon, action, predicate, rule transitions.
#
# <p>This is a one way link. It emanates from a state (usually via a list of
# transitions) and has a target state.</p>
#
# <p>Since we never have to change the ATN transitions once we construct it,
# we can fix these transitions as specific classes. The DFA transitions
# on the other hand need to update the labels as it adds transitions to
# the states. We'll use the term Edge for the DFA to distinguish them from
# ATN transitions.</p>
#
from __builtin__ import unicode
from antlr4.IntervalSet import IntervalSet, Interval
from antlr4.Token import Token
from antlr4.atn.SemanticContext import Predicate, PrecedencePredicate
class Transition (object):
# constants for serialization
EPSILON = 1
RANGE = 2
RULE = 3
PREDICATE = 4 # e.g., {isType(input.LT(1))}?
ATOM = 5
ACTION = 6
SET = 7 # ~(A|B) or ~atom, wildcard, which convert to next 2
NOT_SET = 8
WILDCARD = 9
PRECEDENCE = 10
serializationNames = [
u"INVALID",
u"EPSILON",
u"RANGE",
u"RULE",
u"PREDICATE",
u"ATOM",
u"ACTION",
u"SET",
u"NOT_SET",
u"WILDCARD",
u"PRECEDENCE"
]
serializationTypes = dict()
def __init__(self, target):
# The target of this transition.
if target is None:
raise Exception("target cannot be null.")
self.target = target
# Are we epsilon, action, sempred?
self.isEpsilon = False
self.label = None
def __str__(self):
return unicode(self)
# TODO: make all transitions sets? no, should remove set edges
class AtomTransition(Transition):
def __init__(self, target, label):
super(AtomTransition, self).__init__(target)
self.label_ = label # The token type or character value; or, signifies special label.
self.label = self.makeLabel()
self.serializationType = self.ATOM
def makeLabel(self):
s = IntervalSet()
s.addOne(self.label_)
return s
def matches( self, symbol, minVocabSymbol, maxVocabSymbol):
return self.label_ == symbol
def __unicode__(self):
return unicode(self.label_)
class RuleTransition(Transition):
def __init__(self, ruleStart, ruleIndex, precedence, followState):
super(RuleTransition, self).__init__(ruleStart)
self.ruleIndex = ruleIndex # ptr to the rule definition object for this rule ref
self.precedence = precedence
self.followState = followState # what node to begin computations following ref to rule
self.serializationType = self.RULE
self.isEpsilon = True
def matches( self, symbol, minVocabSymbol, maxVocabSymbol):
return False
class EpsilonTransition(Transition):
def __init__(self, target, outermostPrecedenceReturn=-1):
super(EpsilonTransition, self).__init__(target)
self.serializationType = self.EPSILON
self.isEpsilon = True
self.outermostPrecedenceReturn = outermostPrecedenceReturn
def matches( self, symbol, minVocabSymbol, maxVocabSymbol):
return False
def __unicode__(self):
return "epsilon"
class RangeTransition(Transition):
def __init__(self, target, start, stop):
super(RangeTransition, self).__init__(target)
self.serializationType = self.RANGE
self.start = start
self.stop = stop
self.label = self.makeLabel()
def makeLabel(self):
s = IntervalSet()
s.addRange(Interval(self.start, self.stop + 1))
return s
def matches( self, symbol, minVocabSymbol, maxVocabSymbol):
return symbol >= self.start and symbol <= self.stop
def __unicode__(self):
return "'" + chr(self.start) + "'..'" + chr(self.stop) + "'"
class AbstractPredicateTransition(Transition):
def __init__(self, target):
super(AbstractPredicateTransition, self).__init__(target)
class PredicateTransition(AbstractPredicateTransition):
def __init__(self, target, ruleIndex, predIndex, isCtxDependent):
super(PredicateTransition, self).__init__(target)
self.serializationType = self.PREDICATE
self.ruleIndex = ruleIndex
self.predIndex = predIndex
self.isCtxDependent = isCtxDependent # e.g., $i ref in pred
self.isEpsilon = True
def matches( self, symbol, minVocabSymbol, maxVocabSymbol):
return False
def getPredicate(self):
return Predicate(self.ruleIndex, self.predIndex, self.isCtxDependent)
def __unicode__(self):
return u"pred_" + unicode(self.ruleIndex) + u":" + unicode(self.predIndex)
class ActionTransition(Transition):
def __init__(self, target, ruleIndex, actionIndex=-1, isCtxDependent=False):
super(ActionTransition, self).__init__(target)
self.serializationType = self.ACTION
self.ruleIndex = ruleIndex
self.actionIndex = actionIndex
self.isCtxDependent = isCtxDependent # e.g., $i ref in pred
self.isEpsilon = True
def matches( self, symbol, minVocabSymbol, maxVocabSymbol):
return False
def __unicode__(self):
return u"action_" + unicode(self.ruleIndex) + u":" + unicode(self.actionIndex)
# A transition containing a set of values.
class SetTransition(Transition):
def __init__(self, target, set):
super(SetTransition, self).__init__(target)
self.serializationType = self.SET
if set is not None:
self.label = set
else:
self.label = IntervalSet()
self.label.addRange(Interval(Token.INVALID_TYPE, Token.INVALID_TYPE + 1))
def matches( self, symbol, minVocabSymbol, maxVocabSymbol):
return symbol in self.label
def __unicode__(self):
return unicode(self.label)
class NotSetTransition(SetTransition):
def __init__(self, target, set):
super(NotSetTransition, self).__init__(target, set)
self.serializationType = self.NOT_SET
def matches( self, symbol, minVocabSymbol, maxVocabSymbol):
return symbol >= minVocabSymbol \
and symbol <= maxVocabSymbol \
and not super(type(self), self).matches(symbol, minVocabSymbol, maxVocabSymbol)
def __unicode__(self):
return u'~' + super(type(self), self).__unicode__()
class WildcardTransition(Transition):
def __init__(self, target):
super(WildcardTransition, self).__init__(target)
self.serializationType = self.WILDCARD
def matches( self, symbol, minVocabSymbol, maxVocabSymbol):
return symbol >= minVocabSymbol and symbol <= maxVocabSymbol
def __unicode__(self):
return u"."
class PrecedencePredicateTransition(AbstractPredicateTransition):
def __init__(self, target, precedence):
super(PrecedencePredicateTransition, self).__init__(target)
self.serializationType = self.PRECEDENCE
self.precedence = precedence
self.isEpsilon = True
def matches( self, symbol, minVocabSymbol, maxVocabSymbol):
return False
def getPredicate(self):
return PrecedencePredicate(self.precedence)
def __unicode__(self):
return unicode(self.precedence) + u" >= _p"
Transition.serializationTypes = {
EpsilonTransition: Transition.EPSILON,
RangeTransition: Transition.RANGE,
RuleTransition: Transition.RULE,
PredicateTransition: Transition.PREDICATE,
AtomTransition: Transition.ATOM,
ActionTransition: Transition.ACTION,
SetTransition: Transition.SET,
NotSetTransition: Transition.NOT_SET,
WildcardTransition: Transition.WILDCARD,
PrecedencePredicateTransition: Transition.PRECEDENCE
}
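# A minimal illustration (not part of the runtime, added for clarity): transition
# labels are IntervalSets and matches() tests symbol membership; BasicState is
# used here only as a placeholder target state.
if __name__ == '__main__':
    from antlr4.atn.ATNState import BasicState
    r = RangeTransition(BasicState(), ord('a'), ord('z'))
    assert r.matches(ord('m'), 0, 0xFFFF)
    assert not r.matches(ord('A'), 0, 0xFFFF)
    a = AtomTransition(BasicState(), ord('x'))
    assert a.matches(ord('x'), 0, 0xFFFF)
    assert Transition.serializationTypes[AtomTransition] == Transition.ATOM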

View File

@@ -0,0 +1 @@
__author__ = 'ericvergnaud'

View File

@@ -0,0 +1,150 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.dfa.DFAState import DFAState
from antlr4.error.Errors import IllegalStateException
class DFA(object):
def __init__(self, atnStartState, decision=0):
# From which ATN state did we create this DFA?
self.atnStartState = atnStartState
self.decision = decision
# A set of all DFA states. Use {@link Map} so we can get old state back
# ({@link Set} only allows you to see if it's there).
self._states = dict()
self.s0 = None
# {@code true} if this DFA is for a precedence decision; otherwise,
# {@code false}. This is the backing field for {@link #isPrecedenceDfa},
# {@link #setPrecedenceDfa}.
self.precedenceDfa = False
# Get the start state for a specific precedence value.
#
# @param precedence The current precedence.
# @return The start state corresponding to the specified precedence, or
# {@code null} if no start state exists for the specified precedence.
#
# @throws IllegalStateException if this is not a precedence DFA.
# @see #isPrecedenceDfa()
def getPrecedenceStartState(self, precedence):
if not self.precedenceDfa:
raise IllegalStateException("Only precedence DFAs may contain a precedence start state.")
# s0.edges is never null for a precedence DFA
if precedence < 0 or precedence >= len(self.s0.edges):
return None
return self.s0.edges[precedence]
# Set the start state for a specific precedence value.
#
# @param precedence The current precedence.
# @param startState The start state corresponding to the specified
# precedence.
#
# @throws IllegalStateException if this is not a precedence DFA.
# @see #isPrecedenceDfa()
#
def setPrecedenceStartState(self, precedence, startState):
if not self.precedenceDfa:
raise IllegalStateException("Only precedence DFAs may contain a precedence start state.")
if precedence < 0:
return
# synchronization on s0 here is ok. when the DFA is turned into a
# precedence DFA, s0 will be initialized once and not updated again
# s0.edges is never null for a precedence DFA
if precedence >= len(self.s0.edges):
ext = [None] * (precedence + 1 - len(self.s0.edges))
self.s0.edges.extend(ext)
self.s0.edges[precedence] = startState
#
# Sets whether this is a precedence DFA. If the specified value differs
# from the current DFA configuration, the following actions are taken;
# otherwise no changes are made to the current DFA.
#
# <ul>
# <li>The {@link #states} map is cleared</li>
# <li>If {@code precedenceDfa} is {@code false}, the initial state
# {@link #s0} is set to {@code null}; otherwise, it is initialized to a new
# {@link DFAState} with an empty outgoing {@link DFAState#edges} array to
# store the start states for individual precedence values.</li>
# <li>The {@link #precedenceDfa} field is updated</li>
# </ul>
#
# @param precedenceDfa {@code true} if this is a precedence DFA; otherwise,
# {@code false}
def setPrecedenceDfa(self, precedenceDfa):
if self.precedenceDfa != precedenceDfa:
self._states = dict()
if precedenceDfa:
precedenceState = DFAState(configs=ATNConfigSet())  # pass as keyword; the first positional parameter is stateNumber
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False
self.s0 = precedenceState
else:
self.s0 = None
self.precedenceDfa = precedenceDfa
@property
def states(self):
return self._states
# Return a list of all states in this DFA, ordered by state number.
def sortedStates(self):
return sorted(self._states.keys(), key=lambda state: state.stateNumber)
def __str__(self):
return unicode(self)
def __unicode__(self):
return self.toString(None)
def toString(self, literalNames=None, symbolicNames=None):
if self.s0 is None:
return ""
from antlr4.dfa.DFASerializer import DFASerializer
serializer = DFASerializer(self, literalNames, symbolicNames)
return unicode(serializer)
def toLexerString(self):
if self.s0 is None:
return ""
from antlr4.dfa.DFASerializer import LexerDFASerializer
serializer = LexerDFASerializer(self)
return unicode(serializer)
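# A minimal usage sketch (not part of the runtime, added for clarity): once a DFA
# is marked as a precedence DFA, start states are stored per precedence value on
# s0.edges; BasicState stands in for a real ATN decision state.
if __name__ == '__main__':
    from antlr4.atn.ATNState import BasicState
    dfa = DFA(BasicState(), decision=0)
    dfa.setPrecedenceDfa(True)
    assert dfa.getPrecedenceStartState(3) is None
    start = DFAState(configs=ATNConfigSet())
    dfa.setPrecedenceStartState(3, start)
    assert dfa.getPrecedenceStartState(3) is start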

View File

@@ -0,0 +1,100 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
# A DFA walker that knows how to dump them to serialized strings.#/
from io import StringIO
from antlr4.Utils import str_list
class DFASerializer(object):
def __init__(self, dfa, literalNames=None, symbolicNames=None):
self.dfa = dfa
self.literalNames = literalNames
self.symbolicNames = symbolicNames
def __str__(self):
return unicode(self)
def __unicode__(self):
if self.dfa.s0 is None:
return None
with StringIO() as buf:
for s in self.dfa.sortedStates():
n = 0
if s.edges is not None:
n = len(s.edges)
for i in range(0, n):
t = s.edges[i]
if t is not None and t.stateNumber != 0x7FFFFFFF:
buf.write(self.getStateString(s))
label = self.getEdgeLabel(i)
buf.write(u"-")
buf.write(label)
buf.write(u"->")
buf.write(self.getStateString(t))
buf.write(u'\n')
output = buf.getvalue()
if len(output)==0:
return None
else:
return output
def getEdgeLabel(self, i):
if i==0:
return u"EOF"
if self.literalNames is not None and i<=len(self.literalNames):
return self.literalNames[i-1]
elif self.symbolicNames is not None and i<=len(self.symbolicNames):
return self.symbolicNames[i-1]
else:
return unicode(i-1)
def getStateString(self, s):
n = s.stateNumber
baseStateStr = ( u":" if s.isAcceptState else u"") + u"s" + unicode(n) + \
( u"^" if s.requiresFullContext else u"")
if s.isAcceptState:
if s.predicates is not None:
return baseStateStr + u"=>" + str_list(s.predicates)
else:
return baseStateStr + u"=>" + unicode(s.prediction)
else:
return baseStateStr
class LexerDFASerializer(DFASerializer):
def __init__(self, dfa):
super(LexerDFASerializer, self).__init__(dfa, None)
def getEdgeLabel(self, i):
return u"'" + unichr(i) + u"'"

View File

@@ -0,0 +1,156 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
# Map a predicate to a predicted alternative.#/
from io import StringIO
from antlr4.atn.ATNConfigSet import ATNConfigSet
class PredPrediction(object):
def __init__(self, pred, alt):
self.alt = alt
self.pred = pred
def __str__(self):
return unicode(self)
def __unicode__(self):
return u"(" + unicode(self.pred) + u", " + unicode(self.alt) + u")"
# A DFA state represents a set of possible ATN configurations.
# As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
# to keep track of all possible states the ATN can be in after
# reading each input symbol. That is to say, after reading
# input a1a2..an, the DFA is in a state that represents the
# subset T of the states of the ATN that are reachable from the
# ATN's start state along some path labeled a1a2..an."
# In conventional NFA&rarr;DFA conversion, therefore, the subset T
# would be a bitset representing the set of states the
# ATN could be in. We need to track the alt predicted by each
# state as well, however. More importantly, we need to maintain
# a stack of states, tracking the closure operations as they
# jump from rule to rule, emulating rule invocations (method calls).
# I have to add a stack to simulate the proper lookahead sequences for
# the underlying LL grammar from which the ATN was derived.
#
# <p>I use a set of ATNConfig objects not simple states. An ATNConfig
# is both a state (ala normal conversion) and a RuleContext describing
# the chain of rules (if any) followed to arrive at that state.</p>
#
# <p>A DFA state may have multiple references to a particular state,
# but with different ATN contexts (with same or different alts)
# meaning that state was reached via a different set of rule invocations.</p>
#/
class DFAState(object):
def __init__(self, stateNumber=-1, configs=ATNConfigSet()):
self.stateNumber = stateNumber
self.configs = configs
# {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1)
# {@link Token#EOF} maps to {@code edges[0]}.
self.edges = None
self.isAcceptState = False
# if accept state, what ttype do we match or alt do we predict?
# This is set to {@link ATN#INVALID_ALT_NUMBER} when {@link #predicates}{@code !=null} or
# {@link #requiresFullContext}.
self.prediction = 0
self.lexerActionExecutor = None
# Indicates that this state was created during SLL prediction that
# discovered a conflict between the configurations in the state. Future
        # {@link ParserATNSimulator#execATN} invocations immediately jump to
        # full-context prediction if this field is true.
self.requiresFullContext = False
# During SLL parsing, this is a list of predicates associated with the
# ATN configurations of the DFA state. When we have predicates,
# {@link #requiresFullContext} is {@code false} since full context prediction evaluates predicates
# on-the-fly. If this is not null, then {@link #prediction} is
# {@link ATN#INVALID_ALT_NUMBER}.
#
# <p>We only use these for non-{@link #requiresFullContext} but conflicting states. That
# means we know from the context (it's $ or we don't dip into outer
# context) that it's an ambiguity not a conflict.</p>
#
# <p>This list is computed by {@link ParserATNSimulator#predicateDFAState}.</p>
self.predicates = None
# Get the set of all alts mentioned by all ATN configurations in this
# DFA state.
def getAltSet(self):
alts = set()
if self.configs is not None:
for c in self.configs:
alts.add(c.alt)
if len(alts)==0:
return None
else:
return alts
def __hash__(self):
return hash(self.configs)
# Two {@link DFAState} instances are equal if their ATN configuration sets
# are the same. This method is used to see if a state already exists.
#
# <p>Because the number of alternatives and number of ATN configurations are
# finite, there is a finite number of DFA states that can be processed.
# This is necessary to show that the algorithm terminates.</p>
#
# <p>Cannot test the DFA state numbers here because in
# {@link ParserATNSimulator#addDFAState} we need to know if any other state
# exists that has this exact set of ATN configurations. The
# {@link #stateNumber} is irrelevant.</p>
def __eq__(self, other):
# compare set of ATN configurations in this set with other
if self is other:
return True
elif not isinstance(other, DFAState):
return False
else:
return self.configs==other.configs
def __str__(self):
return unicode(self)
def __unicode__(self):
with StringIO() as buf:
buf.write(unicode(self.stateNumber))
buf.write(u":")
buf.write(unicode(self.configs))
if self.isAcceptState:
buf.write(u"=>")
if self.predicates is not None:
buf.write(unicode(self.predicates))
else:
buf.write(unicode(self.prediction))
return buf.getvalue()
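# Illustrative sketch (not part of the module above): equality and hashing of
# DFAState are driven purely by the configuration set, so instances can be
# used as dict keys to find an already-existing state regardless of their
# state numbers. The empty ATNConfigSet below is only a stand-in for a real
# set of ATN configurations, and the helper name is illustrative.
def _dfastate_lookup_demo():
    configs = ATNConfigSet()
    s1 = DFAState(stateNumber=1, configs=configs)
    s2 = DFAState(stateNumber=2, configs=configs)
    assert s1 == s2                   # same configs => equal, numbers ignored
    existing = {s1: s1}
    return existing.get(s2) is s1     # True: lookup keyed by the config set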

View File

@ -0,0 +1 @@
__author__ = 'ericvergnaud'

View File

@ -0,0 +1,133 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# This implementation of {@link ANTLRErrorListener} can be used to identify
# certain potential correctness and performance problems in grammars. "Reports"
# are made by calling {@link Parser#notifyErrorListeners} with the appropriate
# message.
#
# <ul>
# <li><b>Ambiguities</b>: These are cases where more than one path through the
# grammar can match the input.</li>
# <li><b>Weak context sensitivity</b>: These are cases where full-context
# prediction resolved an SLL conflict to a unique alternative which equaled the
# minimum alternative of the SLL conflict.</li>
# <li><b>Strong (forced) context sensitivity</b>: These are cases where the
# full-context prediction resolved an SLL conflict to a unique alternative,
# <em>and</em> the minimum alternative of the SLL conflict was found to not be
# a truly viable alternative. Two-stage parsing cannot be used for inputs where
# this situation occurs.</li>
# </ul>
from io import StringIO
from antlr4.Utils import str_set
from antlr4.error.ErrorListener import ErrorListener
class DiagnosticErrorListener(ErrorListener):
def __init__(self, exactOnly=True):
# whether all ambiguities or only exact ambiguities are reported.
self.exactOnly = exactOnly
def reportAmbiguity(self, recognizer, dfa, startIndex,
stopIndex, exact, ambigAlts, configs):
if self.exactOnly and not exact:
return
with StringIO() as buf:
buf.write(u"reportAmbiguity d=")
buf.write(self.getDecisionDescription(recognizer, dfa))
buf.write(u": ambigAlts=")
buf.write(str_set(self.getConflictingAlts(ambigAlts, configs)))
buf.write(u", input='")
buf.write(recognizer.getTokenStream().getText((startIndex, stopIndex)))
buf.write(u"'")
recognizer.notifyErrorListeners(buf.getvalue())
def reportAttemptingFullContext(self, recognizer, dfa, startIndex,
stopIndex, conflictingAlts, configs):
with StringIO() as buf:
buf.write(u"reportAttemptingFullContext d=")
buf.write(self.getDecisionDescription(recognizer, dfa))
buf.write(u", input='")
buf.write(recognizer.getTokenStream().getText((startIndex, stopIndex)))
buf.write(u"'")
recognizer.notifyErrorListeners(buf.getvalue())
def reportContextSensitivity(self, recognizer, dfa, startIndex,
stopIndex, prediction, configs):
with StringIO() as buf:
buf.write(u"reportContextSensitivity d=")
buf.write(self.getDecisionDescription(recognizer, dfa))
buf.write(u", input='")
buf.write(recognizer.getTokenStream().getText((startIndex, stopIndex)))
buf.write(u"'")
recognizer.notifyErrorListeners(buf.getvalue())
def getDecisionDescription(self, recognizer, dfa):
decision = dfa.decision
ruleIndex = dfa.atnStartState.ruleIndex
ruleNames = recognizer.ruleNames
if ruleIndex < 0 or ruleIndex >= len(ruleNames):
return unicode(decision)
ruleName = ruleNames[ruleIndex]
if ruleName is None or len(ruleName)==0:
return unicode(decision)
return unicode(decision) + u" (" + ruleName + u")"
#
# Computes the set of conflicting or ambiguous alternatives from a
# configuration set, if that information was not already provided by the
# parser.
#
# @param reportedAlts The set of conflicting or ambiguous alternatives, as
# reported by the parser.
# @param configs The conflicting or ambiguous configuration set.
# @return Returns {@code reportedAlts} if it is not {@code null}, otherwise
# returns the set of alternatives represented in {@code configs}.
#
def getConflictingAlts(self, reportedAlts, configs):
if reportedAlts is not None:
return reportedAlts
result = set()
for config in configs:
result.add(config.alt)
return result
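# Illustrative sketch (not part of the module above): attaching the listener
# to a generated parser so ambiguity and context-sensitivity reports flow
# through notifyErrorListeners. MyLexer, MyParser and startRule are
# hypothetical generated names, and removeErrorListeners/addErrorListener are
# assumed to exist on the recognizer; ambiguity reports also normally require
# an ambiguity-detecting prediction mode on the parser's ATN simulator.
from antlr4 import CommonTokenStream
from antlr4.InputStream import InputStream

def parse_with_diagnostics(text):
    lexer = MyLexer(InputStream(text))            # hypothetical generated lexer
    parser = MyParser(CommonTokenStream(lexer))   # hypothetical generated parser
    parser.removeErrorListeners()                 # drop the console listener (assumed API)
    parser.addErrorListener(DiagnosticErrorListener(exactOnly=False))
    return parser.startRule()                     # hypothetical entry rule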

View File

@ -0,0 +1,97 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Provides an empty default implementation of {@link ANTLRErrorListener}. The
# default implementation of each method does nothing, but can be overridden as
# necessary.
from __future__ import print_function
import sys
class ErrorListener(object):
def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
pass
def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs):
pass
def reportAttemptingFullContext(self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs):
pass
def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs):
pass
class ConsoleErrorListener(ErrorListener):
#
# Provides a default instance of {@link ConsoleErrorListener}.
#
INSTANCE = None
#
# {@inheritDoc}
#
# <p>
# This implementation prints messages to {@link System#err} containing the
# values of {@code line}, {@code charPositionInLine}, and {@code msg} using
# the following format.</p>
#
# <pre>
# line <em>line</em>:<em>charPositionInLine</em> <em>msg</em>
# </pre>
#
def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
print("line " + str(line) + ":" + str(column) + " " + msg, file=sys.stderr)
ConsoleErrorListener.INSTANCE = ConsoleErrorListener()
class ProxyErrorListener(ErrorListener):
def __init__(self, delegates):
super(ProxyErrorListener, self).__init__()
if delegates is None:
raise ReferenceError("delegates")
self.delegates = delegates
def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
for delegate in self.delegates:
delegate.syntaxError(recognizer, offendingSymbol, line, column, msg, e)
def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs):
for delegate in self.delegates:
delegate.reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
def reportAttemptingFullContext(self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs):
for delegate in self.delegates:
delegate.reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs)
def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs):
for delegate in self.delegates:
delegate.reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs)
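# Illustrative sketch (not part of the module above): every hook defaults to
# "pass", so a listener only needs to override what it cares about. This one
# collects syntax errors in memory instead of writing to stderr like
# ConsoleErrorListener; wiring it up via removeErrorListeners/addErrorListener
# is an assumption about the recognizer API.
class CollectingErrorListener(ErrorListener):
    def __init__(self):
        super(CollectingErrorListener, self).__init__()
        self.messages = []
    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        self.messages.append("%d:%d %s" % (line, column, msg))

# typical wiring (MyParser/startRule are hypothetical generated names):
#   listener = CollectingErrorListener()
#   parser = MyParser(tokens)
#   parser.removeErrorListeners()
#   parser.addErrorListener(listener)
#   parser.startRule()
#   print(listener.messages)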

View File

@ -0,0 +1,719 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
from antlr4.IntervalSet import IntervalSet
from antlr4.Token import Token
from antlr4.atn.ATNState import ATNState
from antlr4.error.Errors import NoViableAltException, InputMismatchException, FailedPredicateException, ParseCancellationException
class ErrorStrategy(object):
def reset(self, recognizer):
pass
def recoverInline(self, recognizer):
pass
def recover(self, recognizer, e):
pass
def sync(self, recognizer):
pass
def inErrorRecoveryMode(self, recognizer):
pass
def reportError(self, recognizer, e):
pass
# This is the default implementation of {@link ANTLRErrorStrategy} used for
# error reporting and recovery in ANTLR parsers.
#
class DefaultErrorStrategy(ErrorStrategy):
def __init__(self):
super(DefaultErrorStrategy, self).__init__()
# Indicates whether the error strategy is currently "recovering from an
# error". This is used to suppress reporting multiple error messages while
# attempting to recover from a detected syntax error.
#
# @see #inErrorRecoveryMode
#
self.errorRecoveryMode = False
# The index into the input stream where the last error occurred.
# This is used to prevent infinite loops where an error is found
# but no token is consumed during recovery...another error is found,
        # ad nauseam. This is a failsafe mechanism to guarantee that at least
# one token/tree node is consumed for two errors.
#
self.lastErrorIndex = -1
self.lastErrorStates = None
# <p>The default implementation simply calls {@link #endErrorCondition} to
# ensure that the handler is not in error recovery mode.</p>
def reset(self, recognizer):
self.endErrorCondition(recognizer)
#
# This method is called to enter error recovery mode when a recognition
# exception is reported.
#
# @param recognizer the parser instance
#
def beginErrorCondition(self, recognizer):
self.errorRecoveryMode = True
def inErrorRecoveryMode(self, recognizer):
return self.errorRecoveryMode
#
# This method is called to leave error recovery mode after recovering from
# a recognition exception.
#
# @param recognizer
#
def endErrorCondition(self, recognizer):
self.errorRecoveryMode = False
self.lastErrorStates = None
self.lastErrorIndex = -1
#
# {@inheritDoc}
#
# <p>The default implementation simply calls {@link #endErrorCondition}.</p>
#
def reportMatch(self, recognizer):
self.endErrorCondition(recognizer)
#
# {@inheritDoc}
#
# <p>The default implementation returns immediately if the handler is already
# in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
# and dispatches the reporting task based on the runtime type of {@code e}
# according to the following table.</p>
#
# <ul>
# <li>{@link NoViableAltException}: Dispatches the call to
# {@link #reportNoViableAlternative}</li>
# <li>{@link InputMismatchException}: Dispatches the call to
# {@link #reportInputMismatch}</li>
# <li>{@link FailedPredicateException}: Dispatches the call to
# {@link #reportFailedPredicate}</li>
# <li>All other types: calls {@link Parser#notifyErrorListeners} to report
# the exception</li>
# </ul>
#
def reportError(self, recognizer, e):
# if we've already reported an error and have not matched a token
# yet successfully, don't report any errors.
if self.inErrorRecoveryMode(recognizer):
return # don't report spurious errors
self.beginErrorCondition(recognizer)
if isinstance( e, NoViableAltException ):
self.reportNoViableAlternative(recognizer, e)
elif isinstance( e, InputMismatchException ):
self.reportInputMismatch(recognizer, e)
elif isinstance( e, FailedPredicateException ):
self.reportFailedPredicate(recognizer, e)
else:
print("unknown recognition error type: " + type(e).__name__)
            recognizer.notifyErrorListeners(e.message, e.offendingToken, e)
#
# {@inheritDoc}
#
# <p>The default implementation resynchronizes the parser by consuming tokens
# until we find one in the resynchronization set--loosely the set of tokens
# that can follow the current rule.</p>
#
def recover(self, recognizer, e):
if self.lastErrorIndex==recognizer.getInputStream().index \
and self.lastErrorStates is not None \
and recognizer.state in self.lastErrorStates:
# uh oh, another error at same token index and previously-visited
# state in ATN; must be a case where LT(1) is in the recovery
# token set so nothing got consumed. Consume a single token
# at least to prevent an infinite loop; this is a failsafe.
recognizer.consume()
self.lastErrorIndex = recognizer._input.index
if self.lastErrorStates is None:
self.lastErrorStates = []
self.lastErrorStates.append(recognizer.state)
followSet = self.getErrorRecoverySet(recognizer)
self.consumeUntil(recognizer, followSet)
# The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
    # that the current lookahead symbol is consistent with what we were expecting
# at this point in the ATN. You can call this anytime but ANTLR only
# generates code to check before subrules/loops and each iteration.
#
# <p>Implements Jim Idle's magic sync mechanism in closures and optional
# subrules. E.g.,</p>
#
# <pre>
# a : sync ( stuff sync )* ;
# sync : {consume to what can follow sync} ;
# </pre>
#
# At the start of a sub rule upon error, {@link #sync} performs single
# token deletion, if possible. If it can't do that, it bails on the current
# rule and uses the default error recovery, which consumes until the
# resynchronization set of the current rule.
#
# <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
# with an empty alternative), then the expected set includes what follows
# the subrule.</p>
#
# <p>During loop iteration, it consumes until it sees a token that can start a
# sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
# stay in the loop as long as possible.</p>
#
# <p><strong>ORIGINS</strong></p>
#
# <p>Previous versions of ANTLR did a poor job of their recovery within loops.
    # A single mismatched or missing token would force the parser to bail
    # out of the entire rule surrounding the loop. So, for rule</p>
#
# <pre>
# classDef : 'class' ID '{' member* '}'
# </pre>
#
# input with an extra token between members would force the parser to
# consume until it found the next class definition rather than the next
# member definition of the current class.
#
# <p>This functionality cost a little bit of effort because the parser has to
# compare token set at the start of the loop and at each iteration. If for
# some reason speed is suffering for you, you can turn off this
# functionality by simply overriding this method as a blank { }.</p>
#
def sync(self, recognizer):
# If already recovering, don't try to sync
if self.inErrorRecoveryMode(recognizer):
return
s = recognizer._interp.atn.states[recognizer.state]
la = recognizer.getTokenStream().LA(1)
# try cheaper subset first; might get lucky. seems to shave a wee bit off
if la==Token.EOF or la in recognizer.atn.nextTokens(s):
return
# Return but don't end recovery. only do that upon valid token match
if recognizer.isExpectedToken(la):
return
if s.stateType in [ATNState.BLOCK_START, ATNState.STAR_BLOCK_START,
ATNState.PLUS_BLOCK_START, ATNState.STAR_LOOP_ENTRY]:
# report error and recover if possible
if self.singleTokenDeletion(recognizer)is not None:
return
else:
raise InputMismatchException(recognizer)
elif s.stateType in [ATNState.PLUS_LOOP_BACK, ATNState.STAR_LOOP_BACK]:
self.reportUnwantedToken(recognizer)
expecting = recognizer.getExpectedTokens()
whatFollowsLoopIterationOrRule = expecting.addSet(self.getErrorRecoverySet(recognizer))
self.consumeUntil(recognizer, whatFollowsLoopIterationOrRule)
else:
# do nothing if we can't identify the exact kind of ATN state
pass
# This is called by {@link #reportError} when the exception is a
# {@link NoViableAltException}.
#
# @see #reportError
#
# @param recognizer the parser instance
# @param e the recognition exception
#
def reportNoViableAlternative(self, recognizer, e):
tokens = recognizer.getTokenStream()
if tokens is not None:
if e.startToken.type==Token.EOF:
input = "<EOF>"
else:
input = tokens.getText((e.startToken, e.offendingToken))
else:
input = "<unknown input>"
msg = "no viable alternative at input " + self.escapeWSAndQuote(input)
recognizer.notifyErrorListeners(msg, e.offendingToken, e)
#
# This is called by {@link #reportError} when the exception is an
# {@link InputMismatchException}.
#
# @see #reportError
#
# @param recognizer the parser instance
# @param e the recognition exception
#
def reportInputMismatch(self, recognizer, e):
msg = "mismatched input " + self.getTokenErrorDisplay(e.offendingToken) \
+ " expecting " + e.getExpectedTokens().toString(recognizer.literalNames, recognizer.symbolicNames)
recognizer.notifyErrorListeners(msg, e.offendingToken, e)
#
# This is called by {@link #reportError} when the exception is a
# {@link FailedPredicateException}.
#
# @see #reportError
#
# @param recognizer the parser instance
# @param e the recognition exception
#
def reportFailedPredicate(self, recognizer, e):
ruleName = recognizer.ruleNames[recognizer._ctx.getRuleIndex()]
msg = "rule " + ruleName + " " + e.message
recognizer.notifyErrorListeners(msg, e.offendingToken, e)
# This method is called to report a syntax error which requires the removal
# of a token from the input stream. At the time this method is called, the
    # erroneous symbol is the current {@code LT(1)} symbol and has not yet been
# removed from the input stream. When this method returns,
# {@code recognizer} is in error recovery mode.
#
# <p>This method is called when {@link #singleTokenDeletion} identifies
# single-token deletion as a viable recovery strategy for a mismatched
# input error.</p>
#
# <p>The default implementation simply returns if the handler is already in
# error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
# enter error recovery mode, followed by calling
# {@link Parser#notifyErrorListeners}.</p>
#
# @param recognizer the parser instance
#
def reportUnwantedToken(self, recognizer):
if self.inErrorRecoveryMode(recognizer):
return
self.beginErrorCondition(recognizer)
t = recognizer.getCurrentToken()
tokenName = self.getTokenErrorDisplay(t)
expecting = self.getExpectedTokens(recognizer)
msg = "extraneous input " + tokenName + " expecting " \
+ expecting.toString(recognizer.literalNames, recognizer.symbolicNames)
recognizer.notifyErrorListeners(msg, t, None)
# This method is called to report a syntax error which requires the
# insertion of a missing token into the input stream. At the time this
# method is called, the missing token has not yet been inserted. When this
# method returns, {@code recognizer} is in error recovery mode.
#
# <p>This method is called when {@link #singleTokenInsertion} identifies
# single-token insertion as a viable recovery strategy for a mismatched
# input error.</p>
#
# <p>The default implementation simply returns if the handler is already in
# error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
# enter error recovery mode, followed by calling
# {@link Parser#notifyErrorListeners}.</p>
#
# @param recognizer the parser instance
#
def reportMissingToken(self, recognizer):
if self.inErrorRecoveryMode(recognizer):
return
self.beginErrorCondition(recognizer)
t = recognizer.getCurrentToken()
expecting = self.getExpectedTokens(recognizer)
msg = "missing " + expecting.toString(recognizer.literalNames, recognizer.symbolicNames) \
+ " at " + self.getTokenErrorDisplay(t)
recognizer.notifyErrorListeners(msg, t, None)
# <p>The default implementation attempts to recover from the mismatched input
# by using single token insertion and deletion as described below. If the
# recovery attempt fails, this method throws an
# {@link InputMismatchException}.</p>
#
# <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
#
# <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
# right token, however, then assume {@code LA(1)} is some extra spurious
# token and delete it. Then consume and return the next token (which was
# the {@code LA(2)} token) as the successful result of the match operation.</p>
#
# <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
#
# <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
#
# <p>If current token (at {@code LA(1)}) is consistent with what could come
# after the expected {@code LA(1)} token, then assume the token is missing
# and use the parser's {@link TokenFactory} to create it on the fly. The
# "insertion" is performed by returning the created token as the successful
# result of the match operation.</p>
#
# <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
#
# <p><strong>EXAMPLE</strong></p>
#
# <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
# the parser returns from the nested call to {@code expr}, it will have
# call chain:</p>
#
# <pre>
# stat &rarr; expr &rarr; atom
# </pre>
#
# and it will be trying to match the {@code ')'} at this point in the
# derivation:
#
# <pre>
# =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
# ^
# </pre>
#
# The attempt to match {@code ')'} will fail when it sees {@code ';'} and
# call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
# is in the set of tokens that can follow the {@code ')'} token reference
# in rule {@code atom}. It can assume that you forgot the {@code ')'}.
#
def recoverInline(self, recognizer):
# SINGLE TOKEN DELETION
matchedSymbol = self.singleTokenDeletion(recognizer)
if matchedSymbol is not None:
# we have deleted the extra token.
# now, move past ttype token as if all were ok
recognizer.consume()
return matchedSymbol
# SINGLE TOKEN INSERTION
if self.singleTokenInsertion(recognizer):
return self.getMissingSymbol(recognizer)
# even that didn't work; must throw the exception
raise InputMismatchException(recognizer)
#
# This method implements the single-token insertion inline error recovery
# strategy. It is called by {@link #recoverInline} if the single-token
# deletion strategy fails to recover from the mismatched input. If this
# method returns {@code true}, {@code recognizer} will be in error recovery
# mode.
#
# <p>This method determines whether or not single-token insertion is viable by
# checking if the {@code LA(1)} input symbol could be successfully matched
# if it were instead the {@code LA(2)} symbol. If this method returns
# {@code true}, the caller is responsible for creating and inserting a
# token with the correct type to produce this behavior.</p>
#
# @param recognizer the parser instance
# @return {@code true} if single-token insertion is a viable recovery
# strategy for the current mismatched input, otherwise {@code false}
#
def singleTokenInsertion(self, recognizer):
currentSymbolType = recognizer.getTokenStream().LA(1)
# if current token is consistent with what could come after current
# ATN state, then we know we're missing a token; error recovery
# is free to conjure up and insert the missing token
atn = recognizer._interp.atn
currentState = atn.states[recognizer.state]
next = currentState.transitions[0].target
expectingAtLL2 = atn.nextTokens(next, recognizer._ctx)
if currentSymbolType in expectingAtLL2:
self.reportMissingToken(recognizer)
return True
else:
return False
# This method implements the single-token deletion inline error recovery
# strategy. It is called by {@link #recoverInline} to attempt to recover
# from mismatched input. If this method returns null, the parser and error
# handler state will not have changed. If this method returns non-null,
# {@code recognizer} will <em>not</em> be in error recovery mode since the
# returned token was a successful match.
#
# <p>If the single-token deletion is successful, this method calls
# {@link #reportUnwantedToken} to report the error, followed by
# {@link Parser#consume} to actually "delete" the extraneous token. Then,
# before returning {@link #reportMatch} is called to signal a successful
# match.</p>
#
# @param recognizer the parser instance
# @return the successfully matched {@link Token} instance if single-token
# deletion successfully recovers from the mismatched input, otherwise
# {@code null}
#
def singleTokenDeletion(self, recognizer):
nextTokenType = recognizer.getTokenStream().LA(2)
expecting = self.getExpectedTokens(recognizer)
if nextTokenType in expecting:
self.reportUnwantedToken(recognizer)
# print("recoverFromMismatchedToken deleting " \
# + str(recognizer.getTokenStream().LT(1)) \
# + " since " + str(recognizer.getTokenStream().LT(2)) \
# + " is what we want", file=sys.stderr)
recognizer.consume() # simply delete extra token
# we want to return the token we're actually matching
matchedSymbol = recognizer.getCurrentToken()
self.reportMatch(recognizer) # we know current token is correct
return matchedSymbol
else:
return None
# Conjure up a missing token during error recovery.
#
# The recognizer attempts to recover from single missing
# symbols. But, actions might refer to that missing symbol.
# For example, x=ID {f($x);}. The action clearly assumes
# that there has been an identifier matched previously and that
# $x points at that token. If that token is missing, but
# the next token in the stream is what we want we assume that
# this token is missing and we keep going. Because we
# have to return some token to replace the missing token,
# we have to conjure one up. This method gives the user control
# over the tokens returned for missing tokens. Mostly,
# you will want to create something special for identifier
# tokens. For literals such as '{' and ',', the default
# action in the parser or tree parser works. It simply creates
# a CommonToken of the appropriate type. The text will be the token.
# If you change what tokens must be created by the lexer,
# override this method to create the appropriate tokens.
#
def getMissingSymbol(self, recognizer):
currentSymbol = recognizer.getCurrentToken()
expecting = self.getExpectedTokens(recognizer)
expectedTokenType = expecting[0] # get any element
if expectedTokenType==Token.EOF:
tokenText = "<missing EOF>"
else:
name = None
if expectedTokenType < len(recognizer.literalNames):
name = recognizer.literalNames[expectedTokenType]
if name is None and expectedTokenType < len(recognizer.symbolicNames):
name = recognizer.symbolicNames[expectedTokenType]
tokenText = "<missing " + str(name) + ">"
current = currentSymbol
lookback = recognizer.getTokenStream().LT(-1)
if current.type==Token.EOF and lookback is not None:
current = lookback
return recognizer.getTokenFactory().create(current.source,
expectedTokenType, tokenText, Token.DEFAULT_CHANNEL,
-1, -1, current.line, current.column)
def getExpectedTokens(self, recognizer):
return recognizer.getExpectedTokens()
# How should a token be displayed in an error message? The default
# is to display just the text, but during development you might
# want to have a lot of information spit out. Override in that case
# to use t.toString() (which, for CommonToken, dumps everything about
# the token). This is better than forcing you to override a method in
# your token objects because you don't have to go modify your lexer
# so that it creates a new Java type.
#
def getTokenErrorDisplay(self, t):
if t is None:
return u"<no token>"
s = t.text
if s is None:
if t.type==Token.EOF:
s = u"<EOF>"
else:
s = u"<" + unicode(t.type) + u">"
return self.escapeWSAndQuote(s)
def escapeWSAndQuote(self, s):
s = s.replace(u"\n",u"\\n")
s = s.replace(u"\r",u"\\r")
s = s.replace(u"\t",u"\\t")
return u"'" + s + u"'"
# Compute the error recovery set for the current rule. During
# rule invocation, the parser pushes the set of tokens that can
# follow that rule reference on the stack; this amounts to
# computing FIRST of what follows the rule reference in the
# enclosing rule. See LinearApproximator.FIRST().
# This local follow set only includes tokens
# from within the rule; i.e., the FIRST computation done by
# ANTLR stops at the end of a rule.
#
# EXAMPLE
#
# When you find a "no viable alt exception", the input is not
# consistent with any of the alternatives for rule r. The best
# thing to do is to consume tokens until you see something that
    # can legally follow a call to r *or* any rule that called r.
# You don't want the exact set of viable next tokens because the
# input might just be missing a token--you might consume the
# rest of the input looking for one of the missing tokens.
#
# Consider grammar:
#
# a : '[' b ']'
# | '(' b ')'
# ;
# b : c '^' INT ;
# c : ID
# | INT
# ;
#
# At each rule invocation, the set of tokens that could follow
# that rule is pushed on a stack. Here are the various
# context-sensitive follow sets:
#
# FOLLOW(b1_in_a) = FIRST(']') = ']'
# FOLLOW(b2_in_a) = FIRST(')') = ')'
# FOLLOW(c_in_b) = FIRST('^') = '^'
#
# Upon erroneous input "[]", the call chain is
#
# a -> b -> c
#
# and, hence, the follow context stack is:
#
# depth follow set start of rule execution
# 0 <EOF> a (from main())
# 1 ']' b
# 2 '^' c
#
# Notice that ')' is not included, because b would have to have
# been called from a different context in rule a for ')' to be
# included.
#
# For error recovery, we cannot consider FOLLOW(c)
# (context-sensitive or otherwise). We need the combined set of
# all context-sensitive FOLLOW sets--the set of all tokens that
# could follow any reference in the call chain. We need to
# resync to one of those tokens. Note that FOLLOW(c)='^' and if
# we resync'd to that token, we'd consume until EOF. We need to
# sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
# In this case, for input "[]", LA(1) is ']' and in the set, so we would
# not consume anything. After printing an error, rule c would
# return normally. Rule b would not find the required '^' though.
# At this point, it gets a mismatched token error and throws an
# exception (since LA(1) is not in the viable following token
# set). The rule exception handler tries to recover, but finds
# the same recovery set and doesn't consume anything. Rule b
# exits normally returning to rule a. Now it finds the ']' (and
# with the successful match exits errorRecovery mode).
#
# So, you can see that the parser walks up the call chain looking
# for the token that was a member of the recovery set.
#
# Errors are not generated in errorRecovery mode.
#
# ANTLR's error recovery mechanism is based upon original ideas:
#
# "Algorithms + Data Structures = Programs" by Niklaus Wirth
#
# and
#
# "A note on error recovery in recursive descent parsers":
# http:#portal.acm.org/citation.cfm?id=947902.947905
#
# Later, Josef Grosch had some good ideas:
#
# "Efficient and Comfortable Error Recovery in Recursive Descent
# Parsers":
# ftp:#www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
#
# Like Grosch I implement context-sensitive FOLLOW sets that are combined
# at run-time upon error to avoid overhead during parsing.
#
def getErrorRecoverySet(self, recognizer):
atn = recognizer._interp.atn
ctx = recognizer._ctx
recoverSet = IntervalSet()
while ctx is not None and ctx.invokingState>=0:
# compute what follows who invoked us
invokingState = atn.states[ctx.invokingState]
rt = invokingState.transitions[0]
follow = atn.nextTokens(rt.followState)
recoverSet.addSet(follow)
ctx = ctx.parentCtx
recoverSet.removeOne(Token.EPSILON)
return recoverSet
# Consume tokens until one matches the given token set.#
def consumeUntil(self, recognizer, set_):
ttype = recognizer.getTokenStream().LA(1)
while ttype != Token.EOF and not ttype in set_:
recognizer.consume()
ttype = recognizer.getTokenStream().LA(1)
#
# This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
# by immediately canceling the parse operation with a
# {@link ParseCancellationException}. The implementation ensures that the
# {@link ParserRuleContext#exception} field is set for all parse tree nodes
# that were not completed prior to encountering the error.
#
# <p>
# This error strategy is useful in the following scenarios.</p>
#
# <ul>
# <li><strong>Two-stage parsing:</strong> This error strategy allows the first
# stage of two-stage parsing to immediately terminate if an error is
# encountered, and immediately fall back to the second stage. In addition to
# avoiding wasted work by attempting to recover from errors here, the empty
# implementation of {@link BailErrorStrategy#sync} improves the performance of
# the first stage.</li>
# <li><strong>Silent validation:</strong> When syntax errors are not being
# reported or logged, and the parse result is simply ignored if errors occur,
# the {@link BailErrorStrategy} avoids wasting work on recovering from errors
# when the result will be ignored either way.</li>
# </ul>
#
# <p>
# {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
#
# @see Parser#setErrorHandler(ANTLRErrorStrategy)
#
class BailErrorStrategy(DefaultErrorStrategy):
# Instead of recovering from exception {@code e}, re-throw it wrapped
# in a {@link ParseCancellationException} so it is not caught by the
# rule function catches. Use {@link Exception#getCause()} to get the
# original {@link RecognitionException}.
#
def recover(self, recognizer, e):
context = recognizer._ctx
while context is not None:
context.exception = e
context = context.parentCtx
raise ParseCancellationException(e)
# Make sure we don't attempt to recover inline; if the parser
# successfully recovers, it won't throw an exception.
#
def recoverInline(self, recognizer):
self.recover(recognizer, InputMismatchException(recognizer))
# Make sure we don't attempt to recover from problems in subrules.#
def sync(self, recognizer):
pass
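# Illustrative sketch (not part of the module above): the two-stage parsing
# idiom described for BailErrorStrategy. The first pass bails on the first
# syntax error with ParseCancellationException (it is typically combined with
# SLL prediction, not shown here); only then is the input re-parsed with the
# default, recovering strategy. MyParser and startRule are hypothetical
# generated names, assigning parser._errHandler is an assumption about the
# Parser API, and seek(0) assumes a buffered token stream.
def two_stage_parse(tokens):
    parser = MyParser(tokens)                  # hypothetical generated parser
    parser._errHandler = BailErrorStrategy()   # abort on the first error
    try:
        return parser.startRule()              # hypothetical entry rule
    except ParseCancellationException:
        tokens.seek(0)                         # rewind and try again
        parser = MyParser(tokens)
        parser._errHandler = DefaultErrorStrategy()
        return parser.startRule()              # full reporting and recovery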

View File

@ -0,0 +1,177 @@
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
from antlr4.atn.Transition import PredicateTransition
class UnsupportedOperationException(Exception):
def __init__(self, msg):
super(UnsupportedOperationException, self).__init__(msg)
class IllegalStateException(Exception):
def __init__(self, msg):
super(IllegalStateException, self).__init__(msg)
class CancellationException(IllegalStateException):
def __init__(self, msg):
super(CancellationException, self).__init__(msg)
# The root of the ANTLR exception hierarchy. In general, ANTLR tracks just
# 3 kinds of errors: prediction errors, failed predicate errors, and
# mismatched input errors. In each case, the parser knows where it is
# in the input, where it is in the ATN, the rule invocation stack,
# and what kind of problem occurred.
class RecognitionException(Exception):
def __init__(self, message=None, recognizer=None, input=None, ctx=None):
super(RecognitionException, self).__init__(message)
self.recognizer = recognizer
self.input = input
self.ctx = ctx
# The current {@link Token} when an error occurred. Since not all streams
# support accessing symbols by index, we have to track the {@link Token}
# instance itself.
self.offendingToken = None
# Get the ATN state number the parser was in at the time the error
# occurred. For {@link NoViableAltException} and
# {@link LexerNoViableAltException} exceptions, this is the
# {@link DecisionState} number. For others, it is the state whose outgoing
# edge we couldn't match.
self.offendingState = -1
if recognizer is not None:
self.offendingState = recognizer.state
# <p>If the state number is not known, this method returns -1.</p>
#
# Gets the set of input symbols which could potentially follow the
# previously matched symbol at the time this exception was thrown.
#
# <p>If the set of expected tokens is not known and could not be computed,
# this method returns {@code null}.</p>
#
# @return The set of token types that could potentially follow the current
# state in the ATN, or {@code null} if the information is not available.
#/
def getExpectedTokens(self):
if self.recognizer is not None:
return self.recognizer.atn.getExpectedTokens(self.offendingState, self.ctx)
else:
return None
def __str__(self):
return unicode(self)
class LexerNoViableAltException(RecognitionException):
def __init__(self, lexer, input, startIndex, deadEndConfigs):
super(LexerNoViableAltException, self).__init__(message=None, recognizer=lexer, input=input, ctx=None)
self.startIndex = startIndex
self.deadEndConfigs = deadEndConfigs
def __unicode__(self):
symbol = ""
if self.startIndex >= 0 and self.startIndex < self.input.size():
symbol = self.input.getText((self.startIndex,self.startIndex))
# TODO symbol = Utils.escapeWhitespace(symbol, false);
return u"LexerNoViableAltException" + symbol
# Indicates that the parser could not decide which of two or more paths
# to take based upon the remaining input. It tracks the starting token
# of the offending input and also knows where the parser was
# in the various paths when the error occurred. Reported by reportNoViableAlternative().
#
class NoViableAltException(RecognitionException):
def __init__(self, recognizer, input=None, startToken=None, offendingToken=None, deadEndConfigs=None, ctx=None):
if ctx is None:
ctx = recognizer._ctx
if offendingToken is None:
offendingToken = recognizer.getCurrentToken()
if startToken is None:
startToken = recognizer.getCurrentToken()
if input is None:
input = recognizer.getInputStream()
super(NoViableAltException, self).__init__(recognizer=recognizer, input=input, ctx=ctx)
# Which configurations did we try at input.index() that couldn't match input.LT(1)?#
self.deadEndConfigs = deadEndConfigs
# The token object at the start index; the input stream might
# not be buffering tokens so get a reference to it. (At the
# time the error occurred, of course the stream needs to keep a
# buffer all of the tokens but later we might not have access to those.)
self.startToken = startToken
self.offendingToken = offendingToken
# This signifies any kind of mismatched input exceptions such as
# when the current input does not match the expected token.
#
class InputMismatchException(RecognitionException):
def __init__(self, recognizer):
super(InputMismatchException, self).__init__(recognizer=recognizer, input=recognizer.getInputStream(), ctx=recognizer._ctx)
self.offendingToken = recognizer.getCurrentToken()
# A semantic predicate failed during validation. Validation of predicates
# occurs when normally parsing the alternative just like matching a token.
# Disambiguating predicate evaluation occurs when we test a predicate during
# prediction.
class FailedPredicateException(RecognitionException):
def __init__(self, recognizer, predicate=None, message=None):
super(FailedPredicateException, self).__init__(message=self.formatMessage(predicate,message), recognizer=recognizer,
input=recognizer.getInputStream(), ctx=recognizer._ctx)
s = recognizer._interp.atn.states[recognizer.state]
trans = s.transitions[0]
if isinstance(trans, PredicateTransition):
self.ruleIndex = trans.ruleIndex
self.predicateIndex = trans.predIndex
else:
self.ruleIndex = 0
self.predicateIndex = 0
self.predicate = predicate
self.offendingToken = recognizer.getCurrentToken()
def formatMessage(self, predicate, message):
if message is not None:
return message
else:
return "failed predicate: {" + predicate + "}?"
class ParseCancellationException(CancellationException):
pass
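# Illustrative sketch (not part of the module above): the information a
# RecognitionException carries to error handlers. The `e` passed to an
# ErrorListener.syntaxError hook (when not None) is one of the subclasses
# above; offendingToken, offendingState and getExpectedTokens() are the
# pieces used to build messages such as "mismatched input ... expecting ...".
def summarize_recognition_error(e):
    expected = e.getExpectedTokens()     # IntervalSet, or None if unknown
    token = e.offendingToken             # None for pure lexer errors
    text = token.text if token is not None else "<none>"
    return "state %d, offending token %s, expecting %s" % (
        e.offendingState, text, expected)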

View File

@ -0,0 +1 @@
__author__ = 'ericvergnaud'

View File

@ -0,0 +1,26 @@
class Chunk(object):
def __str__(self):
return unicode(self)
class TagChunk(Chunk):
def __init__(self, tag, label=None):
self.tag = tag
self.label = label
def __unicode__(self):
if self.label is None:
return self.tag
else:
return self.label + ":" + self.tag
class TextChunk(Chunk):
def __init__(self, text):
self.text = text
def __unicode__(self):
return "'" + self.text + "'"

View File

@ -0,0 +1,145 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# Represents the result of matching a {@link ParseTree} against a tree pattern.
#
from io import StringIO
class ParseTreeMatch(object):
#
# Constructs a new instance of {@link ParseTreeMatch} from the specified
# parse tree and pattern.
#
# @param tree The parse tree to match against the pattern.
# @param pattern The parse tree pattern.
# @param labels A mapping from label names to collections of
# {@link ParseTree} objects located by the tree pattern matching process.
# @param mismatchedNode The first node which failed to match the tree
# pattern during the matching process.
#
# @exception IllegalArgumentException if {@code tree} is {@code null}
# @exception IllegalArgumentException if {@code pattern} is {@code null}
# @exception IllegalArgumentException if {@code labels} is {@code null}
#
def __init__(self, tree, pattern, labels, mismatchedNode):
if tree is None:
raise Exception("tree cannot be null")
if pattern is None:
raise Exception("pattern cannot be null")
if labels is None:
raise Exception("labels cannot be null")
self.tree = tree
self.pattern = pattern
self.labels = labels
self.mismatchedNode = mismatchedNode
#
# Get the last node associated with a specific {@code label}.
#
# <p>For example, for pattern {@code <id:ID>}, {@code get("id")} returns the
# node matched for that {@code ID}. If more than one node
# matched the specified label, only the last is returned. If there is
# no node associated with the label, this returns {@code null}.</p>
#
# <p>Pattern tags like {@code <ID>} and {@code <expr>} without labels are
# considered to be labeled with {@code ID} and {@code expr}, respectively.</p>
#
# @param label The label to check.
#
# @return The last {@link ParseTree} to match a tag with the specified
# label, or {@code null} if no parse tree matched a tag with the label.
#
def get(self, label):
parseTrees = self.labels.get(label, None)
if parseTrees is None or len(parseTrees)==0:
return None
else:
return parseTrees[len(parseTrees)-1]
#
# Return all nodes matching a rule or token tag with the specified label.
#
# <p>If the {@code label} is the name of a parser rule or token in the
# grammar, the resulting list will contain both the parse trees matching
# rule or tags explicitly labeled with the label and the complete set of
# parse trees matching the labeled and unlabeled tags in the pattern for
# the parser rule or token. For example, if {@code label} is {@code "foo"},
# the result will contain <em>all</em> of the following.</p>
#
# <ul>
# <li>Parse tree nodes matching tags of the form {@code <foo:anyRuleName>} and
# {@code <foo:AnyTokenName>}.</li>
# <li>Parse tree nodes matching tags of the form {@code <anyLabel:foo>}.</li>
# <li>Parse tree nodes matching tags of the form {@code <foo>}.</li>
# </ul>
#
# @param label The label.
#
# @return A collection of all {@link ParseTree} nodes matching tags with
# the specified {@code label}. If no nodes matched the label, an empty list
# is returned.
#
def getAll(self, label):
nodes = self.labels.get(label, None)
if nodes is None:
return list()
else:
return nodes
#
# Gets a value indicating whether the match operation succeeded.
#
# @return {@code true} if the match operation succeeded; otherwise,
# {@code false}.
#
def succeeded(self):
return self.mismatchedNode is None
#
# {@inheritDoc}
#
def __str__(self):
return unicode(self)
def __unicode__(self):
with StringIO() as buf:
buf.write(u"Match ")
buf.write(u"succeeded" if self.succeeded() else "failed")
buf.write(u"; found ")
buf.write(unicode(len(self.labels)))
buf.write(u" labels")
return buf.getvalue()
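# Illustrative sketch (not part of the module above): reading results out of
# a match. `pattern` is assumed to be a ParseTreePattern built for something
# like "<x:ID> = <expr>;" and `tree` a parse tree for the corresponding rule;
# both come from application code, not from this module.
def read_match(pattern, tree):
    match = pattern.match(tree)
    if not match.succeeded():
        return match.mismatchedNode     # first subtree that failed to match
    ident = match.get("x")              # last node bound to label "x"
    exprs = match.getAll("expr")        # every node matched by <expr> tags
    return ident, exprs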

View File

@ -0,0 +1,94 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# A pattern like {@code <ID> = <expr>;} converted to a {@link ParseTree} by
# {@link ParseTreePatternMatcher#compile(String, int)}.
#
from antlr4.xpath.XPath import XPath
class ParseTreePattern(object):
# Construct a new instance of the {@link ParseTreePattern} class.
#
# @param matcher The {@link ParseTreePatternMatcher} which created this
# tree pattern.
# @param pattern The tree pattern in concrete syntax form.
# @param patternRuleIndex The parser rule which serves as the root of the
# tree pattern.
# @param patternTree The tree pattern in {@link ParseTree} form.
#
def __init__(self, matcher, pattern, patternRuleIndex , patternTree):
self.matcher = matcher
self.patternRuleIndex = patternRuleIndex
self.pattern = pattern
self.patternTree = patternTree
#
# Match a specific parse tree against this tree pattern.
#
# @param tree The parse tree to match against this tree pattern.
# @return A {@link ParseTreeMatch} object describing the result of the
# match operation. The {@link ParseTreeMatch#succeeded()} method can be
# used to determine whether or not the match was successful.
#
def match(self, tree):
return self.matcher.match(tree, self)
#
# Determine whether or not a parse tree matches this tree pattern.
#
# @param tree The parse tree to match against this tree pattern.
# @return {@code true} if {@code tree} is a match for the current tree
# pattern; otherwise, {@code false}.
#
def matches(self, tree):
return self.matcher.match(tree, self).succeeded()
# Find all nodes using XPath and then try to match those subtrees against
# this tree pattern.
#
# @param tree The {@link ParseTree} to match against this pattern.
# @param xpath An expression matching the nodes
#
# @return A collection of {@link ParseTreeMatch} objects describing the
# successful matches. Unsuccessful matches are omitted from the result,
# regardless of the reason for the failure.
#
def findAll(self, tree, xpath):
subtrees = XPath.findAll(tree, xpath, self.matcher.parser)
matches = list()
for t in subtrees:
match = self.match(t)
if match.succeeded():
matches.append(match)
return matches
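# Illustrative sketch (not part of the module above): the three entry points
# of ParseTreePattern. How `pattern` was built (through a
# ParseTreePatternMatcher constructed from a lexer/parser pair) is assumed
# here, and "//stat" is a hypothetical XPath selecting statement subtrees.
def apply_pattern(pattern, tree):
    if pattern.matches(tree):                   # boolean convenience form
        print("tree matches the pattern")
    match = pattern.match(tree)                 # full ParseTreeMatch result
    hits = pattern.findAll(tree, "//stat")      # XPath-guided matching
    return match, hits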

View File

@ -0,0 +1,392 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# A tree pattern matching mechanism for ANTLR {@link ParseTree}s.
#
# <p>Patterns are strings of source input text with special tags representing
# token or rule references such as:</p>
#
# <p>{@code <ID> = <expr>;}</p>
#
# <p>Given a pattern start rule such as {@code statement}, this object constructs
# a {@link ParseTree} with placeholders for the {@code ID} and {@code expr}
# subtree. Then the {@link #match} routines can compare an actual
# {@link ParseTree} from a parse with this pattern. Tag {@code <ID>} matches
# any {@code ID} token and tag {@code <expr>} references the result of the
# {@code expr} rule (generally an instance of {@code ExprContext}).</p>
#
# <p>Pattern {@code x = 0;} is a similar pattern that matches the same pattern
# except that it requires the identifier to be {@code x} and the expression to
# be {@code 0}.</p>
#
# <p>The {@link #matches} routines return {@code true} or {@code false} based
# upon a match for the tree rooted at the parameter sent in. The
# {@link #match} routines return a {@link ParseTreeMatch} object that
# contains the parse tree, the parse tree pattern, and a map from tag name to
# matched nodes (more below). A subtree that fails to match, returns with
# {@link ParseTreeMatch#mismatchedNode} set to the first tree node that did not
# match.</p>
#
# <p>For efficiency, you can compile a tree pattern in string form to a
# {@link ParseTreePattern} object.</p>
#
# <p>See {@code TestParseTreeMatcher} for lots of examples.
# {@link ParseTreePattern} has two static helper methods:
# {@link ParseTreePattern#findAll} and {@link ParseTreePattern#match} that
# are easy to use but not super efficient because they create new
# {@link ParseTreePatternMatcher} objects each time and have to compile the
# pattern in string form before using it.</p>
#
# <p>The lexer and parser that you pass into the {@link ParseTreePatternMatcher}
# constructor are used to parse the pattern in string form. The lexer converts
# the {@code <ID> = <expr>;} into a sequence of four tokens (assuming lexer
# throws out whitespace or puts it on a hidden channel). Be aware that the
# input stream is reset for the lexer (but not the parser; a
# {@link ParserInterpreter} is created to parse the input.). Any user-defined
# fields you have put into the lexer might get changed when this mechanism asks
# it to scan the pattern string.</p>
#
# <p>Normally a parser does not accept token {@code <expr>} as a valid
# {@code expr} but, from the parser passed in, we create a special version of
# the underlying grammar representation (an {@link ATN}) that allows imaginary
# tokens representing rules ({@code <expr>}) to match entire rules. We call
# these <em>bypass alternatives</em>.</p>
#
# <p>Delimiters are {@code <} and {@code >}, with {@code \} as the escape string
# by default, but you can set them to whatever you want using
# {@link #setDelimiters}. You must escape both start and stop strings
# {@code \<} and {@code \>}.</p>
#
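#
# A rough usage sketch (lexer/parser names are hypothetical): construct the
# matcher from the generated lexer and parser, optionally change the tag
# delimiters, then compile and apply a pattern:
#
#   matcher = ParseTreePatternMatcher(MyLexer(None), parser)
#   matcher.setDelimiters("<", ">", "\\")   # the defaults set below
#   p = matcher.compileTreePattern("<ID> = <expr>;", MyParser.RULE_statement)
#   ok = matcher.matchesPattern(tree, p)
#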
from antlr4 import CommonTokenStream, ParserRuleContext
from antlr4.InputStream import InputStream
from antlr4.ListTokenSource import ListTokenSource
from antlr4.Token import Token
from antlr4.error.ErrorStrategy import BailErrorStrategy
from antlr4.error.Errors import RecognitionException, ParseCancellationException
from antlr4.tree.Chunk import TagChunk, TextChunk
from antlr4.tree.RuleTagToken import RuleTagToken
from antlr4.tree.TokenTagToken import TokenTagToken
from antlr4.tree.Tree import TerminalNode, RuleNode
class CannotInvokeStartRule(Exception):
def __init__(self, e):
super(CannotInvokeStartRule, self).__init__(e)
class StartRuleDoesNotConsumeFullPattern(Exception):
pass
class ParseTreePatternMatcher(object):
    # Constructs a {@link ParseTreePatternMatcher} from a {@link Lexer} and
# {@link Parser} object. The lexer input stream is altered for tokenizing
# the tree patterns. The parser is used as a convenient mechanism to get
# the grammar name, plus token, rule names.
def __init__(self, lexer, parser):
self.lexer = lexer
self.parser = parser
self.start = "<"
self.stop = ">"
self.escape = "\\" # e.g., \< and \> must escape BOTH!
# Set the delimiters used for marking rule and token tags within concrete
# syntax used by the tree pattern parser.
#
# @param start The start delimiter.
# @param stop The stop delimiter.
# @param escapeLeft The escape sequence to use for escaping a start or stop delimiter.
#
# @exception IllegalArgumentException if {@code start} is {@code null} or empty.
# @exception IllegalArgumentException if {@code stop} is {@code null} or empty.
#
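    # For example (purely illustrative), setDelimiters("<<", ">>", "$") would make
    # the matcher recognize tags written as <<ID>> and <<e:expr>> instead.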
def setDelimiters(self, start, stop, escapeLeft):
if start is None or len(start)==0:
raise Exception("start cannot be null or empty")
if stop is None or len(stop)==0:
raise Exception("stop cannot be null or empty")
self.start = start
self.stop = stop
self.escape = escapeLeft
# Does {@code pattern} matched as rule {@code patternRuleIndex} match {@code tree}?#
def matchesRuleIndex(self, tree, pattern, patternRuleIndex):
p = self.compileTreePattern(pattern, patternRuleIndex)
return self.matches(tree, p)
# Does {@code pattern} matched as rule patternRuleIndex match tree? Pass in a
# compiled pattern instead of a string representation of a tree pattern.
#
def matchesPattern(self, tree, pattern):
mismatchedNode = self.matchImpl(tree, pattern.patternTree, dict())
return mismatchedNode is None
#
# Compare {@code pattern} matched as rule {@code patternRuleIndex} against
# {@code tree} and return a {@link ParseTreeMatch} object that contains the
# matched elements, or the node at which the match failed.
#
def matchRuleIndex(self, tree, pattern, patternRuleIndex):
p = self.compileTreePattern(pattern, patternRuleIndex)
return self.matchPattern(tree, p)
#
# Compare {@code pattern} matched against {@code tree} and return a
# {@link ParseTreeMatch} object that contains the matched elements, or the
# node at which the match failed. Pass in a compiled pattern instead of a
# string representation of a tree pattern.
#
def matchPattern(self, tree, pattern):
labels = dict()
mismatchedNode = self.matchImpl(tree, pattern.patternTree, labels)
from antlr4.tree.ParseTreeMatch import ParseTreeMatch
return ParseTreeMatch(tree, pattern, labels, mismatchedNode)
#
# For repeated use of a tree pattern, compile it to a
# {@link ParseTreePattern} using this method.
#
def compileTreePattern(self, pattern, patternRuleIndex):
tokenList = self.tokenize(pattern)
tokenSrc = ListTokenSource(tokenList)
tokens = CommonTokenStream(tokenSrc)
from antlr4.ParserInterpreter import ParserInterpreter
parserInterp = ParserInterpreter(self.parser.grammarFileName, self.parser.tokenNames,
self.parser.ruleNames, self.parser.getATNWithBypassAlts(),tokens)
tree = None
try:
parserInterp.setErrorHandler(BailErrorStrategy())
tree = parserInterp.parse(patternRuleIndex)
except ParseCancellationException as e:
raise e.cause
except RecognitionException as e:
raise e
except Exception as e:
raise CannotInvokeStartRule(e)
# Make sure tree pattern compilation checks for a complete parse
if tokens.LA(1)!=Token.EOF:
raise StartRuleDoesNotConsumeFullPattern()
from antlr4.tree.ParseTreePattern import ParseTreePattern
return ParseTreePattern(self, pattern, patternRuleIndex, tree)
#
# Recursively walk {@code tree} against {@code patternTree}, filling
# {@code match.}{@link ParseTreeMatch#labels labels}.
#
# @return the first node encountered in {@code tree} which does not match
# a corresponding node in {@code patternTree}, or {@code null} if the match
# was successful. The specific node returned depends on the matching
# algorithm used by the implementation, and may be overridden.
#
def matchImpl(self, tree, patternTree, labels):
if tree is None:
raise Exception("tree cannot be null")
if patternTree is None:
raise Exception("patternTree cannot be null")
# x and <ID>, x and y, or x and x; or could be mismatched types
if isinstance(tree, TerminalNode) and isinstance(patternTree, TerminalNode ):
mismatchedNode = None
# both are tokens and they have same type
if tree.symbol.type == patternTree.symbol.type:
if isinstance( patternTree.symbol, TokenTagToken ): # x and <ID>
tokenTagToken = patternTree.symbol
# track label->list-of-nodes for both token name and label (if any)
self.map(labels, tokenTagToken.tokenName, tree)
if tokenTagToken.label is not None:
self.map(labels, tokenTagToken.label, tree)
elif tree.getText()==patternTree.getText():
# x and x
pass
else:
# x and y
if mismatchedNode is None:
mismatchedNode = tree
else:
if mismatchedNode is None:
mismatchedNode = tree
return mismatchedNode
if isinstance(tree, ParserRuleContext) and isinstance(patternTree, ParserRuleContext):
mismatchedNode = None
# (expr ...) and <expr>
ruleTagToken = self.getRuleTagToken(patternTree)
if ruleTagToken is not None:
m = None
if tree.ruleContext.ruleIndex == patternTree.ruleContext.ruleIndex:
# track label->list-of-nodes for both rule name and label (if any)
self.map(labels, ruleTagToken.ruleName, tree)
if ruleTagToken.label is not None:
self.map(labels, ruleTagToken.label, tree)
else:
if mismatchedNode is None:
mismatchedNode = tree
return mismatchedNode
# (expr ...) and (expr ...)
if tree.getChildCount()!=patternTree.getChildCount():
if mismatchedNode is None:
mismatchedNode = tree
return mismatchedNode
n = tree.getChildCount()
for i in range(0, n):
childMatch = self.matchImpl(tree.getChild(i), patternTree.getChild(i), labels)
if childMatch is not None:
return childMatch
return mismatchedNode
# if nodes aren't both tokens or both rule nodes, can't match
return tree
def map(self, labels, label, tree):
v = labels.get(label, None)
if v is None:
v = list()
labels[label] = v
v.append(tree)
# Is {@code t} {@code (expr <expr>)} subtree?#
def getRuleTagToken(self, tree):
if isinstance( tree, RuleNode ):
if tree.getChildCount()==1 and isinstance(tree.getChild(0), TerminalNode ):
c = tree.getChild(0)
if isinstance( c.symbol, RuleTagToken ):
return c.symbol
return None
def tokenize(self, pattern):
# split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
chunks = self.split(pattern)
# create token stream from text and tags
tokens = list()
for chunk in chunks:
if isinstance( chunk, TagChunk ):
# add special rule token or conjure up new token from name
if chunk.tag[0].isupper():
ttype = self.parser.getTokenType(chunk.tag)
if ttype==Token.INVALID_TYPE:
raise Exception("Unknown token " + str(chunk.tag) + " in pattern: " + pattern)
tokens.append(TokenTagToken(chunk.tag, ttype, chunk.label))
elif chunk.tag[0].islower():
ruleIndex = self.parser.getRuleIndex(chunk.tag)
if ruleIndex==-1:
raise Exception("Unknown rule " + str(chunk.tag) + " in pattern: " + pattern)
ruleImaginaryTokenType = self.parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex]
tokens.append(RuleTagToken(chunk.tag, ruleImaginaryTokenType, chunk.label))
else:
raise Exception("invalid tag: " + str(chunk.tag) + " in pattern: " + pattern)
else:
self.lexer.setInputStream(InputStream(chunk.text))
t = self.lexer.nextToken()
while t.type!=Token.EOF:
tokens.append(t)
t = self.lexer.nextToken()
return tokens
# Split {@code <ID> = <e:expr> ;} into 4 chunks for tokenizing by {@link #tokenize}.#
def split(self, pattern):
p = 0
n = len(pattern)
chunks = list()
# find all start and stop indexes first, then collect
starts = list()
stops = list()
while p < n :
if p == pattern.find(self.escape + self.start, p):
p += len(self.escape) + len(self.start)
elif p == pattern.find(self.escape + self.stop, p):
p += len(self.escape) + len(self.stop)
elif p == pattern.find(self.start, p):
starts.append(p)
p += len(self.start)
elif p == pattern.find(self.stop, p):
stops.append(p)
p += len(self.stop)
else:
p += 1
nt = len(starts)
if nt > len(stops):
raise Exception("unterminated tag in pattern: " + pattern)
if nt < len(stops):
raise Exception("missing start tag in pattern: " + pattern)
for i in range(0, nt):
if starts[i] >= stops[i]:
raise Exception("tag delimiters out of order in pattern: " + pattern)
# collect into chunks now
if nt==0:
chunks.append(TextChunk(pattern))
if nt>0 and starts[0]>0: # copy text up to first tag into chunks
text = pattern[0:starts[0]]
chunks.append(TextChunk(text))
for i in range(0, nt):
# copy inside of <tag>
tag = pattern[starts[i] + len(self.start) : stops[i]]
ruleOrToken = tag
label = None
colon = tag.find(':')
if colon >= 0:
label = tag[0:colon]
ruleOrToken = tag[colon+1 : len(tag)]
chunks.append(TagChunk(label, ruleOrToken))
if i+1 < len(starts):
# copy from end of <tag> to start of next
text = pattern[stops[i] + len(self.stop) : starts[i + 1]]
chunks.append(TextChunk(text))
if nt > 0 :
afterLastTag = stops[nt - 1] + len(self.stop)
if afterLastTag < n : # copy text from end of last tag to end
text = pattern[afterLastTag : n]
chunks.append(TextChunk(text))
# strip out the escape sequences from text chunks but not tags
for i in range(0, len(chunks)):
c = chunks[i]
if isinstance( c, TextChunk ):
unescaped = c.text.replace(self.escape, "")
if len(unescaped) < len(c.text):
chunks[i] = TextChunk(unescaped)
return chunks

View File

@@ -0,0 +1,74 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# A {@link Token} object representing an entire subtree matched by a parser
# rule; e.g., {@code <expr>}. These tokens are created for {@link TagChunk}
# chunks where the tag corresponds to a parser rule.
#
from antlr4.Token import Token
class RuleTagToken(Token):
#
# Constructs a new instance of {@link RuleTagToken} with the specified rule
# name, bypass token type, and label.
#
# @param ruleName The name of the parser rule this rule tag matches.
# @param bypassTokenType The bypass token type assigned to the parser rule.
# @param label The label associated with the rule tag, or {@code null} if
# the rule tag is unlabeled.
#
# @exception IllegalArgumentException if {@code ruleName} is {@code null}
# or empty.
def __init__(self, ruleName, bypassTokenType, label=None):
if ruleName is None or len(ruleName)==0:
raise Exception("ruleName cannot be null or empty.")
self.source = None
self.type = bypassTokenType # token type of the token
self.channel = Token.DEFAULT_CHANNEL # The parser ignores everything not on DEFAULT_CHANNEL
self.start = -1 # optional; return -1 if not implemented.
self.stop = -1 # optional; return -1 if not implemented.
self.tokenIndex = -1 # from 0..n-1 of the token object in the input stream
self.line = 0 # line=1..n of the 1st character
self.column = -1 # beginning of the line at which it occurs, 0..n-1
self.label = label
self.ruleName = ruleName
self._text = self.getText() # text of the token.
def getText(self):
if self.label is None:
return "<" + self.ruleName + ">"
else:
return "<" + self.label + ":" + self.ruleName + ">"

View File

@@ -0,0 +1,72 @@
#
# [The "BSD license"]
# Copyright (c) 2013 Terence Parr
# Copyright (c) 2013 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# A {@link Token} object representing a token of a particular type; e.g.,
# {@code <ID>}. These tokens are created for {@link TagChunk} chunks where the
# tag corresponds to a lexer rule or token type.
#
from antlr4.Token import CommonToken
class TokenTagToken(CommonToken):
# Constructs a new instance of {@link TokenTagToken} with the specified
# token name, type, and label.
#
# @param tokenName The token name.
# @param type The token type.
# @param label The label associated with the token tag, or {@code null} if
# the token tag is unlabeled.
#
def __init__(self, tokenName, type, label=None):
super(TokenTagToken, self).__init__(type=type)
self.tokenName = tokenName
self.label = label
self._text = self.getText()
#
# {@inheritDoc}
#
# <p>The implementation for {@link TokenTagToken} returns the token tag
# formatted with {@code <} and {@code >} delimiters.</p>
#
def getText(self):
if self.label is None:
return "<" + self.tokenName + ">"
else:
return "<" + self.label + ":" + self.tokenName + ">"
# <p>The implementation for {@link TokenTagToken} returns a string of the form
# {@code tokenName:type}.</p>
#
def __unicode__(self):
return self.tokenName + u":" + unicode(self.type)

View File

@@ -0,0 +1,191 @@
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#/
# The basic notion of a tree has a parent, a payload, and a list of children.
# It is the most abstract interface for all the trees used by ANTLR.
#/
from antlr4.Token import Token
INVALID_INTERVAL = (-1, -2)
class Tree(object):
def __str__(self):
return unicode(self)
class SyntaxTree(Tree):
pass
class ParseTree(SyntaxTree):
pass
class RuleNode(ParseTree):
pass
class TerminalNode(ParseTree):
pass
class ErrorNode(TerminalNode):
pass
class ParseTreeVisitor(object):
def visit(self, tree):
return tree.accept(self)
def visitChildren(self, node):
result = self.defaultResult()
n = node.getChildCount()
for i in range(n):
if not self.shouldVisitNextChild(node, result):
return result
c = node.getChild(i)
childResult = c.accept(self)
result = self.aggregateResult(result, childResult)
return result
def visitTerminal(self, node):
return self.defaultResult()
def visitErrorNode(self, node):
return self.defaultResult()
def defaultResult(self):
return None
def aggregateResult(self, aggregate, nextResult):
return nextResult
def shouldVisitNextChild(self, node, currentResult):
return True
class ParseTreeListener(object):
def visitTerminal(self, node):
pass
def visitErrorNode(self, node):
pass
def enterEveryRule(self, ctx):
pass
def exitEveryRule(self, ctx):
pass
class TerminalNodeImpl(TerminalNode):
def __init__(self, symbol):
self.parentCtx = None
self.symbol = symbol
def getChild(self, i):
return None
def getSymbol(self):
return self.symbol
def getParent(self):
return self.parentCtx
def getPayload(self):
return self.symbol
def getSourceInterval(self):
if self.symbol is None:
return INVALID_INTERVAL
tokenIndex = self.symbol.tokenIndex
return (tokenIndex, tokenIndex)
def getChildCount(self):
return 0
def accept(self, visitor):
return visitor.visitTerminal(self)
def getText(self):
return self.symbol.text
def __unicode__(self):
if self.symbol.type == Token.EOF:
return "<EOF>"
else:
return self.symbol.text
# Represents a token that was consumed during resynchronization
# rather than during a valid match operation. For example,
# we will create this kind of a node during single token insertion
# and deletion as well as during "consume until error recovery set"
# upon no viable alternative exceptions.
class ErrorNodeImpl(TerminalNodeImpl,ErrorNode):
def __init__(self, token):
super(ErrorNodeImpl, self).__init__(token)
def accept(self, visitor):
return visitor.visitErrorNode(self)
class ParseTreeWalker(object):
DEFAULT = None
def walk(self, listener, t):
if isinstance(t, ErrorNode):
listener.visitErrorNode(t)
return
elif isinstance(t, TerminalNode):
listener.visitTerminal(t)
return
self.enterRule(listener, t)
for child in t.getChildren():
self.walk(listener, child)
self.exitRule(listener, t)
#
# The discovery of a rule node, involves sending two events: the generic
# {@link ParseTreeListener#enterEveryRule} and a
# {@link RuleContext}-specific event. First we trigger the generic and then
# the rule specific. We do them in reverse order upon finishing the node.
#
def enterRule(self, listener, r):
ctx = r.getRuleContext()
listener.enterEveryRule(ctx)
ctx.enterRule(listener)
def exitRule(self, listener, r):
ctx = r.getRuleContext()
ctx.exitRule(listener)
listener.exitEveryRule(ctx)
ParseTreeWalker.DEFAULT = ParseTreeWalker()

View File

@@ -0,0 +1,134 @@
#
# [The "BSD license"]
# Copyright (c) 2012 Terence Parr
# Copyright (c) 2012 Sam Harwell
# Copyright (c) 2014 Eric Vergnaud
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# A set of utility routines useful for all kinds of ANTLR trees.#
from io import StringIO
from antlr4.Token import Token
from antlr4.Utils import escapeWhitespace
from antlr4.tree.Tree import RuleNode, ErrorNode, TerminalNode
class Trees(object):
# Print out a whole tree in LISP form. {@link #getNodeText} is used on the
# node payloads to get the text for the nodes. Detect
# parse trees and extract data appropriately.
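    # For example, for a hypothetical rule "s" that matched the tokens "a" and "b",
    # this would produce something like "(s a b)"; nested rules add nested parens.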
@classmethod
def toStringTree(cls, t, ruleNames=None, recog=None):
if recog is not None:
ruleNames = recog.ruleNames
s = escapeWhitespace(cls.getNodeText(t, ruleNames), False)
if t.getChildCount()==0:
return s
with StringIO() as buf:
buf.write(u"(")
buf.write(s)
buf.write(u' ')
for i in range(0, t.getChildCount()):
if i > 0:
buf.write(u' ')
buf.write(cls.toStringTree(t.getChild(i), ruleNames))
buf.write(u")")
return buf.getvalue()
@classmethod
def getNodeText(cls, t, ruleNames=None, recog=None):
if recog is not None:
ruleNames = recog.ruleNames
if ruleNames is not None:
if isinstance(t, RuleNode):
return ruleNames[t.getRuleContext().getRuleIndex()]
elif isinstance( t, ErrorNode):
return unicode(t)
elif isinstance(t, TerminalNode):
if t.symbol is not None:
return t.symbol.text
# no recog for rule names
payload = t.getPayload()
if isinstance(payload, Token ):
return payload.text
return unicode(t.getPayload())
# Return ordered list of all children of this node
@classmethod
def getChildren(cls, t):
return [ t.getChild(i) for i in range(0, t.getChildCount()) ]
# Return a list of all ancestors of this node. The first node of
# list is the root and the last is the parent of this node.
#
@classmethod
def getAncestors(cls, t):
ancestors = []
t = t.getParent()
while t is not None:
ancestors.insert(0, t) # insert at start
t = t.getParent()
return ancestors
@classmethod
def findAllTokenNodes(cls, t, ttype):
return cls.findAllNodes(t, ttype, True)
@classmethod
def findAllRuleNodes(cls, t, ruleIndex):
return cls.findAllNodes(t, ruleIndex, False)
@classmethod
def findAllNodes(cls, t, index, findTokens):
nodes = []
cls._findAllNodes(t, index, findTokens, nodes)
return nodes
@classmethod
def _findAllNodes(cls, t, index, findTokens, nodes):
from antlr4.ParserRuleContext import ParserRuleContext
# check this node (the root) first
if findTokens and isinstance(t, TerminalNode):
if t.symbol.type==index:
nodes.append(t)
elif not findTokens and isinstance(t, ParserRuleContext):
if t.ruleIndex == index:
nodes.append(t)
# check children
for i in range(0, t.getChildCount()):
cls._findAllNodes(t.getChild(i), index, findTokens, nodes)
@classmethod
def descendants(cls, t):
nodes = []
nodes.append(t)
for i in range(0, t.getChildCount()):
nodes.extend(cls.descendants(t.getChild(i)))
return nodes

View File

@@ -0,0 +1,346 @@
#
# Represent a subset of XPath XML path syntax for use in identifying nodes in
# parse trees.
#
# <p>
# Split path into words and separators {@code /} and {@code //} via ANTLR
# itself then walk path elements from left to right. At each separator-word
# pair, find set of nodes. Next stage uses those as work list.</p>
#
# <p>
# The basic interface is
# {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
# But that is just shorthand for:</p>
#
# <pre>
# {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
# return p.{@link #evaluate evaluate}(tree);
# </pre>
#
# <p>
# See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
# allows operators:</p>
#
# <dl>
# <dt>/</dt> <dd>root</dd>
# <dt>//</dt> <dd>anywhere</dd>
# <dt>!</dt> <dd>invert; this must appear directly after root or anywhere
# operator</dd>
# </dl>
#
# <p>
# and path elements:</p>
#
# <dl>
# <dt>ID</dt> <dd>token name</dd>
# <dt>'string'</dt> <dd>any string literal token from the grammar</dd>
# <dt>expr</dt> <dd>rule name</dd>
# <dt>*</dt> <dd>wildcard matching any node</dd>
# </dl>
#
# <p>
# Whitespace is not allowed.</p>
#
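#
# A minimal usage sketch (the parser, tree and rule/token names are hypothetical):
#
#   path = XPath(parser, "//funcDef/ID")   # every ID token directly under any funcDef
#   for node in path.evaluate(tree):
#       print(node.getText())
#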
from io import StringIO
from antlr4 import CommonTokenStream, DFA, PredictionContextCache, Lexer, LexerATNSimulator, ParserRuleContext, TerminalNode
from antlr4.atn.ATNDeserializer import ATNDeserializer
from antlr4.InputStream import InputStream
from antlr4.Token import Token
from antlr4.error.ErrorListener import ErrorListener
from antlr4.error.Errors import LexerNoViableAltException
from antlr4.tree.Trees import Trees
def serializedATN():
with StringIO() as buf:
buf.write(u"\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd\2")
buf.write(u"\n\64\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7")
buf.write(u"\t\7\4\b\t\b\4\t\t\t\3\2\3\2\3\2\3\3\3\3\3\4\3\4\3\5")
buf.write(u"\3\5\3\6\3\6\7\6\37\n\6\f\6\16\6\"\13\6\3\6\3\6\3\7\3")
buf.write(u"\7\5\7(\n\7\3\b\3\b\3\t\3\t\7\t.\n\t\f\t\16\t\61\13\t")
buf.write(u"\3\t\3\t\3/\2\n\3\5\5\6\7\7\t\b\13\t\r\2\17\2\21\n\3")
buf.write(u"\2\4\7\2\62;aa\u00b9\u00b9\u0302\u0371\u2041\u2042\17")
buf.write(u"\2C\\c|\u00c2\u00d8\u00da\u00f8\u00fa\u0301\u0372\u037f")
buf.write(u"\u0381\u2001\u200e\u200f\u2072\u2191\u2c02\u2ff1\u3003")
buf.write(u"\ud801\uf902\ufdd1\ufdf2\uffff\64\2\3\3\2\2\2\2\5\3\2")
buf.write(u"\2\2\2\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\21\3\2\2")
buf.write(u"\2\3\23\3\2\2\2\5\26\3\2\2\2\7\30\3\2\2\2\t\32\3\2\2")
buf.write(u"\2\13\34\3\2\2\2\r\'\3\2\2\2\17)\3\2\2\2\21+\3\2\2\2")
buf.write(u"\23\24\7\61\2\2\24\25\7\61\2\2\25\4\3\2\2\2\26\27\7\61")
buf.write(u"\2\2\27\6\3\2\2\2\30\31\7,\2\2\31\b\3\2\2\2\32\33\7#")
buf.write(u"\2\2\33\n\3\2\2\2\34 \5\17\b\2\35\37\5\r\7\2\36\35\3")
buf.write(u"\2\2\2\37\"\3\2\2\2 \36\3\2\2\2 !\3\2\2\2!#\3\2\2\2\"")
buf.write(u" \3\2\2\2#$\b\6\2\2$\f\3\2\2\2%(\5\17\b\2&(\t\2\2\2\'")
buf.write(u"%\3\2\2\2\'&\3\2\2\2(\16\3\2\2\2)*\t\3\2\2*\20\3\2\2")
buf.write(u"\2+/\7)\2\2,.\13\2\2\2-,\3\2\2\2.\61\3\2\2\2/\60\3\2")
buf.write(u"\2\2/-\3\2\2\2\60\62\3\2\2\2\61/\3\2\2\2\62\63\7)\2\2")
buf.write(u"\63\22\3\2\2\2\6\2 \'/\3\3\6\2")
return buf.getvalue()
class XPathLexer(Lexer):
atn = ATNDeserializer().deserialize(serializedATN())
decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]
TOKEN_REF = 1
RULE_REF = 2
ANYWHERE = 3
ROOT = 4
WILDCARD = 5
BANG = 6
ID = 7
STRING = 8
modeNames = [ u"DEFAULT_MODE" ]
literalNames = [ u"<INVALID>",
u"'//'", u"'/'", u"'*'", u"'!'" ]
symbolicNames = [ u"<INVALID>",
u"TOKEN_REF", u"RULE_REF", u"ANYWHERE", u"ROOT", u"WILDCARD",
u"BANG", u"ID", u"STRING" ]
ruleNames = [ u"ANYWHERE", u"ROOT", u"WILDCARD", u"BANG", u"ID", u"NameChar",
u"NameStartChar", u"STRING" ]
grammarFileName = u"XPathLexer.g4"
def __init__(self, input=None):
super(XPathLexer, self).__init__(input)
self.checkVersion("4.5")
self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache())
self._actions = None
self._predicates = None
def action(self, localctx, ruleIndex, actionIndex):
if self._actions is None:
actions = dict()
actions[4] = self.ID_action
self._actions = actions
action = self._actions.get(ruleIndex, None)
if action is not None:
action(localctx, actionIndex)
else:
raise Exception("No registered action for:" + str(ruleIndex))
def ID_action(self, localctx , actionIndex):
if actionIndex == 0:
char = self.text[0]
if char.isupper():
self.type = XPathLexer.TOKEN_REF
else:
self.type = XPathLexer.RULE_REF
class XPath(object):
WILDCARD = "*" # word not operator/separator
NOT = "!" # word for invert operator
def __init__(self, parser, path):
self.parser = parser
self.path = path
self.elements = self.split(path)
def split(self, path):
input = InputStream(path)
lexer = XPathLexer(input)
def recover(self, e):
raise e
lexer.recover = recover
lexer.removeErrorListeners()
lexer.addErrorListener(ErrorListener()) # XPathErrorListener does no more
tokenStream = CommonTokenStream(lexer)
try:
tokenStream.fill()
except LexerNoViableAltException as e:
pos = lexer.getColumn()
msg = "Invalid tokens or characters at index " + str(pos) + " in path '" + path + "'"
raise Exception(msg, e)
tokens = tokenStream.getTokens()
elements = list()
n = len(tokens)
i=0
while i < n :
el = tokens[i]
next = None
if el.type in [XPathLexer.ROOT, XPathLexer.ANYWHERE]:
anywhere = el.type == XPathLexer.ANYWHERE
i += 1
next = tokens[i]
invert = next.type==XPathLexer.BANG
if invert:
i += 1
next = tokens[i]
pathElement = self.getXPathElement(next, anywhere)
pathElement.invert = invert
elements.append(pathElement)
i += 1
elif el.type in [XPathLexer.TOKEN_REF, XPathLexer.RULE_REF, XPathLexer.WILDCARD] :
elements.append( self.getXPathElement(el, False) )
i += 1
elif el.type==Token.EOF :
break
else:
raise Exception("Unknown path element " + str(el))
return elements
#
# Convert word like {@code *} or {@code ID} or {@code expr} to a path
# element. {@code anywhere} is {@code true} if {@code //} precedes the
# word.
#
def getXPathElement(self, wordToken, anywhere):
if wordToken.type==Token.EOF:
raise Exception("Missing path element at end of path")
word = wordToken.text
ttype = self.parser.getTokenType(word)
ruleIndex = self.parser.getRuleIndex(word)
if wordToken.type==XPathLexer.WILDCARD :
return XPathWildcardAnywhereElement() if anywhere else XPathWildcardElement()
elif wordToken.type in [XPathLexer.TOKEN_REF, XPathLexer.STRING]:
if ttype==Token.INVALID_TYPE:
raise Exception( word + " at index " + str(wordToken.startIndex) + " isn't a valid token name")
return XPathTokenAnywhereElement(word, ttype) if anywhere else XPathTokenElement(word, ttype)
else:
if ruleIndex==-1:
raise Exception( word + " at index " + str(wordToken.getStartIndex()) + " isn't a valid rule name")
return XPathRuleAnywhereElement(word, ruleIndex) if anywhere else XPathRuleElement(word, ruleIndex)
@staticmethod
def findAll(tree, xpath, parser):
p = XPath(parser, xpath)
return p.evaluate(tree)
#
# Return a list of all nodes starting at {@code t} as root that satisfy the
# path. The root {@code /} is relative to the node passed to
# {@link #evaluate}.
#
def evaluate(self, t):
dummyRoot = ParserRuleContext()
dummyRoot.children = [t] # don't set t's parent.
work = [dummyRoot]
for i in range(0, len(self.elements)):
next = set()
for node in work:
if len( node.children) > 0 :
# only try to match next element if it has children
# e.g., //func/*/stat might have a token node for which
# we can't go looking for stat nodes.
matching = self.elements[i].evaluate(node)
next.update(matching)
work = next
return work
class XPathElement(object):
def __init__(self, nodeName):
self.nodeName = nodeName
self.invert = False
def __str__(self):
return unicode(self)
def __unicode__(self):
return type(self).__name__ + "[" + ("!" if self.invert else "") + self.nodeName + "]"
#
# Either {@code ID} at start of path or {@code ...//ID} in middle of path.
#
class XPathRuleAnywhereElement(XPathElement):
def __init__(self, ruleName, ruleIndex):
super(XPathRuleAnywhereElement, self).__init__(ruleName)
self.ruleIndex = ruleIndex
def evaluate(self, t):
return Trees.findAllRuleNodes(t, self.ruleIndex)
class XPathRuleElement(XPathElement):
def __init__(self, ruleName, ruleIndex):
super(XPathRuleElement, self).__init__(ruleName)
self.ruleIndex = ruleIndex
def evaluate(self, t):
# return all children of t that match nodeName
nodes = []
for c in Trees.getChildren(t):
if isinstance(c, ParserRuleContext ):
if (c.ruleIndex == self.ruleIndex ) == (not self.invert):
nodes.append(c)
return nodes
class XPathTokenAnywhereElement(XPathElement):
def __init__(self, ruleName, tokenType):
super(XPathTokenAnywhereElement, self).__init__(ruleName)
self.tokenType = tokenType
def evaluate(self, t):
return Trees.findAllTokenNodes(t, self.tokenType)
class XPathTokenElement(XPathElement):
def __init__(self, ruleName, tokenType):
super(XPathTokenElement, self).__init__(ruleName)
self.tokenType = tokenType
def evaluate(self, t):
# return all children of t that match nodeName
nodes = []
for c in Trees.getChildren(t):
if isinstance(c, TerminalNode):
if (c.symbol.type == self.tokenType ) == (not self.invert):
nodes.append(c)
return nodes
class XPathWildcardAnywhereElement(XPathElement):
def __init__(self):
super(XPathWildcardAnywhereElement, self).__init__(XPath.WILDCARD)
def evaluate(self, t):
if self.invert:
return list() # !* is weird but valid (empty)
else:
return Trees.descendants(t)
class XPathWildcardElement(XPathElement):
def __init__(self):
super(XPathWildcardElement, self).__init__(XPath.WILDCARD)
def evaluate(self, t):
if self.invert:
return list() # !* is weird but valid (empty)
else:
return Trees.getChildren(t)

View File

@@ -0,0 +1 @@
__author__ = 'ericvergnaud'

View File

@@ -0,0 +1,805 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* Copyright (c) 2014 Eric Vergnaud
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** ANTLR tool checks output templates are compatible with tool code generation.
* For now, a simple string match used on x.y of x.y.z scheme.
* Must match Tool.VERSION during load to templates.
*
* REQUIRED.
*/
pythonTypeInitMap ::= [
"bool":"False",
"int":"0",
"float":"0.0",
"str":"",
default:"None" // anything other than a primitive type is an object
]
// args must be <object-model-object>, <fields-resulting-in-STs>
ParserFile(file, parser, namedActions) ::= <<
<fileHeader(file.grammarFileName, file.ANTLRVersion)>
# encoding: utf-8
from __future__ import print_function
from antlr4 import *
from io import StringIO
<namedActions.header>
<parser>
>>
ListenerFile(file, header) ::= <<
<fileHeader(file.grammarFileName, file.ANTLRVersion)>
from antlr4 import *
<header>
# This class defines a complete listener for a parse tree produced by <file.parserName>.
class <file.grammarName>Listener(ParseTreeListener):
<file.listenerNames:{lname |
# Enter a parse tree produced by <file.parserName>#<lname>.
def enter<lname; format="cap">(self, ctx):
pass
# Exit a parse tree produced by <file.parserName>#<lname>.
def exit<lname; format="cap">(self, ctx):
pass
}; separator="\n">
>>
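// As a rough illustration only: for a hypothetical grammar "Expr" with a parser
// rule "stat", the ListenerFile template above renders approximately to
//     class ExprListener(ParseTreeListener):
//         def enterStat(self, ctx):
//             pass
//         def exitStat(self, ctx):
//             pass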
VisitorFile(file, header) ::= <<
<fileHeader(file.grammarFileName, file.ANTLRVersion)>
from antlr4 import *
<header>
# This class defines a complete generic visitor for a parse tree produced by <file.parserName>.
class <file.grammarName>Visitor(ParseTreeVisitor):
<file.visitorNames:{lname |
# Visit a parse tree produced by <file.parserName>#<lname>.
def visit<lname; format="cap">(self, ctx):
return self.visitChildren(ctx)
}; separator="\n">
>>
fileHeader(grammarFileName, ANTLRVersion) ::= <<
# Generated from <grammarFileName> by ANTLR <ANTLRVersion>
>>
Parser(parser, funcs, atn, sempredFuncs, superClass) ::= <<
<Parser_(ctor="parser_ctor", ...)>
>>
Parser_(parser, funcs, atn, sempredFuncs, ctor, superClass) ::= <<
<if(superClass)>
from .<superClass> import <superClass>
<endif>
<atn>
class <parser.name> ( <if(superClass)><superClass><else>Parser<endif> ):
grammarFileName = "<parser.grammarFileName>"
atn = ATNDeserializer().deserialize(serializedATN())
decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]
sharedContextCache = PredictionContextCache()
literalNames = [ <parser.literalNames:{t | u<t>}; null="u\"\<INVALID>\"", separator=", ", wrap, anchor> ]
symbolicNames = [ <parser.symbolicNames:{t | u<t>}; null="u\"\<INVALID>\"", separator=", ", wrap, anchor> ]
<parser.rules:{r | RULE_<r.name> = <r.index>}; separator="\n", wrap, anchor>
ruleNames = [ <parser.ruleNames:{r | u"<r>"}; separator=", ", wrap, anchor> ]
EOF = <TokenLabelType()>.EOF
<if(parser.tokens)>
<parser.tokens:{k | <k>=<parser.tokens.(k)>}; separator="\n", wrap, anchor>
<endif>
<parser:(ctor)()>
<namedActions.members>
<funcs; separator="\n">
<if(sempredFuncs)>
def sempred(self, localctx, ruleIndex, predIndex):
if self._predicates == None:
self._predicates = dict()
<parser.sempredFuncs.values:{f |
self._predicates[<f.ruleIndex>] = self.<f.name>_sempred}; separator="\n ">
pred = self._predicates.get(ruleIndex, None)
if pred is None:
raise Exception("No predicate with index:" + str(ruleIndex))
else:
return pred(localctx, predIndex)
<sempredFuncs.values; separator="\n">
<endif>
>>
dumpActions(recog, argFuncs, actionFuncs, sempredFuncs) ::= <<
<if(actionFuncs)>
def action(self, localctx, ruleIndex, actionIndex):
if self._actions is None:
actions = dict()
<recog.actionFuncs.values:{f|
actions[<f.ruleIndex>] = self.<f.name>_action }; separator="\n">
self._actions = actions
action = self._actions.get(ruleIndex, None)
if action is not None:
action(localctx, actionIndex)
else:
raise Exception("No registered action for:" + str(ruleIndex))
<actionFuncs.values; separator="\n">
<endif>
<if(sempredFuncs)>
def sempred(self, localctx, ruleIndex, predIndex):
if self._predicates is None:
preds = dict()
<recog.sempredFuncs.values:{f|
preds[<f.ruleIndex>] = self.<f.name>_sempred}; separator="\n">
self._predicates = preds
pred = self._predicates.get(ruleIndex, None)
if pred is not None:
return pred(localctx, predIndex)
else:
raise Exception("No registered predicate for:" + str(ruleIndex))
<sempredFuncs.values; separator="\n">
<endif>
>>
parser_ctor(p) ::= <<
def __init__(self, input):
super(<parser.name>, self).__init__(input)
self.checkVersion("<file.ANTLRVersion>")
self._interp = ParserATNSimulator(self, self.atn, self.decisionsToDFA, self.sharedContextCache)
self._predicates = None
>>
/* This generates a private method since the actionIndex is generated, making an
* overriding implementation impossible to maintain.
*/
RuleActionFunction(r, actions) ::= <<
def <r.name>_action(self, localctx , actionIndex):
<actions:{index|
<if(first(actions))>
if actionIndex == <index>:
<actions.(index)>
<elseif(rest(actions))>
elif actionIndex == <index>:
<actions.(index)>
<endif> }; separator="\n">
>>
/* This generates a private method since the predIndex is generated, making an
* overriding implementation impossible to maintain.
*/
RuleSempredFunction(r, actions) ::= <<
def <r.name>_sempred(self, localctx, predIndex):
<actions:{index|
<if(first(actions))>
if predIndex == <index>:
return <actions.(index)>
<elseif(rest(actions))>
elif predIndex == <index>:
return <actions.(index)>
<endif> }; separator="\n">
>>
RuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs,namedActions,finallyAction,postamble,exceptions) ::= <<
<ruleCtx>
<altLabelCtxs:{l | <altLabelCtxs.(l)>}; separator="\n">
def <currentRule.name>(self<currentRule.args:{a | , <a.name>}>):
localctx = <parser.name>.<currentRule.ctxType>(self, self._ctx, self.state<currentRule.args:{a | , <a.name>}>)
self.enterRule(localctx, <currentRule.startState>, self.RULE_<currentRule.name>)
<namedActions.init>
<locals; separator="\n">
try:
<code>
<postamble; separator="\n">
<namedActions.after>
<if(exceptions)>
<exceptions; separator="\n">
<else>
except RecognitionException as re:
localctx.exception = re
self._errHandler.reportError(self, re)
self._errHandler.recover(self, re)
<endif>
finally:
<finallyAction>
self.exitRule()
return localctx
>>
LeftRecursiveRuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs,
namedActions,finallyAction,postamble) ::=
<<
<ruleCtx>
<altLabelCtxs:{l | <altLabelCtxs.(l)>}; separator="\n">
def <currentRule.name>(self, _p=0<if(currentRule.args)>, <args:{a | , <a>}><endif>):
_parentctx = self._ctx
_parentState = self.state
localctx = <parser.name>.<currentRule.ctxType>(self, self._ctx, _parentState<args:{a | , <a.name>}>)
_prevctx = localctx
_startState = <currentRule.startState>
self.enterRecursionRule(localctx, <currentRule.startState>, self.RULE_<currentRule.name>, _p)
<namedActions.init>
<locals; separator="\n">
try:
<code>
<postamble; separator="\n">
<namedActions.after>
except RecognitionException as re:
localctx.exception = re
self._errHandler.reportError(self, re)
self._errHandler.recover(self, re)
finally:
<finallyAction>
self.unrollRecursionContexts(_parentctx)
return localctx
>>
CodeBlockForOuterMostAlt(currentOuterMostAltCodeBlock, locals, preamble, ops) ::= <<
<if(currentOuterMostAltCodeBlock.altLabel)>localctx = <parser.name>.<currentOuterMostAltCodeBlock.altLabel; format="cap">Context(self, localctx)<endif>
self.enterOuterAlt(localctx, <currentOuterMostAltCodeBlock.alt.altNum>)
<CodeBlockForAlt(currentAltCodeBlock=currentOuterMostAltCodeBlock, ...)>
>>
CodeBlockForAlt(currentAltCodeBlock, locals, preamble, ops) ::= <<
<locals; separator="\n">
<preamble; separator="\n">
<ops; separator="\n">
>>
LL1AltBlock(choice, preamble, alts, error) ::= <<
self.state = <choice.stateNumber>
<!_errHandler.sync(this);!>
<if(choice.label)><labelref(choice.label)> = _input.LT(1)<endif>
<preamble; separator="\n">
token = self._input.LA(1)
<choice.altLook,alts:{look,alt| <cases(ttypes=look)>
<alt>
}; separator="\nel">
else:
<error>
>>
LL1OptionalBlock(choice, alts, error) ::= <<
self.state = <choice.stateNumber>
<!_errHandler.sync(this);!>
token = self._input.LA(1)
<choice.altLook,alts:{look,alt| <cases(ttypes=look)>
<alt>
pass}; separator="\nel">
else:
<error>
>>
LL1OptionalBlockSingleAlt(choice, expr, alts, preamble, error, followExpr) ::= <<
self.state = <choice.stateNumber>
<!_errHandler.sync(this);!>
<preamble; separator="\n">
if <expr>:
<alts; separator="\n">
<!else if ( !(<followExpr>) ) <error>!>
>>
LL1StarBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= <<
self.state = <choice.stateNumber>
self._errHandler.sync(self)
<preamble; separator="\n">
while <loopExpr>:
<alts; separator="\n">
self.state = <choice.loopBackStateNumber>
self._errHandler.sync(self)
<iteration>
>>
LL1PlusBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= <<
self.state = <choice.blockStartStateNumber> <! alt block decision !>
self._errHandler.sync(self)
<preamble; separator="\n">
while True:
<alts; separator="\n">
self.state = <choice.stateNumber> <! loopback/exit decision !>
self._errHandler.sync(self)
<iteration>
if not (<loopExpr>):
break
>>
// LL(*) stuff
AltBlock(choice, preamble, alts, error) ::= <<
self.state = <choice.stateNumber>
<!_errHandler.sync(this);!>
<if(choice.label)><labelref(choice.label)> = _input.LT(1)<endif>
<preamble; separator="\n">
la_ = self._interp.adaptivePredict(self._input,<choice.decision>,self._ctx)
<alts:{alt |
if la_ == <i>:
<alt>
pass
}; separator="\nel">
>>
OptionalBlock(choice, alts, error) ::= <<
self.state = <choice.stateNumber>
<!_errHandler.sync(this);!>
la_ = self._interp.adaptivePredict(self._input,<choice.decision>,self._ctx)
<alts:{alt |
if la_ == <i><if(!choice.ast.greedy)>+1<endif>:
<alt>
}; separator="\nel">
>>
StarBlock(choice, alts, sync, iteration) ::= <<
self.state = <choice.stateNumber>
self._errHandler.sync(self)
_alt = self._interp.adaptivePredict(self._input,<choice.decision>,self._ctx)
while _alt!=<choice.exitAlt> and _alt!=ATN.INVALID_ALT_NUMBER:
if _alt==1<if(!choice.ast.greedy)>+1<endif>:
<iteration>
<alts> <! should only be one !>
self.state = <choice.loopBackStateNumber>
self._errHandler.sync(self)
_alt = self._interp.adaptivePredict(self._input,<choice.decision>,self._ctx)
>>
PlusBlock(choice, alts, error) ::= <<
self.state = <choice.blockStartStateNumber> <! alt block decision !>
self._errHandler.sync(self)
_alt = 1<if(!choice.ast.greedy)>+1<endif>
while _alt!=<choice.exitAlt> and _alt!=ATN.INVALID_ALT_NUMBER:
<alts:{alt|
if _alt == <i><if(!choice.ast.greedy)>+1<endif>:
<alt>
}; separator="\nel">
else:
<error>
self.state = <choice.loopBackStateNumber> <! loopback/exit decision !>
self._errHandler.sync(self)
_alt = self._interp.adaptivePredict(self._input,<choice.decision>,self._ctx)
>>
Sync(s) ::= "sync(<s.expecting.name>)"
ThrowNoViableAlt(t) ::= "raise NoViableAltException(self)"
TestSetInline(s) ::= <<
<s.bitsets:{bits | <if(rest(rest(bits.ttypes)))><bitsetBitfieldComparison(s, bits)><else><bitsetInlineComparison(s, bits)><endif>}; separator=" or ">
>>
// Java language spec 15.19 - shift operators mask operands rather than overflow to 0... need range test
testShiftInRange(shiftAmount) ::= <<
((<shiftAmount>) & ~0x3f) == 0
>>
// produces smaller bytecode only when bits.ttypes contains more than two items
bitsetBitfieldComparison(s, bits) ::= <%
(<testShiftInRange({<offsetShiftVar(s.varName, bits.shift)>})> and ((1 \<\< <offsetShiftVar(s.varName, bits.shift)>) & (<bits.ttypes:{ttype | (1 \<\< <offsetShiftType(ttype, bits.shift)>)}; separator=" | ">)) != 0)
%>
isZero ::= [
"0":true,
default:false
]
offsetShiftVar(shiftAmount, offset) ::= <%
<if(!isZero.(offset))>(<shiftAmount> - <offset>)<else><shiftAmount><endif>
%>
offsetShiftType(shiftAmount, offset) ::= <%
<if(!isZero.(offset))>(<parser.name>.<shiftAmount> - <offset>)<else><parser.name>.<shiftAmount><endif>
%>
// produces more efficient bytecode when bits.ttypes contains at most two items
bitsetInlineComparison(s, bits) ::= <%
<bits.ttypes:{ttype | <s.varName>==<parser.name>.<ttype>}; separator=" or ">
%>
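// Rough illustration (identifiers are hypothetical): for a two-token lookahead
// set the inline form above renders to something like
//     _la==MyParser.ID or _la==MyParser.INT
// whereas the bitfield form emits a single masked shift test over the whole set.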
cases(ttypes) ::= <<
if token in [<ttypes:{t | <parser.name>.<t>}; separator=", ">]:
>>
InvokeRule(r, argExprsChunks) ::= <<
self.state = <r.stateNumber>
<if(r.labels)><r.labels:{l | <labelref(l)> = }><endif>self.<r.name>(<if(r.ast.options.p)><r.ast.options.p><if(argExprsChunks)>,<endif><endif><argExprsChunks>)
>>
MatchToken(m) ::= <<
self.state = <m.stateNumber>
<if(m.labels)><m.labels:{l | <labelref(l)> = }><endif>self.match(<parser.name>.<m.name>)
>>
MatchSet(m, expr, capture) ::= "<CommonSetStuff(m, expr, capture, false)>"
MatchNotSet(m, expr, capture) ::= "<CommonSetStuff(m, expr, capture, true)>"
CommonSetStuff(m, expr, capture, invert) ::= <<
self.state = <m.stateNumber>
<if(m.labels)><m.labels:{l | <labelref(l)> = }>self._input.LT(1)<endif>
<capture>
<if(invert)>if <m.varName> \<= 0 or <expr><else>if not(<expr>)<endif>:
<if(m.labels)><m.labels:{l | <labelref(l)> = }><else> <endif>self._errHandler.recoverInline(self)
else:
self.consume()
>>
Wildcard(w) ::= <<
self.state = <w.stateNumber>
<if(w.labels)><w.labels:{l | <labelref(l)> = }><endif>self.matchWildcard()
>>
// ACTION STUFF
Action(a, foo, chunks) ::= "<chunks>"
ArgAction(a, chunks) ::= "<chunks>"
SemPred(p, chunks, failChunks) ::= <<
self.state = <p.stateNumber>
if not <chunks>:
from antlr4.error.Errors import FailedPredicateException
raise FailedPredicateException(self, <p.predicate><if(failChunks)>, <failChunks><elseif(p.msg)>, <p.msg><endif>)
>>
ExceptionClause(e, catchArg, catchAction) ::= <<
catch (<catchArg>) {
<catchAction>
}
>>
// lexer actions are not associated with model objects
LexerSkipCommand() ::= "skip()"
LexerMoreCommand() ::= "more()"
LexerPopModeCommand() ::= "popMode()"
LexerTypeCommand(arg) ::= "_type = <arg>"
LexerChannelCommand(arg) ::= "_channel = <arg>"
LexerModeCommand(arg) ::= "_mode = <arg>"
LexerPushModeCommand(arg) ::= "pushMode(<arg>)"
ActionText(t) ::= "<t.text>"
ActionTemplate(t) ::= "<t.st>"
ArgRef(a) ::= "localctx.<a.name>"
LocalRef(a) ::= "localctx.<a.name>"
RetValueRef(a) ::= "localctx.<a.name>"
QRetValueRef(a) ::= "<ctx(a)>.<a.dict>.<a.name>"
/** How to translate $tokenLabel */
TokenRef(t) ::= "<ctx(t)>.<t.name>"
LabelRef(t) ::= "<ctx(t)>.<t.name>"
ListLabelRef(t) ::= "<ctx(t)>.<ListLabelName(t.name)>"
SetAttr(s,rhsChunks) ::= "<ctx(s)>.<s.name> = <rhsChunks>"
TokenLabelType() ::= "<file.TokenLabelType; null={Token}>"
InputSymbolType() ::= "<file.InputSymbolType; null={Token}>"
TokenPropertyRef_text(t) ::= "(None if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.text)"
TokenPropertyRef_type(t) ::= "(0 if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.type)"
TokenPropertyRef_line(t) ::= "(0 if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.line)"
TokenPropertyRef_pos(t) ::= "(0 if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.column)"
TokenPropertyRef_channel(t) ::= "(0 if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.channel)"
TokenPropertyRef_index(t) ::= "(0 if <ctx(t)>.<t.label> is None else <ctx(t)>.<t.label>.tokenIndex)"
TokenPropertyRef_int(t) ::= "(0 if <ctx(t)>.<t.label> is None else int(<ctx(t)>.<t.label>.text))"
RulePropertyRef_start(r) ::= "(None if <ctx(r)>.<r.label> is None else <ctx(r)>.<r.label>.start)"
RulePropertyRef_stop(r) ::= "(None if <ctx(r)>.<r.label> is None else <ctx(r)>.<r.label>.stop)"
RulePropertyRef_text(r) ::= "(None if <ctx(r)>.<r.label> is None else self._input.getText((<ctx(r)>.<r.label>.start,<ctx(r)>.<r.label>.stop)))"
RulePropertyRef_ctx(r) ::= "<ctx(r)>.<r.label>"
RulePropertyRef_parser(r) ::= "self"
ThisRulePropertyRef_start(r) ::= "localctx.start"
ThisRulePropertyRef_stop(r) ::= "localctx.stop"
ThisRulePropertyRef_text(r) ::= "self._input.getText((localctx.start, self._input.LT(-1)))"
ThisRulePropertyRef_ctx(r) ::= "localctx"
ThisRulePropertyRef_parser(r) ::= "self"
NonLocalAttrRef(s) ::= "getInvokingContext(<s.ruleIndex>).<s.name>"
SetNonLocalAttr(s, rhsChunks) ::= "getInvokingContext(<s.ruleIndex>).<s.name> = <rhsChunks>"
AddToLabelList(a) ::= "<ctx(a.label)>.<a.listName>.append(<labelref(a.label)>)"
TokenDecl(t) ::= "self.<t.name> = None # <TokenLabelType()>"
TokenTypeDecl(t) ::= "self.<t.name> = 0 # <TokenLabelType()> type"
TokenListDecl(t) ::= "self.<t.name> = list() # of <TokenLabelType()>s"
RuleContextDecl(r) ::= "self.<r.name> = None # <r.ctxName>"
RuleContextListDecl(rdecl) ::= "self.<rdecl.name> = list() # of <rdecl.ctxName>s"
ContextTokenGetterDecl(t) ::= <<
def <t.name>(self):
return self.getToken(<parser.name>.<t.name>, 0)
>>
// should never be called
ContextTokenListGetterDecl(t) ::= <<
def <t.name>_list(self):
return self.getTokens(<parser.name>.<t.name>)
>>
ContextTokenListIndexedGetterDecl(t) ::= <<
def <t.name>(self, i=None):
if i is None:
return self.getTokens(<parser.name>.<t.name>)
else:
return self.getToken(<parser.name>.<t.name>, i)
>>
ContextRuleGetterDecl(r) ::= <<
def <r.name>(self):
return self.getTypedRuleContext(<parser.name>.<r.ctxName>,0)
>>
// should never be called
ContextRuleListGetterDecl(r) ::= <<
def <r.name>_list(self):
return self.getTypedRuleContexts(<parser.name>.<r.ctxName>)
>>
ContextRuleListIndexedGetterDecl(r) ::= <<
def <r.name>(self, i=None):
if i is None:
return self.getTypedRuleContexts(<parser.name>.<r.ctxName>)
else:
return self.getTypedRuleContext(<parser.name>.<r.ctxName>,i)
>>
LexerRuleContext() ::= "RuleContext"
/** The rule context name is the rule followed by a suffix; e.g.,
* r becomes rContext.
*/
RuleContextNameSuffix() ::= "Context"
ImplicitTokenLabel(tokenName) ::= "_<tokenName>"
ImplicitRuleLabel(ruleName) ::= "_<ruleName>"
ImplicitSetLabel(id) ::= "_tset<id>"
ListLabelName(label) ::= "<label>"
CaptureNextToken(d) ::= "<d.varName> = self._input.LT(1)"
CaptureNextTokenType(d) ::= "<d.varName> = self._input.LA(1)"
StructDecl(struct,ctorAttrs,attrs,getters,dispatchMethods,interfaces,extensionMembers,
           superClass={ParserRuleContext}) ::= <<
class <struct.name>(<superClass>):

    def __init__(self, parser, parent=None, invokingState=-1<struct.ctorAttrs:{a | , <a.name>=None}>):
        super(<parser.name>.<struct.name>, self).__init__(parent, invokingState)
        self.parser = parser
        <attrs:{a | <a>}; separator="\n">
        <struct.ctorAttrs:{a | self.<a.name> = <a.name>}; separator="\n">

    <getters:{g | <g>}; separator="\n\n">

    def getRuleIndex(self):
        return <parser.name>.RULE_<struct.derivedFromName>

<if(struct.provideCopyFrom)>    <! don't need copy unless we have subclasses !>
    def copyFrom(self, ctx):
        super(<parser.name>.<struct.name>, self).copyFrom(ctx)
        <struct.attrs:{a | self.<a.name> = ctx.<a.name>}; separator="\n">
<endif>
    <dispatchMethods; separator="\n">
    <extensionMembers; separator="\n">
>>
AltLabelStructDecl(struct,attrs,getters,dispatchMethods) ::= <<
class <struct.name>(<currentRule.name; format="cap">Context):

    def __init__(self, parser, ctx): # actually a <parser.name>.<currentRule.name; format="cap">Context
        super(<parser.name>.<struct.name>, self).__init__(parser)
        <attrs:{a | <a>}; separator="\n">
        self.copyFrom(ctx)

    <getters:{g | <g>}; separator="\n">

    <dispatchMethods; separator="\n">
>>
ListenerDispatchMethod(method) ::= <<
def <if(method.isEnter)>enter<else>exit<endif>Rule(self, listener):
    if hasattr(listener, "<if(method.isEnter)>enter<else>exit<endif><struct.derivedFromName; format="cap">"):
        listener.<if(method.isEnter)>enter<else>exit<endif><struct.derivedFromName; format="cap">(self)
>>
VisitorDispatchMethod(method) ::= <<
def accept(self, visitor):
    if hasattr(visitor, "visit<struct.derivedFromName; format="cap">"):
        return visitor.visit<struct.derivedFromName; format="cap">(self)
    else:
        return visitor.visitChildren(self)
>>
AttributeDecl(d) ::= "self.<d.name> = <if(d.initValue)><d.initValue><else>None<endif>"
/** If we don't know location of label def x, use this template */
labelref(x) ::= "<if(!x.isLocal)>localctx.<endif><x.name>"
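// e.g., a label e stored on the rule's context renders as "localctx.e"; a rule-local
// definition renders as just "e"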
/** For any action chunk, what is the correctly-typed context struct pointer? */
ctx(actionChunk) ::= "localctx"
// used for left-recursive rules
recRuleAltPredicate(ruleName,opPrec) ::= "self.precpred(self._ctx, <opPrec>)"
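// e.g., for an alternative with operator precedence 2 this renders as
// "self.precpred(self._ctx, 2)"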
recRuleSetReturnAction(src,name) ::= "$<name>=$<src>.<name>"
recRuleSetStopToken() ::= "self._ctx.stop = self._input.LT(-1)"
recRuleAltStartAction(ruleName, ctxName, label) ::= <<
localctx = <parser.name>.<ctxName>Context(self, _parentctx, _parentState)
<if(label)>localctx.<label> = _prevctx<endif>
self.pushNewRecursionContext(localctx, _startState, self.RULE_<ruleName>)
>>
recRuleLabeledAltStartAction(ruleName, currentAltLabel, label, isListLabel) ::= <<
localctx = <parser.name>.<currentAltLabel; format="cap">Context(self, <parser.name>.<ruleName; format="cap">Context(self, _parentctx, _parentState))
<if(label)>
<if(isListLabel)>
localctx.<label>.append(_prevctx)
<else>
localctx.<label> = _prevctx
<endif>
<endif>
self.pushNewRecursionContext(localctx, _startState, self.RULE_<ruleName>)
>>
recRuleReplaceContext(ctxName) ::= <<
localctx = <parser.name>.<ctxName>Context(self, localctx)
self._ctx = localctx
_prevctx = localctx
>>
recRuleSetPrevCtx() ::= <<
if self._parseListeners is not None:
    self.triggerExitRuleEvent()
_prevctx = localctx
>>
LexerFile(lexerFile, lexer, namedActions) ::= <<
<fileHeader(lexerFile.grammarFileName, lexerFile.ANTLRVersion)>
# encoding: utf-8
from __future__ import print_function
from antlr4 import *
from io import StringIO
<namedActions.header>
<lexer>
>>
Lexer(lexer, atn, actionFuncs, sempredFuncs, superClass) ::= <<
<atn>

class <lexer.name>(<if(superClass)><superClass><else>Lexer<endif>):

    atn = ATNDeserializer().deserialize(serializedATN())

    decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]

    <rest(lexer.modes):{m| <m> = <i>}; separator="\n">
    <lexer.tokens:{k | <k> = <lexer.tokens.(k)>}; separator="\n", wrap, anchor>

    modeNames = [ <lexer.modes:{m| u"<m>"}; separator=", ", wrap, anchor> ]

    literalNames = [ u"\<INVALID>",
                     <lexer.literalNames:{t | u<t>}; separator=", ", wrap, anchor> ]

    symbolicNames = [ u"\<INVALID>",
                      <lexer.symbolicNames:{t | u<t>}; separator=", ", wrap, anchor> ]

    ruleNames = [ <lexer.ruleNames:{r | u"<r>"}; separator=", ", wrap, anchor> ]

    grammarFileName = u"<lexer.grammarFileName>"

    def __init__(self, input=None):
        super(<lexer.name>, self).__init__(input)
        self.checkVersion("<lexerFile.ANTLRVersion>")
        self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache())
        self._actions = None
        self._predicates = None

    <namedActions.members>

    <dumpActions(lexer, "", actionFuncs, sempredFuncs)>
>>
SerializedATN(model) ::= <<
<! only one segment, can be inlined !>
def serializedATN():
    with StringIO() as buf:
        buf.write(u"<model.serialized; wrap={")<\n>        buf.write(u"}>")
        return buf.getvalue()
>>
/** Using a type-to-init-value map, try to initialize a type; if the type is not
 * in the table it must be an object, so the default value is "null".
 */
initValue(typeName) ::= <<
<javaTypeInitMap.(typeName)>
>>
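/** For illustration only: if the map contained an entry such as "int":"0", then
 *  initValue("int") would render as 0; a type missing from the table falls back to
 *  the map's default entry.
 */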
codeFileExtension() ::= ".py"