From ff1283affb4df5d5192aa53aee395643b874a57a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Henrique?= Date: Tue, 3 Sep 2019 19:21:42 -0300 Subject: [PATCH 1/7] General XPath fixes for the Python3 runtime --- contributors.txt | 1 + runtime/Python3/src/antlr4/xpath/XPath.py | 117 +++++++++++----------- 2 files changed, 62 insertions(+), 56 deletions(-) diff --git a/contributors.txt b/contributors.txt index d40bacd5f..55687a831 100644 --- a/contributors.txt +++ b/contributors.txt @@ -221,3 +221,4 @@ YYYY/MM/DD, github id, Full name, email 2019/07/11, olowo726, Olof Wolgast, olof@baah.se 2019/07/16, abhijithneilabraham, Abhijith Neil Abraham, abhijithneilabrahampk@gmail.com 2019/07/26, Braavos96, Eric Hettiaratchi, erichettiaratchi@gmail.com +2019/09/03, João Henrique, johnnyonflame@hotmail.com diff --git a/runtime/Python3/src/antlr4/xpath/XPath.py b/runtime/Python3/src/antlr4/xpath/XPath.py index c49e3ba94..dcffad518 100644 --- a/runtime/Python3/src/antlr4/xpath/XPath.py +++ b/runtime/Python3/src/antlr4/xpath/XPath.py @@ -47,7 +47,7 @@ #

# Whitespace is not allowed.

# -from antlr4 import CommonTokenStream, DFA, PredictionContextCache, Lexer, LexerATNSimulator +from antlr4 import CommonTokenStream, DFA, PredictionContextCache, Lexer, LexerATNSimulator, ParserRuleContext, TerminalNode from antlr4.InputStream import InputStream from antlr4.Parser import Parser from antlr4.RuleContext import RuleContext @@ -134,7 +134,7 @@ class XPathLexer(Lexer): if _action is not None: _action(localctx, actionIndex) else: - raise Exception("No registered action for:" + str(ruleIndex)) + raise Exception("No registered action for: %d" % ruleIndex) def ID_action(self, localctx:RuleContext , actionIndex:int): if actionIndex == 0: @@ -166,40 +166,40 @@ class XPath(object): try: tokenStream.fill() except LexerNoViableAltException as e: - pos = lexer.getColumn() - msg = "Invalid tokens or characters at index " + str(pos) + " in path '" + path + "'" + pos = lexer.column + msg = "Invalid tokens or characters at index %d in path '%s'" % (pos, path) raise Exception(msg, e) - tokens = tokenStream.getTokens() + tokens = iter(tokenStream.tokens) elements = list() - n = len(tokens) - i=0 - while i < n : - el = tokens[i] - next = None + for el in tokens: + invert = False + anywhere = False + # Check for path separators, if none assume root if el.type in [XPathLexer.ROOT, XPathLexer.ANYWHERE]: - anywhere = el.type == XPathLexer.ANYWHERE - i += 1 - next = tokens[i] - invert = next.type==XPathLexer.BANG - if invert: - i += 1 - next = tokens[i] - pathElement = self.getXPathElement(next, anywhere) - pathElement.invert = invert - elements.append(pathElement) - i += 1 - - elif el.type in [XPathLexer.TOKEN_REF, XPathLexer.RULE_REF, XPathLexer.WILDCARD] : - elements.append( self.getXPathElement(el, False) ) - i += 1 - - elif el.type==Token.EOF : - break - + anywhere = el.type == XPathLexer.ANYWHERE + next_el = next(tokens, None) + if not next_el: + raise Exception('Missing element after %s' % el.getText()) + else: + el = next_el + # Check for bangs + if el.type == XPathLexer.BANG: + invert = True + next_el = next(tokens, None) + if not next_el: + raise Exception('Missing element after %s' % el.getText()) + else: + el = next_el + # Add searched element + if el.type in [XPathLexer.TOKEN_REF, XPathLexer.RULE_REF, XPathLexer.WILDCARD]: + element = self.getXPathElement(el, anywhere) + element.invert = invert + elements.append(element) + elif el.type==Token.EOF: + break else: - raise Exception("Unknown path element " + str(el)) - + raise Exception("Unknown path element %s" % lexer.symbolicNames[el.type]) return elements # @@ -210,24 +210,32 @@ class XPath(object): def getXPathElement(self, wordToken:Token, anywhere:bool): if wordToken.type==Token.EOF: raise Exception("Missing path element at end of path") + word = wordToken.text - ttype = self.parser.getTokenType(word) - ruleIndex = self.parser.getRuleIndex(word) - if wordToken.type==XPathLexer.WILDCARD : - return XPathWildcardAnywhereElement() if anywhere else XPathWildcardElement() elif wordToken.type in [XPathLexer.TOKEN_REF, XPathLexer.STRING]: + tsource = self.parser.getTokenStream().tokenSource - if ttype==Token.INVALID_TYPE: - raise Exception( word + " at index " + str(wordToken.startIndex) + " isn't a valid token name") + ttype = litType = None + if wordToken.type == XPathLexer.TOKEN_REF: + ttype = (tsource.ruleNames.index(word) + 1) if word in tsource.ruleNames else None + else: + litType = tsource.literalNames.index(word) if word in tsource.literalNames else None + + # Decide which one is it + ttype = ttype or litType + + if not ttype: + raise Exception("%s at index %d isn't a valid token name" % (word, wordToken.tokenIndex)) return XPathTokenAnywhereElement(word, ttype) if anywhere else XPathTokenElement(word, ttype) else: + ruleIndex = self.parser.ruleNames.index(word) if word in self.parser.ruleNames else None - if ruleIndex==-1: - raise Exception( word + " at index " + str(wordToken.getStartIndex()) + " isn't a valid rule name") + if not ruleIndex: + raise Exception("%s at index %d isn't a valid rule name" % (word, wordToken.tokenIndex)) return XPathRuleAnywhereElement(word, ruleIndex) if anywhere else XPathRuleElement(word, ruleIndex) @@ -246,18 +254,16 @@ class XPath(object): dummyRoot.children = [t] # don't set t's parent. work = [dummyRoot] - - for i in range(0, len(self.elements)): - next = set() + for element in self.elements: + work_next = list() for node in work: - if len( node.children) > 0 : + if not isinstance(node, TerminalNode) and node.children: # only try to match next element if it has children # e.g., //func/*/stat might have a token node for which # we can't go looking for stat nodes. - matching = self.elements[i].evaluate(node) - next |= matching - i += 1 - work = next + matching = element.evaluate(node) + work_next.extend(matching) + work = work_next return work @@ -283,8 +289,8 @@ class XPathRuleAnywhereElement(XPathElement): self.ruleIndex = ruleIndex def evaluate(self, t:ParseTree): - return Trees.findAllRuleNodes(t, self.ruleIndex) - + # return all ParserRuleContext descendants of t that match ruleIndex (or do not match if inverted) + return filter(lambda c: isinstance(c, ParserRuleContext) and (self.invert ^ (c.getRuleIndex() == self.ruleIndex)), Trees.descendants(t)) class XPathRuleElement(XPathElement): @@ -293,9 +299,8 @@ class XPathRuleElement(XPathElement): self.ruleIndex = ruleIndex def evaluate(self, t:ParseTree): - # return all children of t that match nodeName - return [c for c in Trees.getChildren(t) if isinstance(c, ParserRuleContext) and (c.ruleIndex == self.ruleIndex) == (not self.invert)] - + # return all ParserRuleContext children of t that match ruleIndex (or do not match if inverted) + return filter(lambda c: isinstance(c, ParserRuleContext) and (self.invert ^ (c.getRuleIndex() == self.ruleIndex)), Trees.getChildren(t)) class XPathTokenAnywhereElement(XPathElement): @@ -304,8 +309,8 @@ class XPathTokenAnywhereElement(XPathElement): self.tokenType = tokenType def evaluate(self, t:ParseTree): - return Trees.findAllTokenNodes(t, self.tokenType) - + # return all TerminalNode descendants of t that match tokenType (or do not match if inverted) + return filter(lambda c: isinstance(c, TerminalNode) and (self.invert ^ (c.symbol.type == self.tokenType)), Trees.descendants(t)) class XPathTokenElement(XPathElement): @@ -314,8 +319,8 @@ class XPathTokenElement(XPathElement): self.tokenType = tokenType def evaluate(self, t:ParseTree): - # return all children of t that match nodeName - return [c for c in Trees.getChildren(t) if isinstance(c, TerminalNode) and (c.symbol.type == self.tokenType) == (not self.invert)] + # return all TerminalNode children of t that match tokenType (or do not match if inverted) + return filter(lambda c: isinstance(c, TerminalNode) and (self.invert ^ (c.symbol.type == self.tokenType)), Trees.getChildren(t)) class XPathWildcardAnywhereElement(XPathElement): From 4c2f091e8c9f8e1d40686d7e84453027cc55fff1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Henrique?= Date: Wed, 4 Sep 2019 14:34:19 -0300 Subject: [PATCH 2/7] Style touch-ups on Python3 XPath implementation --- runtime/Python3/src/antlr4/xpath/XPath.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/runtime/Python3/src/antlr4/xpath/XPath.py b/runtime/Python3/src/antlr4/xpath/XPath.py index dcffad518..4e8670ff1 100644 --- a/runtime/Python3/src/antlr4/xpath/XPath.py +++ b/runtime/Python3/src/antlr4/xpath/XPath.py @@ -218,23 +218,22 @@ class XPath(object): elif wordToken.type in [XPathLexer.TOKEN_REF, XPathLexer.STRING]: tsource = self.parser.getTokenStream().tokenSource - ttype = litType = None + ttype = -1 if wordToken.type == XPathLexer.TOKEN_REF: - ttype = (tsource.ruleNames.index(word) + 1) if word in tsource.ruleNames else None + if word in tsource.ruleNames: + ttype = tsource.ruleNames.index(word) + 1 else: - litType = tsource.literalNames.index(word) if word in tsource.literalNames else None + if word in tsource.literalNames: + ttype = tsource.literalNames.index(word) - # Decide which one is it - ttype = ttype or litType - - if not ttype: + if ttype == -1: raise Exception("%s at index %d isn't a valid token name" % (word, wordToken.tokenIndex)) return XPathTokenAnywhereElement(word, ttype) if anywhere else XPathTokenElement(word, ttype) else: - ruleIndex = self.parser.ruleNames.index(word) if word in self.parser.ruleNames else None + ruleIndex = self.parser.ruleNames.index(word) if word in self.parser.ruleNames else -1 - if not ruleIndex: + if ruleIndex == -1: raise Exception("%s at index %d isn't a valid rule name" % (word, wordToken.tokenIndex)) return XPathRuleAnywhereElement(word, ruleIndex) if anywhere else XPathRuleElement(word, ruleIndex) From ae2a689a68408b2f76e7c1c7c3f51731fc293ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Henrique?= Date: Wed, 4 Sep 2019 15:33:24 -0300 Subject: [PATCH 3/7] Fixed missing XPathLexer.STRING case --- runtime/Python3/src/antlr4/xpath/XPath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/Python3/src/antlr4/xpath/XPath.py b/runtime/Python3/src/antlr4/xpath/XPath.py index 4e8670ff1..07fcd0b54 100644 --- a/runtime/Python3/src/antlr4/xpath/XPath.py +++ b/runtime/Python3/src/antlr4/xpath/XPath.py @@ -192,7 +192,7 @@ class XPath(object): else: el = next_el # Add searched element - if el.type in [XPathLexer.TOKEN_REF, XPathLexer.RULE_REF, XPathLexer.WILDCARD]: + if el.type in [XPathLexer.TOKEN_REF, XPathLexer.RULE_REF, XPathLexer.WILDCARD, XPathLexer.STRING]: element = self.getXPathElement(el, anywhere) element.invert = invert elements.append(element) From f15a9f76287ea3083488672b17535ea63470f2ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Henrique?= Date: Wed, 4 Sep 2019 15:53:02 -0300 Subject: [PATCH 4/7] Prevent XPath from returning the same node multiple times in Python3 --- runtime/Python3/src/antlr4/xpath/XPath.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/runtime/Python3/src/antlr4/xpath/XPath.py b/runtime/Python3/src/antlr4/xpath/XPath.py index 07fcd0b54..4d3a4f56f 100644 --- a/runtime/Python3/src/antlr4/xpath/XPath.py +++ b/runtime/Python3/src/antlr4/xpath/XPath.py @@ -261,6 +261,11 @@ class XPath(object): # e.g., //func/*/stat might have a token node for which # we can't go looking for stat nodes. matching = element.evaluate(node) + + # See issue antlr#370 - Prevents XPath from returning the + # same node multiple times + matching = filter(lambda m: m not in work_next, matching) + work_next.extend(matching) work = work_next From 8da2ce3044cf9411b83e04812d0bd76da0b2680c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Henrique?= Date: Wed, 4 Sep 2019 15:59:12 -0300 Subject: [PATCH 5/7] Added XPath test on Python3 target --- runtime/Python3/test/expr/Expr.g4 | 31 ++ runtime/Python3/test/expr/ExprLexer.py | 94 ++++ runtime/Python3/test/expr/ExprParser.py | 658 ++++++++++++++++++++++++ runtime/Python3/test/run.py | 1 + runtime/Python3/test/xpathtest.py | 89 ++++ 5 files changed, 873 insertions(+) create mode 100644 runtime/Python3/test/expr/Expr.g4 create mode 100644 runtime/Python3/test/expr/ExprLexer.py create mode 100644 runtime/Python3/test/expr/ExprParser.py create mode 100644 runtime/Python3/test/xpathtest.py diff --git a/runtime/Python3/test/expr/Expr.g4 b/runtime/Python3/test/expr/Expr.g4 new file mode 100644 index 000000000..662079641 --- /dev/null +++ b/runtime/Python3/test/expr/Expr.g4 @@ -0,0 +1,31 @@ +// Taken from "tool-testsuite/test/org/antlr/v4/test/tool/TestXPath.java" +// Builds ExprLexer.py and ExprParser.py + +grammar Expr; +prog: func+ ; +func: 'def' ID '(' arg (',' arg)* ')' body ; +body: '{' stat+ '}' ; +arg : ID ; +stat: expr ';' # printExpr + | ID '=' expr ';' # assign + | 'return' expr ';' # ret + | ';' # blank + ; +expr: expr ('*'|'/') expr # MulDiv + | expr ('+'|'-') expr # AddSub + | primary # prim + ; +primary + : INT # int + | ID # id + | '(' expr ')' # parens + ; +MUL : '*' ; // assigns token name to '*' used above in grammar +DIV : '/' ; +ADD : '+' ; +SUB : '-' ; +RETURN : 'return' ; +ID : [a-zA-Z]+ ; // match identifiers +INT : [0-9]+ ; // match integers +NEWLINE:'\r'? '\n' -> skip; // return newlines to parser (is end-statement signal) +WS : [ \t]+ -> skip ; // toss out whitespace \ No newline at end of file diff --git a/runtime/Python3/test/expr/ExprLexer.py b/runtime/Python3/test/expr/ExprLexer.py new file mode 100644 index 000000000..e338b0b9e --- /dev/null +++ b/runtime/Python3/test/expr/ExprLexer.py @@ -0,0 +1,94 @@ +# Generated from expr/Expr.g4 by ANTLR 4.7.2 +from antlr4 import * +from io import StringIO +from typing.io import TextIO +import sys + + +def serializedATN(): + with StringIO() as buf: + buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\23") + buf.write("^\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7") + buf.write("\4\b\t\b\4\t\t\t\4\n\t\n\4\13\t\13\4\f\t\f\4\r\t\r\4\16") + buf.write("\t\16\4\17\t\17\4\20\t\20\4\21\t\21\4\22\t\22\3\2\3\2") + buf.write("\3\2\3\2\3\3\3\3\3\4\3\4\3\5\3\5\3\6\3\6\3\7\3\7\3\b\3") + buf.write("\b\3\t\3\t\3\n\3\n\3\13\3\13\3\f\3\f\3\r\3\r\3\16\3\16") + buf.write("\3\16\3\16\3\16\3\16\3\16\3\17\6\17H\n\17\r\17\16\17I") + buf.write("\3\20\6\20M\n\20\r\20\16\20N\3\21\5\21R\n\21\3\21\3\21") + buf.write("\3\21\3\21\3\22\6\22Y\n\22\r\22\16\22Z\3\22\3\22\2\2\23") + buf.write("\3\3\5\4\7\5\t\6\13\7\r\b\17\t\21\n\23\13\25\f\27\r\31") + buf.write("\16\33\17\35\20\37\21!\22#\23\3\2\5\4\2C\\c|\3\2\62;\4") + buf.write("\2\13\13\"\"\2a\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2\2\2") + buf.write("\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\2\21") + buf.write("\3\2\2\2\2\23\3\2\2\2\2\25\3\2\2\2\2\27\3\2\2\2\2\31\3") + buf.write("\2\2\2\2\33\3\2\2\2\2\35\3\2\2\2\2\37\3\2\2\2\2!\3\2\2") + buf.write("\2\2#\3\2\2\2\3%\3\2\2\2\5)\3\2\2\2\7+\3\2\2\2\t-\3\2") + buf.write("\2\2\13/\3\2\2\2\r\61\3\2\2\2\17\63\3\2\2\2\21\65\3\2") + buf.write("\2\2\23\67\3\2\2\2\259\3\2\2\2\27;\3\2\2\2\31=\3\2\2\2") + buf.write("\33?\3\2\2\2\35G\3\2\2\2\37L\3\2\2\2!Q\3\2\2\2#X\3\2\2") + buf.write("\2%&\7f\2\2&\'\7g\2\2\'(\7h\2\2(\4\3\2\2\2)*\7*\2\2*\6") + buf.write("\3\2\2\2+,\7.\2\2,\b\3\2\2\2-.\7+\2\2.\n\3\2\2\2/\60\7") + buf.write("}\2\2\60\f\3\2\2\2\61\62\7\177\2\2\62\16\3\2\2\2\63\64") + buf.write("\7=\2\2\64\20\3\2\2\2\65\66\7?\2\2\66\22\3\2\2\2\678\7") + buf.write(",\2\28\24\3\2\2\29:\7\61\2\2:\26\3\2\2\2;<\7-\2\2<\30") + buf.write("\3\2\2\2=>\7/\2\2>\32\3\2\2\2?@\7t\2\2@A\7g\2\2AB\7v\2") + buf.write("\2BC\7w\2\2CD\7t\2\2DE\7p\2\2E\34\3\2\2\2FH\t\2\2\2GF") + buf.write("\3\2\2\2HI\3\2\2\2IG\3\2\2\2IJ\3\2\2\2J\36\3\2\2\2KM\t") + buf.write("\3\2\2LK\3\2\2\2MN\3\2\2\2NL\3\2\2\2NO\3\2\2\2O \3\2\2") + buf.write("\2PR\7\17\2\2QP\3\2\2\2QR\3\2\2\2RS\3\2\2\2ST\7\f\2\2") + buf.write("TU\3\2\2\2UV\b\21\2\2V\"\3\2\2\2WY\t\4\2\2XW\3\2\2\2Y") + buf.write("Z\3\2\2\2ZX\3\2\2\2Z[\3\2\2\2[\\\3\2\2\2\\]\b\22\2\2]") + buf.write("$\3\2\2\2\7\2INQZ\3\b\2\2") + return buf.getvalue() + + +class ExprLexer(Lexer): + + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] + + T__0 = 1 + T__1 = 2 + T__2 = 3 + T__3 = 4 + T__4 = 5 + T__5 = 6 + T__6 = 7 + T__7 = 8 + MUL = 9 + DIV = 10 + ADD = 11 + SUB = 12 + RETURN = 13 + ID = 14 + INT = 15 + NEWLINE = 16 + WS = 17 + + channelNames = [ u"DEFAULT_TOKEN_CHANNEL", u"HIDDEN" ] + + modeNames = [ "DEFAULT_MODE" ] + + literalNames = [ "", + "'def'", "'('", "','", "')'", "'{'", "'}'", "';'", "'='", "'*'", + "'/'", "'+'", "'-'", "'return'" ] + + symbolicNames = [ "", + "MUL", "DIV", "ADD", "SUB", "RETURN", "ID", "INT", "NEWLINE", + "WS" ] + + ruleNames = [ "T__0", "T__1", "T__2", "T__3", "T__4", "T__5", "T__6", + "T__7", "MUL", "DIV", "ADD", "SUB", "RETURN", "ID", "INT", + "NEWLINE", "WS" ] + + grammarFileName = "Expr.g4" + + def __init__(self, input=None, output:TextIO = sys.stdout): + super().__init__(input, output) + self.checkVersion("4.7.2") + self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache()) + self._actions = None + self._predicates = None + + diff --git a/runtime/Python3/test/expr/ExprParser.py b/runtime/Python3/test/expr/ExprParser.py new file mode 100644 index 000000000..598c778d1 --- /dev/null +++ b/runtime/Python3/test/expr/ExprParser.py @@ -0,0 +1,658 @@ +# Generated from expr/Expr.g4 by ANTLR 4.7.2 +# encoding: utf-8 +from antlr4 import * +from io import StringIO +from typing.io import TextIO +import sys + +def serializedATN(): + with StringIO() as buf: + buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\3\23") + buf.write("S\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7\4\b") + buf.write("\t\b\3\2\6\2\22\n\2\r\2\16\2\23\3\3\3\3\3\3\3\3\3\3\3") + buf.write("\3\7\3\34\n\3\f\3\16\3\37\13\3\3\3\3\3\3\3\3\4\3\4\6\4") + buf.write("&\n\4\r\4\16\4\'\3\4\3\4\3\5\3\5\3\6\3\6\3\6\3\6\3\6\3") + buf.write("\6\3\6\3\6\3\6\3\6\3\6\3\6\3\6\5\6;\n\6\3\7\3\7\3\7\3") + buf.write("\7\3\7\3\7\3\7\3\7\3\7\7\7F\n\7\f\7\16\7I\13\7\3\b\3\b") + buf.write("\3\b\3\b\3\b\3\b\5\bQ\n\b\3\b\2\3\f\t\2\4\6\b\n\f\16\2") + buf.write("\4\3\2\13\f\3\2\r\16\2U\2\21\3\2\2\2\4\25\3\2\2\2\6#\3") + buf.write("\2\2\2\b+\3\2\2\2\n:\3\2\2\2\f<\3\2\2\2\16P\3\2\2\2\20") + buf.write("\22\5\4\3\2\21\20\3\2\2\2\22\23\3\2\2\2\23\21\3\2\2\2") + buf.write("\23\24\3\2\2\2\24\3\3\2\2\2\25\26\7\3\2\2\26\27\7\20\2") + buf.write("\2\27\30\7\4\2\2\30\35\5\b\5\2\31\32\7\5\2\2\32\34\5\b") + buf.write("\5\2\33\31\3\2\2\2\34\37\3\2\2\2\35\33\3\2\2\2\35\36\3") + buf.write("\2\2\2\36 \3\2\2\2\37\35\3\2\2\2 !\7\6\2\2!\"\5\6\4\2") + buf.write("\"\5\3\2\2\2#%\7\7\2\2$&\5\n\6\2%$\3\2\2\2&\'\3\2\2\2") + buf.write("\'%\3\2\2\2\'(\3\2\2\2()\3\2\2\2)*\7\b\2\2*\7\3\2\2\2") + buf.write("+,\7\20\2\2,\t\3\2\2\2-.\5\f\7\2./\7\t\2\2/;\3\2\2\2\60") + buf.write("\61\7\20\2\2\61\62\7\n\2\2\62\63\5\f\7\2\63\64\7\t\2\2") + buf.write("\64;\3\2\2\2\65\66\7\17\2\2\66\67\5\f\7\2\678\7\t\2\2") + buf.write("8;\3\2\2\29;\7\t\2\2:-\3\2\2\2:\60\3\2\2\2:\65\3\2\2\2") + buf.write(":9\3\2\2\2;\13\3\2\2\2<=\b\7\1\2=>\5\16\b\2>G\3\2\2\2") + buf.write("?@\f\5\2\2@A\t\2\2\2AF\5\f\7\6BC\f\4\2\2CD\t\3\2\2DF\5") + buf.write("\f\7\5E?\3\2\2\2EB\3\2\2\2FI\3\2\2\2GE\3\2\2\2GH\3\2\2") + buf.write("\2H\r\3\2\2\2IG\3\2\2\2JQ\7\21\2\2KQ\7\20\2\2LM\7\4\2") + buf.write("\2MN\5\f\7\2NO\7\6\2\2OQ\3\2\2\2PJ\3\2\2\2PK\3\2\2\2P") + buf.write("L\3\2\2\2Q\17\3\2\2\2\t\23\35\':EGP") + return buf.getvalue() + + +class ExprParser ( Parser ): + + grammarFileName = "Expr.g4" + + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] + + sharedContextCache = PredictionContextCache() + + literalNames = [ "", "'def'", "'('", "','", "')'", "'{'", "'}'", + "';'", "'='", "'*'", "'/'", "'+'", "'-'", "'return'" ] + + symbolicNames = [ "", "", "", "", + "", "", "", "", + "", "MUL", "DIV", "ADD", "SUB", "RETURN", + "ID", "INT", "NEWLINE", "WS" ] + + RULE_prog = 0 + RULE_func = 1 + RULE_body = 2 + RULE_arg = 3 + RULE_stat = 4 + RULE_expr = 5 + RULE_primary = 6 + + ruleNames = [ "prog", "func", "body", "arg", "stat", "expr", "primary" ] + + EOF = Token.EOF + T__0=1 + T__1=2 + T__2=3 + T__3=4 + T__4=5 + T__5=6 + T__6=7 + T__7=8 + MUL=9 + DIV=10 + ADD=11 + SUB=12 + RETURN=13 + ID=14 + INT=15 + NEWLINE=16 + WS=17 + + def __init__(self, input:TokenStream, output:TextIO = sys.stdout): + super().__init__(input, output) + self.checkVersion("4.7.2") + self._interp = ParserATNSimulator(self, self.atn, self.decisionsToDFA, self.sharedContextCache) + self._predicates = None + + + + class ProgContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def func(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(ExprParser.FuncContext) + else: + return self.getTypedRuleContext(ExprParser.FuncContext,i) + + + def getRuleIndex(self): + return ExprParser.RULE_prog + + + + + def prog(self): + + localctx = ExprParser.ProgContext(self, self._ctx, self.state) + self.enterRule(localctx, 0, self.RULE_prog) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 15 + self._errHandler.sync(self) + _la = self._input.LA(1) + while True: + self.state = 14 + self.func() + self.state = 17 + self._errHandler.sync(self) + _la = self._input.LA(1) + if not (_la==ExprParser.T__0): + break + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + class FuncContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def ID(self): + return self.getToken(ExprParser.ID, 0) + + def arg(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(ExprParser.ArgContext) + else: + return self.getTypedRuleContext(ExprParser.ArgContext,i) + + + def body(self): + return self.getTypedRuleContext(ExprParser.BodyContext,0) + + + def getRuleIndex(self): + return ExprParser.RULE_func + + + + + def func(self): + + localctx = ExprParser.FuncContext(self, self._ctx, self.state) + self.enterRule(localctx, 2, self.RULE_func) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 19 + self.match(ExprParser.T__0) + self.state = 20 + self.match(ExprParser.ID) + self.state = 21 + self.match(ExprParser.T__1) + self.state = 22 + self.arg() + self.state = 27 + self._errHandler.sync(self) + _la = self._input.LA(1) + while _la==ExprParser.T__2: + self.state = 23 + self.match(ExprParser.T__2) + self.state = 24 + self.arg() + self.state = 29 + self._errHandler.sync(self) + _la = self._input.LA(1) + + self.state = 30 + self.match(ExprParser.T__3) + self.state = 31 + self.body() + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + class BodyContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def stat(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(ExprParser.StatContext) + else: + return self.getTypedRuleContext(ExprParser.StatContext,i) + + + def getRuleIndex(self): + return ExprParser.RULE_body + + + + + def body(self): + + localctx = ExprParser.BodyContext(self, self._ctx, self.state) + self.enterRule(localctx, 4, self.RULE_body) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 33 + self.match(ExprParser.T__4) + self.state = 35 + self._errHandler.sync(self) + _la = self._input.LA(1) + while True: + self.state = 34 + self.stat() + self.state = 37 + self._errHandler.sync(self) + _la = self._input.LA(1) + if not ((((_la) & ~0x3f) == 0 and ((1 << _la) & ((1 << ExprParser.T__1) | (1 << ExprParser.T__6) | (1 << ExprParser.RETURN) | (1 << ExprParser.ID) | (1 << ExprParser.INT))) != 0)): + break + + self.state = 39 + self.match(ExprParser.T__5) + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + class ArgContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def ID(self): + return self.getToken(ExprParser.ID, 0) + + def getRuleIndex(self): + return ExprParser.RULE_arg + + + + + def arg(self): + + localctx = ExprParser.ArgContext(self, self._ctx, self.state) + self.enterRule(localctx, 6, self.RULE_arg) + try: + self.enterOuterAlt(localctx, 1) + self.state = 41 + self.match(ExprParser.ID) + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + class StatContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + + def getRuleIndex(self): + return ExprParser.RULE_stat + + + def copyFrom(self, ctx:ParserRuleContext): + super().copyFrom(ctx) + + + + class RetContext(StatContext): + + def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.StatContext + super().__init__(parser) + self.copyFrom(ctx) + + def RETURN(self): + return self.getToken(ExprParser.RETURN, 0) + def expr(self): + return self.getTypedRuleContext(ExprParser.ExprContext,0) + + + + class BlankContext(StatContext): + + def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.StatContext + super().__init__(parser) + self.copyFrom(ctx) + + + + class PrintExprContext(StatContext): + + def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.StatContext + super().__init__(parser) + self.copyFrom(ctx) + + def expr(self): + return self.getTypedRuleContext(ExprParser.ExprContext,0) + + + + class AssignContext(StatContext): + + def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.StatContext + super().__init__(parser) + self.copyFrom(ctx) + + def ID(self): + return self.getToken(ExprParser.ID, 0) + def expr(self): + return self.getTypedRuleContext(ExprParser.ExprContext,0) + + + + + def stat(self): + + localctx = ExprParser.StatContext(self, self._ctx, self.state) + self.enterRule(localctx, 8, self.RULE_stat) + try: + self.state = 56 + self._errHandler.sync(self) + la_ = self._interp.adaptivePredict(self._input,3,self._ctx) + if la_ == 1: + localctx = ExprParser.PrintExprContext(self, localctx) + self.enterOuterAlt(localctx, 1) + self.state = 43 + self.expr(0) + self.state = 44 + self.match(ExprParser.T__6) + pass + + elif la_ == 2: + localctx = ExprParser.AssignContext(self, localctx) + self.enterOuterAlt(localctx, 2) + self.state = 46 + self.match(ExprParser.ID) + self.state = 47 + self.match(ExprParser.T__7) + self.state = 48 + self.expr(0) + self.state = 49 + self.match(ExprParser.T__6) + pass + + elif la_ == 3: + localctx = ExprParser.RetContext(self, localctx) + self.enterOuterAlt(localctx, 3) + self.state = 51 + self.match(ExprParser.RETURN) + self.state = 52 + self.expr(0) + self.state = 53 + self.match(ExprParser.T__6) + pass + + elif la_ == 4: + localctx = ExprParser.BlankContext(self, localctx) + self.enterOuterAlt(localctx, 4) + self.state = 55 + self.match(ExprParser.T__6) + pass + + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + class ExprContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + + def getRuleIndex(self): + return ExprParser.RULE_expr + + + def copyFrom(self, ctx:ParserRuleContext): + super().copyFrom(ctx) + + + class PrimContext(ExprContext): + + def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def primary(self): + return self.getTypedRuleContext(ExprParser.PrimaryContext,0) + + + + class MulDivContext(ExprContext): + + def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def expr(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(ExprParser.ExprContext) + else: + return self.getTypedRuleContext(ExprParser.ExprContext,i) + + def MUL(self): + return self.getToken(ExprParser.MUL, 0) + def DIV(self): + return self.getToken(ExprParser.DIV, 0) + + + class AddSubContext(ExprContext): + + def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def expr(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(ExprParser.ExprContext) + else: + return self.getTypedRuleContext(ExprParser.ExprContext,i) + + def ADD(self): + return self.getToken(ExprParser.ADD, 0) + def SUB(self): + return self.getToken(ExprParser.SUB, 0) + + + + def expr(self, _p:int=0): + _parentctx = self._ctx + _parentState = self.state + localctx = ExprParser.ExprContext(self, self._ctx, _parentState) + _prevctx = localctx + _startState = 10 + self.enterRecursionRule(localctx, 10, self.RULE_expr, _p) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + localctx = ExprParser.PrimContext(self, localctx) + self._ctx = localctx + _prevctx = localctx + + self.state = 59 + self.primary() + self._ctx.stop = self._input.LT(-1) + self.state = 69 + self._errHandler.sync(self) + _alt = self._interp.adaptivePredict(self._input,5,self._ctx) + while _alt!=2 and _alt!=ATN.INVALID_ALT_NUMBER: + if _alt==1: + if self._parseListeners is not None: + self.triggerExitRuleEvent() + _prevctx = localctx + self.state = 67 + self._errHandler.sync(self) + la_ = self._interp.adaptivePredict(self._input,4,self._ctx) + if la_ == 1: + localctx = ExprParser.MulDivContext(self, ExprParser.ExprContext(self, _parentctx, _parentState)) + self.pushNewRecursionContext(localctx, _startState, self.RULE_expr) + self.state = 61 + if not self.precpred(self._ctx, 3): + from antlr4.error.Errors import FailedPredicateException + raise FailedPredicateException(self, "self.precpred(self._ctx, 3)") + self.state = 62 + _la = self._input.LA(1) + if not(_la==ExprParser.MUL or _la==ExprParser.DIV): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + self.state = 63 + self.expr(4) + pass + + elif la_ == 2: + localctx = ExprParser.AddSubContext(self, ExprParser.ExprContext(self, _parentctx, _parentState)) + self.pushNewRecursionContext(localctx, _startState, self.RULE_expr) + self.state = 64 + if not self.precpred(self._ctx, 2): + from antlr4.error.Errors import FailedPredicateException + raise FailedPredicateException(self, "self.precpred(self._ctx, 2)") + self.state = 65 + _la = self._input.LA(1) + if not(_la==ExprParser.ADD or _la==ExprParser.SUB): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + self.state = 66 + self.expr(3) + pass + + + self.state = 71 + self._errHandler.sync(self) + _alt = self._interp.adaptivePredict(self._input,5,self._ctx) + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.unrollRecursionContexts(_parentctx) + return localctx + + class PrimaryContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + + def getRuleIndex(self): + return ExprParser.RULE_primary + + + def copyFrom(self, ctx:ParserRuleContext): + super().copyFrom(ctx) + + + + class ParensContext(PrimaryContext): + + def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.PrimaryContext + super().__init__(parser) + self.copyFrom(ctx) + + def expr(self): + return self.getTypedRuleContext(ExprParser.ExprContext,0) + + + + class IdContext(PrimaryContext): + + def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.PrimaryContext + super().__init__(parser) + self.copyFrom(ctx) + + def ID(self): + return self.getToken(ExprParser.ID, 0) + + + class IntContext(PrimaryContext): + + def __init__(self, parser, ctx:ParserRuleContext): # actually a ExprParser.PrimaryContext + super().__init__(parser) + self.copyFrom(ctx) + + def INT(self): + return self.getToken(ExprParser.INT, 0) + + + + def primary(self): + + localctx = ExprParser.PrimaryContext(self, self._ctx, self.state) + self.enterRule(localctx, 12, self.RULE_primary) + try: + self.state = 78 + self._errHandler.sync(self) + token = self._input.LA(1) + if token in [ExprParser.INT]: + localctx = ExprParser.IntContext(self, localctx) + self.enterOuterAlt(localctx, 1) + self.state = 72 + self.match(ExprParser.INT) + pass + elif token in [ExprParser.ID]: + localctx = ExprParser.IdContext(self, localctx) + self.enterOuterAlt(localctx, 2) + self.state = 73 + self.match(ExprParser.ID) + pass + elif token in [ExprParser.T__1]: + localctx = ExprParser.ParensContext(self, localctx) + self.enterOuterAlt(localctx, 3) + self.state = 74 + self.match(ExprParser.T__1) + self.state = 75 + self.expr(0) + self.state = 76 + self.match(ExprParser.T__3) + pass + else: + raise NoViableAltException(self) + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + + def sempred(self, localctx:RuleContext, ruleIndex:int, predIndex:int): + if self._predicates == None: + self._predicates = dict() + self._predicates[5] = self.expr_sempred + pred = self._predicates.get(ruleIndex, None) + if pred is None: + raise Exception("No predicate with index:" + str(ruleIndex)) + else: + return pred(localctx, predIndex) + + def expr_sempred(self, localctx:ExprContext, predIndex:int): + if predIndex == 0: + return self.precpred(self._ctx, 3) + + + if predIndex == 1: + return self.precpred(self._ctx, 2) + + + + + diff --git a/runtime/Python3/test/run.py b/runtime/Python3/test/run.py index c9ae18877..5aad7896c 100644 --- a/runtime/Python3/test/run.py +++ b/runtime/Python3/test/run.py @@ -3,5 +3,6 @@ import os src_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'src') sys.path.insert(0,src_path) from TestTokenStreamRewriter import TestTokenStreamRewriter +from xpathtest import XPathTest import unittest unittest.main() \ No newline at end of file diff --git a/runtime/Python3/test/xpathtest.py b/runtime/Python3/test/xpathtest.py new file mode 100644 index 000000000..278bffe5b --- /dev/null +++ b/runtime/Python3/test/xpathtest.py @@ -0,0 +1,89 @@ +import antlr4 +from antlr4 import InputStream, CommonTokenStream, TerminalNode +from antlr4.xpath.XPath import XPath +import unittest +from expr.ExprParser import ExprParser +from expr.ExprLexer import ExprLexer + +def tokenToString(token, ruleNames): + if isinstance(token, TerminalNode): + return str(token) + else: + return ruleNames[token.getRuleIndex()] + +class XPathTest(unittest.TestCase): + def setUp(self): + self.input_stream = InputStream( + "def f(x,y) { x = 3+4; y; ; }\n" + "def g(x) { return 1+2*x; }\n" + ) + + # Create the Token Stream + self.lexer = ExprLexer(self.input_stream) + self.stream = CommonTokenStream(self.lexer) + self.stream.fill() + + # Create the parser and expression parse tree + self.parser = ExprParser(self.stream) + self.tree = self.parser.prog() + + def testValidPaths(self): + valid_paths = [ + "/prog/func", # all funcs under prog at root + "/prog/*", # all children of prog at root + "/*/func", # all func kids of any root node + "prog", # prog must be root node + "/prog", # prog must be root node + "/*", # any root + "*", # any root + "//ID", # any ID in tree + "//expr/primary/ID", # any ID child of a primary under any expr + "//body//ID", # any ID under a body + "//'return'", # any 'return' literal in tree, matched by literal name + "//RETURN", # any 'return' literal in tree, matched by symbolic name + "//primary/*", # all kids of any primary + "//func/*/stat", # all stat nodes grandkids of any func node + "/prog/func/'def'", # all def literal kids of func kid of prog + "//stat/';'", # all ';' under any stat node + "//expr/primary/!ID",# anything but ID under primary under any expr node + "//expr/!primary", # anything but primary under any expr node + "//!*", # nothing anywhere + "/!*", # nothing at root + "//expr//ID" # any ID under any expression (tests antlr/antlr4#370) + ] + + expected_results = [ + "[func, func]", + "[func, func]", + "[func, func]", + "[prog]", + "[prog]", + "[prog]", + "[prog]", + "[f, x, y, x, y, g, x, x]", + "[y, x]", + "[x, y, x]", + "[return]", + "[return]", + "[3, 4, y, 1, 2, x]", + "[stat, stat, stat, stat]", + "[def, def]", + "[;, ;, ;, ;]", + "[3, 4, 1, 2]", + "[expr, expr, expr, expr, expr, expr]", + "[]", + "[]", + "[y, x]", + ] + + for path, expected in zip(valid_paths, expected_results): + # Build test string + res = XPath.findAll(self.tree, path, self.parser) + res_str = ", ".join([tokenToString(token, self.parser.ruleNames) for token in res]) + res_str = "[%s]" % res_str + + # Test against expected output + self.assertEqual(res_str, expected, "Failed test %s" % path) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From b0eb1825fb943ffee8d92af6ef09d1b858ad1b00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Henrique?= Date: Wed, 4 Sep 2019 16:43:12 -0300 Subject: [PATCH 6/7] Spaces/Tabs mishap --- runtime/Python3/test/xpathtest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/Python3/test/xpathtest.py b/runtime/Python3/test/xpathtest.py index 278bffe5b..03b6101a9 100644 --- a/runtime/Python3/test/xpathtest.py +++ b/runtime/Python3/test/xpathtest.py @@ -15,7 +15,7 @@ class XPathTest(unittest.TestCase): def setUp(self): self.input_stream = InputStream( "def f(x,y) { x = 3+4; y; ; }\n" - "def g(x) { return 1+2*x; }\n" + "def g(x) { return 1+2*x; }\n" ) # Create the Token Stream From ae282133430c326e4523f075f19a61c549e8d9a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Henrique?= Date: Thu, 5 Sep 2019 14:29:25 -0300 Subject: [PATCH 7/7] Python3 XPath: Use Token.INVALID_TYPE instead of -1 on getXPathElement --- runtime/Python3/src/antlr4/xpath/XPath.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/Python3/src/antlr4/xpath/XPath.py b/runtime/Python3/src/antlr4/xpath/XPath.py index 4d3a4f56f..58b05c466 100644 --- a/runtime/Python3/src/antlr4/xpath/XPath.py +++ b/runtime/Python3/src/antlr4/xpath/XPath.py @@ -218,7 +218,7 @@ class XPath(object): elif wordToken.type in [XPathLexer.TOKEN_REF, XPathLexer.STRING]: tsource = self.parser.getTokenStream().tokenSource - ttype = -1 + ttype = Token.INVALID_TYPE if wordToken.type == XPathLexer.TOKEN_REF: if word in tsource.ruleNames: ttype = tsource.ruleNames.index(word) + 1 @@ -226,7 +226,7 @@ class XPath(object): if word in tsource.literalNames: ttype = tsource.literalNames.index(word) - if ttype == -1: + if ttype == Token.INVALID_TYPE: raise Exception("%s at index %d isn't a valid token name" % (word, wordToken.tokenIndex)) return XPathTokenAnywhereElement(word, ttype) if anywhere else XPathTokenElement(word, ttype)