From 238b55a0b67e0c0abfa09a0a007dfadfb833f0b2 Mon Sep 17 00:00:00 2001 From: Vladi Lyga Date: Mon, 28 Nov 2016 18:08:12 +0200 Subject: [PATCH 1/4] Extensive TokenStreamRewriter tests + many python2 runtime fixes. Including fix for https://github.com/antlr/antlr4/issues/550 --- .../Python2/src/antlr4/BufferedTokenStream.py | 10 +- .../Python2/src/antlr4/TokenStreamRewriter.py | 99 +++- .../Python2/tests/TestTokenStreamRewriter.py | 548 ++++++++++++++++++ runtime/Python2/tests/__init__.py | 0 runtime/Python2/tests/mocks/TestLexer.py | 101 ++++ runtime/Python2/tests/mocks/__init__.py | 0 6 files changed, 730 insertions(+), 28 deletions(-) create mode 100644 runtime/Python2/tests/TestTokenStreamRewriter.py create mode 100644 runtime/Python2/tests/__init__.py create mode 100644 runtime/Python2/tests/mocks/TestLexer.py create mode 100644 runtime/Python2/tests/mocks/__init__.py diff --git a/runtime/Python2/src/antlr4/BufferedTokenStream.py b/runtime/Python2/src/antlr4/BufferedTokenStream.py index fe11d2118..17df81b86 100644 --- a/runtime/Python2/src/antlr4/BufferedTokenStream.py +++ b/runtime/Python2/src/antlr4/BufferedTokenStream.py @@ -295,14 +295,20 @@ class BufferedTokenStream(TokenStream): # Get the text of all tokens in this buffer.#/ def getText(self, interval=None): + """ + + :param interval: + :type interval: antlr4.IntervalSet.Interval + :return: + """ self.lazyInit() self.fill() if interval is None: interval = (0, len(self.tokens)-1) - start = interval[0] + start = interval.start if isinstance(start, Token): start = start.tokenIndex - stop = interval[1] + stop = interval.stop if isinstance(stop, Token): stop = stop.tokenIndex if start is None or stop is None or start<0 or stop<0: diff --git a/runtime/Python2/src/antlr4/TokenStreamRewriter.py b/runtime/Python2/src/antlr4/TokenStreamRewriter.py index abc1f558f..964ea3e76 100644 --- a/runtime/Python2/src/antlr4/TokenStreamRewriter.py +++ b/runtime/Python2/src/antlr4/TokenStreamRewriter.py @@ -1,8 +1,37 @@ +""" +[The "BSD license"] + Copyright (c) 2012 Terence Parr + Copyright (c) 2012 Sam Harwell + All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+""" + from StringIO import StringIO from antlr4.Token import Token from antlr4.CommonTokenStream import CommonTokenStream +from antlr4.IntervalSet import Interval + class TokenStreamRewriter(object): DEFAULT_PROGRAM_NAME = "default" @@ -35,7 +64,10 @@ class TokenStreamRewriter(object): self.insertAfter(token.tokenIndex, text, program_name) def insertAfter(self, index, text, program_name=DEFAULT_PROGRAM_NAME): - self.insertBefore(program_name, index + 1, text) + op = self.InsertAfterOp(self.tokens, index + 1, text) + rewrites = self.getProgram(program_name) + op.instructionIndex = len(rewrites) + rewrites.append(op) def insertBeforeIndex(self, index, text): self.insertBefore(self.DEFAULT_PROGRAM_NAME, index, text) @@ -78,8 +110,8 @@ class TokenStreamRewriter(object): def delete(self, program_name, from_idx, to_idx): if isinstance(from_idx, Token): - self.replace(program_name, from_idx.tokenIndex, to_idx.tokenIndex, None) - self.replace(program_name, from_idx, to_idx, None) + self.replace(program_name, from_idx.tokenIndex, to_idx.tokenIndex, "") + self.replace(program_name, from_idx, to_idx, "") def lastRewriteTokenIndex(self, program_name=DEFAULT_PROGRAM_NAME): return self.lastRewriteTokenIndexes.get(program_name, -1) @@ -90,6 +122,9 @@ class TokenStreamRewriter(object): def getProgram(self, program_name): return self.programs.setdefault(program_name, []) + def getDefaultText(self): + return self.getText(self.DEFAULT_PROGRAM_NAME, Interval(0, len(self.tokens.tokens))) + def getText(self, program_name, interval): """ :type interval: Interval.Interval @@ -111,7 +146,7 @@ class TokenStreamRewriter(object): indexToOp = self._reduceToSingleOperationPerIndex(rewrites) i = start while all((i <= stop, i < len(self.tokens.tokens))): - op = indexToOp.get(i) + op = indexToOp.pop(i, None) token = self.tokens.get(i) if op is None: if token.type != Token.EOF: buf.write(token.text) @@ -119,9 +154,10 @@ class TokenStreamRewriter(object): else: i = op.execute(buf) - if stop == len(self.tokens.tokens)-1: - for op in indexToOp.itervalues(): - if op.index >= len(self.tokens.tokens)-1: buf.write(op.text) + if stop == len(self.tokens.tokens) - 1: + for op in indexToOp.values(): + if op.index >= len(self.tokens.tokens) - 1: buf.write( + op.text) # TODO: this check is probably not needed return buf.getvalue() @@ -131,7 +167,7 @@ class TokenStreamRewriter(object): if any((rop is None, not isinstance(rop, TokenStreamRewriter.ReplaceOp))): continue # Wipe prior inserts within range - inserts = [op for op in rewrites[:i] if isinstance(rop, TokenStreamRewriter.InsertBeforeOp)] + inserts = [op for op in rewrites[:i] if type(op) is TokenStreamRewriter.InsertBeforeOp] for iop in inserts: if iop.index == rop.index: rewrites[iop.instructionIndex] = None @@ -140,47 +176,50 @@ class TokenStreamRewriter(object): rewrites[iop.instructionIndex] = None # Drop any prior replaces contained within - prevReplaces = [op for op in rewrites[:i] if isinstance(op, TokenStreamRewriter.ReplaceOp)] + prevReplaces = [op for op in rewrites[:i] if type(op) is TokenStreamRewriter.ReplaceOp] for prevRop in prevReplaces: if all((prevRop.index >= rop.index, prevRop.last_index <= rop.last_index)): - rewrites[prevRop.instructioIndex] = None + rewrites[prevRop.instructionIndex] = None continue - isDisjoint = any((prevRop.last_indexrop)) + isDisjoint = any((prevRop.last_index < rop.index, prevRop.index > rop.last_index)) isSame = all((prevRop.index == rop.index, prevRop.last_index == rop.last_index)) if all((prevRop.text is None, rop.text is None, not 
isDisjoint)): - rewrites[prevRop.instructioIndex] = None + rewrites[prevRop.instructionIndex] = None rop.index = min(prevRop.index, rop.index) rop.last_index = min(prevRop.last_index, rop.last_index) print('New rop {}'.format(rop)) elif not all((isDisjoint, isSame)): raise ValueError("replace op boundaries of {} overlap with previous {}".format(rop, prevRop)) - # Walk inserts + # Walk inserts before for i, iop in enumerate(rewrites): if any((iop is None, not isinstance(iop, TokenStreamRewriter.InsertBeforeOp))): continue - prevInserts = [op for op in rewrites[:i] if isinstance(iop, TokenStreamRewriter.InsertBeforeOp)] - for prevIop in prevInserts: - if prevIop.index == iop.index: + prevInserts = [op for op in rewrites[:i] if isinstance(op, TokenStreamRewriter.InsertBeforeOp)] + for i, prevIop in enumerate(prevInserts): + if prevIop.index == iop.index and type(prevIop) is TokenStreamRewriter.InsertBeforeOp: iop.text += prevIop.text rewrites[i] = None + elif prevIop.index == iop.index and type(prevIop) is TokenStreamRewriter.InsertAfterOp: + iop.text = prevIop.text + iop.text + rewrites[i] = None # look for replaces where iop.index is in range; error - prevReplaces = [op for op in rewrites[:i] if isinstance(rop, TokenStreamRewriter.ReplaceOp)] + prevReplaces = [op for op in rewrites[:i] if type(op) is TokenStreamRewriter.ReplaceOp] for rop in prevReplaces: if iop.index == rop.index: rop.text = iop.text + rop.text rewrites[i] = None continue - if all((iop.index >= rop.index, iop.index <= rop.index)): + if all((iop.index >= rop.index, iop.index <= rop.last_index)): raise ValueError("insert op {} within boundaries of previous {}".format(iop, rop)) - reduced = {} - for i, op in enumerate(rewrites): - if op is None: continue - if reduced.get(op.index): raise ValueError('should be only one op per index') - reduced[op.index] = op + reduced = {} + for i, op in enumerate(rewrites): + if op is None: continue + if reduced.get(op.index): raise ValueError('should be only one op per index') + reduced[op.index] = op - return reduced + return reduced class RewriteOperation(object): @@ -206,7 +245,7 @@ class TokenStreamRewriter(object): return self.index def __str__(self): - pass + return '<{}@{}:"{}">'.format(self.__class__.__name__, self.tokens.get(self.index), self.text) class InsertBeforeOp(RewriteOperation): @@ -219,6 +258,9 @@ class TokenStreamRewriter(object): buf.write(self.tokens.get(self.index).text) return self.index + 1 + class InsertAfterOp(InsertBeforeOp): + pass + class ReplaceOp(RewriteOperation): def __init__(self, from_idx, to_idx, tokens, text): @@ -228,4 +270,9 @@ class TokenStreamRewriter(object): def execute(self, buf): if self.text: buf.write(self.text) - return self.last_index + 1 \ No newline at end of file + return self.last_index + 1 + + def __str__(self): + if self.text: + return '<ReplaceOp@{}..{}:"{}">'.format(self.tokens.get(self.index), self.tokens.get(self.last_index), + self.text) diff --git a/runtime/Python2/tests/TestTokenStreamRewriter.py b/runtime/Python2/tests/TestTokenStreamRewriter.py new file mode 100644 index 000000000..b5a2dc886 --- /dev/null +++ b/runtime/Python2/tests/TestTokenStreamRewriter.py @@ -0,0 +1,548 @@ +""" +[The "BSD license"] + Copyright (c) 2012 Terence Parr + Copyright (c) 2012 Sam Harwell + All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" + +import unittest + +from antlr4.IntervalSet import Interval + +from mocks.TestLexer import TestLexer, TestLexer2 +from antlr4.TokenStreamRewriter import TokenStreamRewriter +from antlr4.InputStream import InputStream +from antlr4.CommonTokenStream import CommonTokenStream + + +class TestTokenStreamRewriter(unittest.TestCase): + def testInsertBeforeIndexZero(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + rewriter.insertBeforeIndex(0, '0') + + self.assertEquals(rewriter.getDefaultText(), '0abc') + + def testInsertAfterLastIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + rewriter.insertAfter(10, 'x') + + self.assertEquals(rewriter.getDefaultText(), 'abcx') + + def test2InsertBeforeAfterMiddleIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(1, 'x') + rewriter.insertAfter(1, 'x') + + self.assertEquals(rewriter.getDefaultText(), 'axbxc') + + def testReplaceIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceIndex(0, 'x') + + self.assertEquals(rewriter.getDefaultText(), 'xbc') + + def testReplaceLastIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceIndex(2, 'x') + + self.assertEquals(rewriter.getDefaultText(), 'abx') + + def testReplaceMiddleIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceIndex(1, 'x') + + self.assertEquals(rewriter.getDefaultText(), 'axc') + + def testToStringStartStop(self): + input = InputStream('x = 3 * 0;') + lexer = TestLexer2(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + 
rewriter.replaceRange(4, 8, '0') + + self.assertEquals(rewriter.getDefaultText(), 'x = 0;') + self.assertEquals(rewriter.getText('default', Interval(0, 9)), 'x = 0;') + self.assertEquals(rewriter.getText('default', Interval(4, 8)), '0') + + def testToStringStartStop2(self): + input = InputStream('x = 3 * 0 + 2 * 0;') + lexer = TestLexer2(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + self.assertEquals('x = 3 * 0 + 2 * 0;', rewriter.getDefaultText()) + + # replace 3 * 0 with 0 + rewriter.replaceRange(4, 8, '0') + self.assertEquals('x = 0 + 2 * 0;', rewriter.getDefaultText()) + self.assertEquals('x = 0 + 2 * 0;', rewriter.getText('default', Interval(0, 17))) + self.assertEquals('0', rewriter.getText('default', Interval(4, 8))) + self.assertEquals('x = 0', rewriter.getText('default', Interval(0, 8))) + self.assertEquals('2 * 0', rewriter.getText('default', Interval(12, 16))) + + rewriter.insertAfter(17, "// comment") + self.assertEquals('2 * 0;// comment', rewriter.getText('default', Interval(12, 18))) + + self.assertEquals('x = 0', rewriter.getText('default', Interval(0, 8))) + + def test2ReplaceMiddleIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceIndex(1, 'x') + rewriter.replaceIndex(1, 'y') + + self.assertEquals('ayc', rewriter.getDefaultText()) + + def test2ReplaceMiddleIndex1InsertBefore(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(0, "_") + rewriter.replaceIndex(1, 'x') + rewriter.replaceIndex(1, 'y') + + self.assertEquals('_ayc', rewriter.getDefaultText()) + + def test2InsertMiddleIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(1, 'x') + rewriter.insertBeforeIndex(1, 'y') + + self.assertEquals('ayxbc', rewriter.getDefaultText()) + + def testReplaceThenDeleteMiddleIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(0, 2, 'x') + rewriter.insertBeforeIndex(1, '0') + + with self.assertRaises(ValueError) as ctx: + rewriter.getDefaultText() + self.assertEquals( + 'insert op ,1:1]:"0"> within boundaries of previous ,1:0]..[@2,2:2=\'c\',<3>,1:2]:"x">', + ctx.exception.message + ) + + def testInsertThenReplaceSameIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(0, '0') + rewriter.replaceIndex(0, 'x') + + self.assertEquals('0xbc', rewriter.getDefaultText()) + + def test2InsertThenReplaceIndex0(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(0, 'x') + rewriter.insertBeforeIndex(0, 'y') + rewriter.replaceIndex(0, 'z') + + self.assertEquals('yxzbc', rewriter.getDefaultText()) + + def testReplaceThenInsertBeforeLastIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + 
stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceIndex(2, 'x') + rewriter.insertBeforeIndex(2, 'y') + + self.assertEquals('abyx', rewriter.getDefaultText()) + + def testReplaceThenInsertAfterLastIndex(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceIndex(2, 'x') + rewriter.insertAfter(2, 'y') + + self.assertEquals('abxy', rewriter.getDefaultText()) + + def testReplaceRangeThenInsertAtLeftEdge(self): + input = InputStream('abcccba') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(2, 4, 'x') + rewriter.insertBeforeIndex(2, 'y') + + self.assertEquals('abyxba', rewriter.getDefaultText()) + + def testReplaceRangeThenInsertAtRightEdge(self): + input = InputStream('abcccba') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(2, 4, 'x') + rewriter.insertBeforeIndex(4, 'y') + + with self.assertRaises(ValueError) as ctx: + rewriter.getDefaultText() + msg = ctx.exception.message + self.assertEquals( + "insert op ,1:4]:\"y\"> within boundaries of previous ,1:2]..[@4,4:4='c',<3>,1:4]:\"x\">", + msg + ) + + def testReplaceRangeThenInsertAfterRightEdge(self): + input = InputStream('abcccba') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(2, 4, 'x') + rewriter.insertAfter(4, 'y') + + self.assertEquals('abxyba', rewriter.getDefaultText()) + + def testReplaceAll(self): + input = InputStream('abcccba') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(0, 6, 'x') + + self.assertEquals('x', rewriter.getDefaultText()) + + def testReplaceSubsetThenFetch(self): + input = InputStream('abcccba') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(2, 4, 'xyz') + + self.assertEquals('abxyzba', rewriter.getDefaultText()) + + def testReplaceThenReplaceSuperset(self): + input = InputStream('abcccba') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(2, 4, 'xyz') + rewriter.replaceRange(3, 5, 'foo') + + with self.assertRaises(ValueError) as ctx: + rewriter.getDefaultText() + msg = ctx.exception.message + self.assertEquals( + """replace op boundaries of ,1:3]..[@5,5:5='b',<2>,1:5]:"foo"> overlap with previous ,1:2]..[@4,4:4='c',<3>,1:4]:"xyz">""", + msg + ) + + def testReplaceThenReplaceLowerIndexedSuperset(self): + input = InputStream('abcccba') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(2, 4, 'xyz') + rewriter.replaceRange(1, 3, 'foo') + + with self.assertRaises(ValueError) as ctx: + rewriter.getDefaultText() + msg = ctx.exception.message + self.assertEquals( + """replace op boundaries of ,1:1]..[@3,3:3='c',<3>,1:3]:"foo"> overlap with previous ,1:2]..[@4,4:4='c',<3>,1:4]:"xyz">""", + msg + ) + + def testReplaceSingleMiddleThenOverlappingSuperset(self): + input = InputStream('abcba') + lexer 
= TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceIndex(2, 'xyz') + rewriter.replaceRange(0, 3, 'foo') + + self.assertEquals('fooa', rewriter.getDefaultText()) + + def testCombineInserts(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(0, 'x') + rewriter.insertBeforeIndex(0, 'y') + + self.assertEquals('yxabc', rewriter.getDefaultText()) + + def testCombine3Inserts(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(1, 'x') + rewriter.insertBeforeIndex(0, 'y') + rewriter.insertBeforeIndex(1, 'z') + + self.assertEquals('yazxbc', rewriter.getDefaultText()) + + def testCombineInsertOnLeftWithReplace(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(0, 2, 'foo') + rewriter.insertBeforeIndex(0, 'z') + + self.assertEquals('zfoo', rewriter.getDefaultText()) + + def testCombineInsertOnLeftWithDelete(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.delete('default', 0, 2) + rewriter.insertBeforeIndex(0, 'z') + + self.assertEquals('z', rewriter.getDefaultText()) + + def testDisjointInserts(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(1, 'x') + rewriter.insertBeforeIndex(2, 'y') + rewriter.insertBeforeIndex(0, 'z') + + self.assertEquals('zaxbyc', rewriter.getDefaultText()) + + def testOverlappingReplace(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(1, 2, 'foo') + rewriter.replaceRange(0, 3, 'bar') + + self.assertEquals('bar', rewriter.getDefaultText()) + + def testOverlappingReplace2(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(0, 3, 'bar') + rewriter.replaceRange(1, 2, 'foo') + + with self.assertRaises(ValueError) as ctx: + rewriter.getDefaultText() + + self.assertEquals( + """replace op boundaries of ,1:1]..[@2,2:2='c',<3>,1:2]:"foo"> overlap with previous ,1:0]..[@3,3:2='',<-1>,1:3]:"bar">""", + ctx.exception.message + ) + + def testOverlappingReplace3(self): + input = InputStream('abcc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(1, 2, 'foo') + rewriter.replaceRange(0, 2, 'bar') + + self.assertEquals('barc', rewriter.getDefaultText()) + + def testOverlappingReplace4(self): + input = InputStream('abcc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(1, 2, 'foo') + rewriter.replaceRange(1, 3, 'bar') + + self.assertEquals('abar', rewriter.getDefaultText()) + + def 
testDropIdenticalReplace(self): + input = InputStream('abcc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(1, 2, 'foo') + rewriter.replaceRange(1, 2, 'foo') + + self.assertEquals('afooc', rewriter.getDefaultText()) + + def testDropPrevCoveredInsert(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(1, 'foo') + rewriter.replaceRange(1, 2, 'foo') + + self.assertEquals('afoofoo', rewriter.getDefaultText()) + + def testLeaveAloneDisjointInsert(self): + input = InputStream('abcc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(1, 'x') + rewriter.replaceRange(2, 3, 'foo') + + self.assertEquals('axbfoo', rewriter.getDefaultText()) + + def testLeaveAloneDisjointInsert2(self): + input = InputStream('abcc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.replaceRange(2, 3, 'foo') + rewriter.insertBeforeIndex(1, 'x') + + self.assertEquals('axbfoo', rewriter.getDefaultText()) + + def testInsertBeforeTokenThenDeleteThatToken(self): + input = InputStream('abc') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(1, 'foo') + rewriter.replaceRange(1, 2, 'foo') + + self.assertEquals('afoofoo', rewriter.getDefaultText()) + + # Test for fix for: https://github.com/antlr/antlr4/issues/550 + def testPreservesOrderOfContiguousInserts(self): + input = InputStream('aa') + lexer = TestLexer(input) + stream = CommonTokenStream(lexer=lexer) + stream.fill() + rewriter = TokenStreamRewriter(tokens=stream) + + rewriter.insertBeforeIndex(0, '<b>') + rewriter.insertAfter(0, '</b>') + rewriter.insertBeforeIndex(1, '<b>') + rewriter.insertAfter(1, '</b>') + + self.assertEquals('<b>a</b><b>a</b>', rewriter.getDefaultText()) + + +if __name__ == '__main__': + unittest.main() diff --git a/runtime/Python2/tests/__init__.py b/runtime/Python2/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/runtime/Python2/tests/mocks/TestLexer.py b/runtime/Python2/tests/mocks/TestLexer.py new file mode 100644 index 000000000..5da52c762 --- /dev/null +++ b/runtime/Python2/tests/mocks/TestLexer.py @@ -0,0 +1,101 @@ +# Generated from /Users/lyga/Dropbox/code/python/antlr4-learn/test_grammar/T.g4 by ANTLR 4.5.3 +# encoding: utf-8 +from __future__ import print_function +from antlr4 import * +from io import StringIO + + +def serializedATN(): + with StringIO() as buf: + buf.write(u"\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd\2") + buf.write(u"\5\17\b\1\4\2\t\2\4\3\t\3\4\4\t\4\3\2\3\2\3\3\3\3\3\4") + buf.write(u"\3\4\2\2\5\3\3\5\4\7\5\3\2\2\16\2\3\3\2\2\2\2\5\3\2\2") + buf.write(u"\2\2\7\3\2\2\2\3\t\3\2\2\2\5\13\3\2\2\2\7\r\3\2\2\2\t") + buf.write(u"\n\7c\2\2\n\4\3\2\2\2\13\f\7d\2\2\f\6\3\2\2\2\r\16\7") + buf.write(u"e\2\2\16\b\3\2\2\2\3\2\2") + return buf.getvalue() + + +class TestLexer(Lexer): + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [DFA(ds, i) for i, ds in enumerate(atn.decisionToState)] + + A = 1 + B = 2 + C = 3 + + modeNames = [u"DEFAULT_MODE"] + + literalNames = [u"<INVALID>", + u"'a'", u"'b'", u"'c'"] + + symbolicNames = [u"<INVALID>", + 
u"A", u"B", u"C"] + + ruleNames = [u"A", u"B", u"C"] + + grammarFileName = u"T.g4" + + def __init__(self, input=None): + super(TestLexer, self).__init__(input) + self.checkVersion("4.6") + self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache()) + self._actions = None + self._predicates = None + + + +def serializedATN2(): + with StringIO() as buf: + buf.write(u"\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd\2") + buf.write(u"\t(\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t") + buf.write(u"\7\4\b\t\b\3\2\6\2\23\n\2\r\2\16\2\24\3\3\6\3\30\n\3") + buf.write(u"\r\3\16\3\31\3\4\3\4\3\5\3\5\3\6\3\6\3\7\3\7\3\b\6\b") + buf.write(u"%\n\b\r\b\16\b&\2\2\t\3\3\5\4\7\5\t\6\13\7\r\b\17\t\3") + buf.write(u"\2\2*\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2\2\2\t\3\2\2\2") + buf.write(u"\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\3\22\3\2\2\2\5") + buf.write(u"\27\3\2\2\2\7\33\3\2\2\2\t\35\3\2\2\2\13\37\3\2\2\2\r") + buf.write(u"!\3\2\2\2\17$\3\2\2\2\21\23\4c|\2\22\21\3\2\2\2\23\24") + buf.write(u"\3\2\2\2\24\22\3\2\2\2\24\25\3\2\2\2\25\4\3\2\2\2\26") + buf.write(u"\30\4\62;\2\27\26\3\2\2\2\30\31\3\2\2\2\31\27\3\2\2\2") + buf.write(u"\31\32\3\2\2\2\32\6\3\2\2\2\33\34\7=\2\2\34\b\3\2\2\2") + buf.write(u"\35\36\7?\2\2\36\n\3\2\2\2\37 \7-\2\2 \f\3\2\2\2!\"\7") + buf.write(u",\2\2\"\16\3\2\2\2#%\7\"\2\2$#\3\2\2\2%&\3\2\2\2&$\3") + buf.write(u"\2\2\2&\'\3\2\2\2\'\20\3\2\2\2\6\2\24\31&\2") + return buf.getvalue() + + +class TestLexer2(Lexer): + + atn = ATNDeserializer().deserialize(serializedATN2()) + + decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] + + + ID = 1 + INT = 2 + SEMI = 3 + ASSIGN = 4 + PLUS = 5 + MULT = 6 + WS = 7 + + modeNames = [ u"DEFAULT_MODE" ] + + literalNames = [ u"", + u"';'", u"'='", u"'+'", u"'*'" ] + + symbolicNames = [ u"", + u"ID", u"INT", u"SEMI", u"ASSIGN", u"PLUS", u"MULT", u"WS" ] + + ruleNames = [ u"ID", u"INT", u"SEMI", u"ASSIGN", u"PLUS", u"MULT", u"WS" ] + + grammarFileName = u"T2.g4" + + def __init__(self, input=None): + super(TestLexer2, self).__init__(input) + self.checkVersion("4.6") + self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache()) + self._actions = None + self._predicates = None diff --git a/runtime/Python2/tests/mocks/__init__.py b/runtime/Python2/tests/mocks/__init__.py new file mode 100644 index 000000000..e69de29bb From f50fb7df156bda289bd3b520df9cbc2ae344a986 Mon Sep 17 00:00:00 2001 From: Vladi Lyga Date: Wed, 30 Nov 2016 13:55:54 +0200 Subject: [PATCH 2/4] Fix error when Interval is rewritten with a tuple --- runtime/Python2/src/antlr4/BufferedTokenStream.py | 15 ++++++++++----- runtime/Python2/tests/TestTokenStreamRewriter.py | 4 +++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/runtime/Python2/src/antlr4/BufferedTokenStream.py b/runtime/Python2/src/antlr4/BufferedTokenStream.py index 17df81b86..78312d62a 100644 --- a/runtime/Python2/src/antlr4/BufferedTokenStream.py +++ b/runtime/Python2/src/antlr4/BufferedTokenStream.py @@ -39,6 +39,8 @@ # {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such a # {@link CommonTokenStream}.
from io import StringIO + +from antlr4.IntervalSet import Interval from antlr4.Token import Token from antlr4.error.Errors import IllegalStateException @@ -293,18 +295,18 @@ class BufferedTokenStream(TokenStream): def getSourceName(self): return self.tokenSource.getSourceName() - # Get the text of all tokens in this buffer.#/ def getText(self, interval=None): """ + Get the text of all tokens in this buffer. :param interval: :type interval: antlr4.IntervalSet.Interval - :return: + :return: string """ self.lazyInit() self.fill() if interval is None: - interval = (0, len(self.tokens)-1) + interval = Interval(0, len(self.tokens)-1) start = interval.start if isinstance(start, Token): start = start.tokenIndex @@ -323,9 +325,12 @@ class BufferedTokenStream(TokenStream): buf.write(t.text) return buf.getvalue() - - # Get all tokens from lexer until EOF#/ def fill(self): + """ + Get all tokens from lexer until EOF + + :return: None + """ self.lazyInit() while self.fetch(1000)==1000: pass diff --git a/runtime/Python2/tests/TestTokenStreamRewriter.py b/runtime/Python2/tests/TestTokenStreamRewriter.py index b5a2dc886..b5ac26597 100644 --- a/runtime/Python2/tests/TestTokenStreamRewriter.py +++ b/runtime/Python2/tests/TestTokenStreamRewriter.py @@ -528,8 +528,10 @@ class TestTokenStreamRewriter(unittest.TestCase): self.assertEquals('afoofoo', rewriter.getDefaultText()) - # Test for fix for: https://github.com/antlr/antlr4/issues/550 def testPreservesOrderOfContiguousInserts(self): + """ + Test for fix for: https://github.com/antlr/antlr4/issues/550 + """ input = InputStream('aa') lexer = TestLexer(input) stream = CommonTokenStream(lexer=lexer) From 8b60373c43f94049f712e8a41b7f6c5ed294bf78 Mon Sep 17 00:00:00 2001 From: Vladi Lyga Date: Wed, 30 Nov 2016 17:16:35 +0200 Subject: [PATCH 3/4] fix: enable to access interval as a list --- runtime/Python2/src/antlr4/BufferedTokenStream.py | 7 +++---- runtime/Python2/src/antlr4/IntervalSet.py | 7 +++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/runtime/Python2/src/antlr4/BufferedTokenStream.py b/runtime/Python2/src/antlr4/BufferedTokenStream.py index 78312d62a..bf0ea969a 100644 --- a/runtime/Python2/src/antlr4/BufferedTokenStream.py +++ b/runtime/Python2/src/antlr4/BufferedTokenStream.py @@ -40,7 +40,6 @@ # {@link CommonTokenStream}.
from io import StringIO -from antlr4.IntervalSet import Interval from antlr4.Token import Token from antlr4.error.Errors import IllegalStateException @@ -306,11 +305,11 @@ class BufferedTokenStream(TokenStream): self.lazyInit() self.fill() if interval is None: - interval = Interval(0, len(self.tokens)-1) - start = interval.start + interval = (0, len(self.tokens)-1) + start = interval[0] if isinstance(start, Token): start = start.tokenIndex - stop = interval.stop + stop = interval[1] if isinstance(stop, Token): stop = stop.tokenIndex if start is None or stop is None or start<0 or stop<0: diff --git a/runtime/Python2/src/antlr4/IntervalSet.py b/runtime/Python2/src/antlr4/IntervalSet.py index 3a55a2631..c4c93749e 100644 --- a/runtime/Python2/src/antlr4/IntervalSet.py +++ b/runtime/Python2/src/antlr4/IntervalSet.py @@ -18,6 +18,13 @@ class Interval(object): def __iter__(self): return iter(self.range) + def __getitem__(self, idx): + if idx == 0: + return self.start + elif idx == 1: + return self.stop + raise IndexError('Interval index out or range [{}]'.format(idx)) + class IntervalSet(object): def __init__(self): From 09db23160f87e6bbda6f0fa56c152bd4502d44fd Mon Sep 17 00:00:00 2001 From: Vladi Date: Thu, 1 Dec 2016 12:19:29 +0200 Subject: [PATCH 4/4] fix for #550 --- .../antlr/v4/runtime/TokenStreamRewriter.java | 37 +++++++++++++------ .../v4/test/tool/TestTokenStreamRewriter.java | 3 +- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/TokenStreamRewriter.java b/runtime/Java/src/org/antlr/v4/runtime/TokenStreamRewriter.java index c9b74c18e..fa437e9b5 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/TokenStreamRewriter.java +++ b/runtime/Java/src/org/antlr/v4/runtime/TokenStreamRewriter.java @@ -120,10 +120,10 @@ public class TokenStreamRewriter { // Define the rewrite operation hierarchy public class RewriteOperation { - /** What index into rewrites List are we? */ - protected int instructionIndex; - /** Token buffer index. */ - protected int index; + /** What index into rewrites List are we? */ + protected int instructionIndex; + /** Token buffer index. */ + protected int index; protected Object text; protected RewriteOperation(int index) { @@ -166,6 +166,13 @@ public class TokenStreamRewriter { } } + class InsertAfterOp extends InsertBeforeOp { + + public InsertAfterOp(int index, Object text) { + super(index, text); + } + } + /** I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp * instructions. 
*/ @@ -255,7 +262,10 @@ public class TokenStreamRewriter { public void insertAfter(String programName, int index, Object text) { // to insert after, just insert before next index (even if past end) - insertBefore(programName,index+1, text); + RewriteOperation op = new InsertAfterOp(index + 1, text); + List rewrites = getProgram(programName); + op.instructionIndex = rewrites.size(); + rewrites.add(op); } public void insertBefore(Token t, Object text) { @@ -546,13 +556,16 @@ public class TokenStreamRewriter { // combine current insert with prior if any at same index List prevInserts = getKindOfOps(rewrites, InsertBeforeOp.class, i); for (InsertBeforeOp prevIop : prevInserts) { - if ( prevIop.index == iop.index ) { // combine objects - // convert to strings...we're in process of toString'ing - // whole token buffer so no lazy eval issue with any templates - iop.text = catOpText(iop.text,prevIop.text); - // delete redundant prior insert - rewrites.set(prevIop.instructionIndex, null); - } + if ( prevIop.index == iop.index && InsertAfterOp.class.isInstance(prevIop) ) { + iop.text = catOpText(prevIop.text, iop.text); + rewrites.set(prevIop.instructionIndex, null); + } else if ( prevIop.index == iop.index && InsertBeforeOp.class.isInstance(prevIop) ) { // combine objects + // convert to strings...we're in process of toString'ing + // whole token buffer so no lazy eval issue with any templates + iop.text = catOpText(iop.text,prevIop.text); + // delete redundant prior insert + rewrites.set(prevIop.instructionIndex, null); + } } // look for replaces where iop.index is in range; error List prevReplaces = getKindOfOps(rewrites, ReplaceOp.class, i); diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenStreamRewriter.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenStreamRewriter.java index cbf90a198..8b98ab557 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenStreamRewriter.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenStreamRewriter.java @@ -889,9 +889,8 @@ public class TestTokenStreamRewriter extends BaseJavaToolTest { assertEquals(expecting, result); } - // Test for https://github.com/antlr/antlr4/issues/550 + // Test Fix for https://github.com/antlr/antlr4/issues/550 @Test - @Ignore public void testPreservesOrderOfContiguousInserts() throws Exception { LexerGrammar g = new LexerGrammar( "lexer grammar T;\n"+
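Taken together, the four patches make insertAfter a first-class operation (InsertAfterOp) in both the Python2 and Java rewriters instead of an alias for insertBefore(index + 1), which is what broke the insert ordering reported in https://github.com/antlr/antlr4/issues/550. A minimal usage sketch against the patched Python2 runtime, assuming it is executed from runtime/Python2/tests so that the TestLexer mock added above is importable:

    from antlr4 import CommonTokenStream, InputStream
    from antlr4.TokenStreamRewriter import TokenStreamRewriter
    from mocks.TestLexer import TestLexer  # mock lexer added by this patch series

    # Tokenize 'aa' and wrap each 'a' token in <b>...</b>.
    stream = CommonTokenStream(TestLexer(InputStream('aa')))
    stream.fill()
    rewriter = TokenStreamRewriter(tokens=stream)
    rewriter.insertBeforeIndex(0, '<b>')
    rewriter.insertAfter(0, '</b>')
    rewriter.insertBeforeIndex(1, '<b>')
    rewriter.insertAfter(1, '</b>')

    # With InsertAfterOp the contiguous inserts keep their relative order.
    assert rewriter.getDefaultText() == '<b>a</b><b>a</b>'

Before the fix, insertAfter(0, ...) was recorded as an insertBefore at index 1, so the combine step in _reduceToSingleOperationPerIndex merged it with the later insertBefore(1, ...) in the wrong order and produced something like '<b>a<b></b>a</b>' instead.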