From ee8d2f9950201fe2af15af5122273d1879a65210 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Fri, 9 Mar 2012 13:12:18 +0100 Subject: [PATCH] junitxml: use an exclusive match on the legal ranges of xml for binary escaping, fixes issue 126 --- CHANGELOG | 6 ++++++ _pytest/junitxml.py | 22 ++++++++++++++-------- testing/test_junitxml.py | 30 +++++++++++++----------------- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 9ed1859e6..276141d79 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +Changes between 2.2.3 and ... +----------------------------------- + +- fix issue 126: correctly match all invalid xml characters for junitxml + binary escape + Changes between 2.2.2 and 2.2.3 ---------------------------------------- diff --git a/_pytest/junitxml.py b/_pytest/junitxml.py index 1c4bbd82e..2859d2f68 100644 --- a/_pytest/junitxml.py +++ b/_pytest/junitxml.py @@ -34,15 +34,21 @@ class Junit(py.xml.Namespace): # this dynamically instead of hardcoding it. 
The spec range of valid # chars is: Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] # | [#x10000-#x10FFFF] -_illegal_unichrs = [(0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x19), - (0xD800, 0xDFFF), (0xFDD0, 0xFFFF)] -_illegal_ranges = [unicode("%s-%s") % (unichr(low), unichr(high)) - for (low, high) in _illegal_unichrs +_legal_chars = (0x09, 0x0A, 0x0d) +_legal_ranges = ( + (0x20, 0xD7FF), + (0xE000, 0xFFFD), + (0x10000, 0x10FFFF), +) +_legal_xml_re = [unicode("%s-%s") % (unichr(low), unichr(high)) + for (low, high) in _legal_ranges if low < sys.maxunicode] -illegal_xml_re = re.compile(unicode('[%s]') % - unicode('').join(_illegal_ranges)) -del _illegal_unichrs -del _illegal_ranges +_legal_xml_re = [unichr(x) for x in _legal_chars] + _legal_xml_re +illegal_xml_re = re.compile(unicode('[^%s]') % + unicode('').join(_legal_xml_re)) +del _legal_chars +del _legal_ranges +del _legal_xml_re def bin_xml_escape(arg): def repl(matchobj): diff --git a/testing/test_junitxml.py b/testing/test_junitxml.py index 54e440601..c82ee84c6 100644 --- a/testing/test_junitxml.py +++ b/testing/test_junitxml.py @@ -340,7 +340,7 @@ def test_nullbyte_replace(testdir): assert '#x0' in text -def test_invalid_xml_escape(testdir): +def test_invalid_xml_escape(): # Test some more invalid xml chars, the full range should be # tested really but let's just thest the edges of the ranges # intead. 
@@ -355,27 +355,23 @@ def test_invalid_xml_escape(testdir): except NameError: unichr = chr u = py.builtin._totext - invalid = (0x1, 0xB, 0xC, 0xE, 0x19,) - # 0xD800, 0xDFFF, 0xFFFE, 0x0FFFF) #, 0x110000) + invalid = (0x00, 0x1, 0xB, 0xC, 0xE, 0x19, + 033, # issue #126 + 0xD800, 0xDFFF, 0xFFFE, 0x0FFFF) #, 0x110000) valid = (0x9, 0xA, 0x20,) # 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF) - all = invalid + valid - prints = [u(" sys.stdout.write('''0x%X-->%s<--''')") % (i, unichr(i)) - for i in all] - testdir.makepyfile(u("# -*- coding: UTF-8 -*-"), - u("import sys"), - u("def test_print_bytes():"), - u("\n").join(prints), - u(" assert False")) - xmlf = testdir.tmpdir.join('junit.xml') - result = testdir.runpytest('--junitxml=%s' % xmlf) - text = xmlf.read() + + from _pytest.junitxml import bin_xml_escape + + for i in invalid: + got = bin_xml_escape(unichr(i)) if i <= 0xFF: - assert '#x%02X' % i in text + expected = '#x%02X' % i else: - assert '#x%04X' % i in text + expected = '#x%04X' % i + assert got == expected for i in valid: - assert chr(i) in text + assert chr(i) == bin_xml_escape(unichr(i)) def test_logxml_path_expansion(): from _pytest.junitxml import LogXML