junitxml: use an exclusive match on the legal ranges of xml for binary escaping, fixes issue 126

This commit is contained in:
Ronny Pfannschmidt 2012-03-09 13:12:18 +01:00
parent 51d29cf4c6
commit ee8d2f9950
3 changed files with 33 additions and 25 deletions

View File

@ -1,3 +1,9 @@
Changes between 2.2.3 and ...
-----------------------------------
- fix issue 126: correctly match all invalid xml characters for junitxml
binary escape
Changes between 2.2.2 and 2.2.3
----------------------------------------

View File

@ -34,15 +34,21 @@ class Junit(py.xml.Namespace):
# this dynamically instead of hardcoding it. The spec range of valid
# chars is: Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
# | [#x10000-#x10FFFF]
_illegal_unichrs = [(0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x19),
(0xD800, 0xDFFF), (0xFDD0, 0xFFFF)]
_illegal_ranges = [unicode("%s-%s") % (unichr(low), unichr(high))
for (low, high) in _illegal_unichrs
_legal_chars = (0x09, 0x0A, 0x0d)
_legal_ranges = (
(0x20, 0xD7FF),
(0xE000, 0xFFFD),
(0x10000, 0x10FFFF),
)
_legal_xml_re = [unicode("%s-%s") % (unichr(low), unichr(high))
for (low, high) in _legal_ranges
if low < sys.maxunicode]
illegal_xml_re = re.compile(unicode('[%s]') %
unicode('').join(_illegal_ranges))
del _illegal_unichrs
del _illegal_ranges
_legal_xml_re = [unichr(x) for x in _legal_chars] + _legal_xml_re
illegal_xml_re = re.compile(unicode('[^%s]') %
unicode('').join(_legal_xml_re))
del _legal_chars
del _legal_ranges
del _legal_xml_re
def bin_xml_escape(arg):
def repl(matchobj):

View File

@ -340,7 +340,7 @@ def test_nullbyte_replace(testdir):
assert '#x0' in text
def test_invalid_xml_escape(testdir):
def test_invalid_xml_escape():
# Test some more invalid xml chars, the full range should be
# tested really but let's just test the edges of the ranges
# instead.
@ -355,27 +355,23 @@ def test_invalid_xml_escape(testdir):
except NameError:
unichr = chr
u = py.builtin._totext
invalid = (0x1, 0xB, 0xC, 0xE, 0x19,)
# 0xD800, 0xDFFF, 0xFFFE, 0x0FFFF) #, 0x110000)
invalid = (0x00, 0x1, 0xB, 0xC, 0xE, 0x19,
033, # issue #126
0xD800, 0xDFFF, 0xFFFE, 0x0FFFF) #, 0x110000)
valid = (0x9, 0xA, 0x20,) # 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF)
all = invalid + valid
prints = [u(" sys.stdout.write('''0x%X-->%s<--''')") % (i, unichr(i))
for i in all]
testdir.makepyfile(u("# -*- coding: UTF-8 -*-"),
u("import sys"),
u("def test_print_bytes():"),
u("\n").join(prints),
u(" assert False"))
xmlf = testdir.tmpdir.join('junit.xml')
result = testdir.runpytest('--junitxml=%s' % xmlf)
text = xmlf.read()
from _pytest.junitxml import bin_xml_escape
for i in invalid:
got = bin_xml_escape(unichr(i))
if i <= 0xFF:
assert '#x%02X' % i in text
expected = '#x%02X' % i
else:
assert '#x%04X' % i in text
expected = '#x%04X' % i
assert got == expected
for i in valid:
assert chr(i) in text
assert chr(i) == bin_xml_escape(unichr(i))
def test_logxml_path_expansion():
from _pytest.junitxml import LogXML