junitxml: use an exclusive match on the legal ranges of xml for binary escaping, fixes issue 126

2012-03-09 13:12:18 +01:00 · 2012-03-09 13:12:18 +01:00 · ee8d2f9950
parent 51d29cf4c6
commit ee8d2f9950
3 changed files with 33 additions and 25 deletions
--- a/6
+++ b/6
@ -1,3 +1,9 @@
+Changes between 2.2.3 and ...
+-----------------------------------
+
+- fix issue 126: correctly match all invalid xml characters for junitxml
+  binary escape
+
 Changes between 2.2.2 and 2.2.3
 ----------------------------------------

--- a/_pytest/junitxml.py
+++ b/_pytest/junitxml.py
@ -34,15 +34,21 @@ class Junit(py.xml.Namespace):
 # this dynamically instead of hardcoding it.  The spec range of valid
 # chars is: Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 #                    | [#x10000-#x10FFFF]
-_illegal_unichrs = [(0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x19),
-                   (0xD800, 0xDFFF), (0xFDD0, 0xFFFF)]
-_illegal_ranges = [unicode("%s-%s") % (unichr(low), unichr(high))
-                  for (low, high) in _illegal_unichrs
+_legal_chars = (0x09, 0x0A, 0x0d)
+_legal_ranges = (
+    (0x20, 0xD7FF),
+    (0xE000, 0xFFFD),
+    (0x10000, 0x10FFFF),
+)
+_legal_xml_re = [unicode("%s-%s") % (unichr(low), unichr(high))
+                  for (low, high) in _legal_ranges
                  if low < sys.maxunicode]
-illegal_xml_re = re.compile(unicode('[%s]') %
-                            unicode('').join(_illegal_ranges))
-del _illegal_unichrs
-del _illegal_ranges
+_legal_xml_re = [unichr(x) for x in _legal_chars] + _legal_xml_re
+illegal_xml_re = re.compile(unicode('[^%s]') %
+                            unicode('').join(_legal_xml_re))
+del _legal_chars
+del _legal_ranges
+del _legal_xml_re

 def bin_xml_escape(arg):
    def repl(matchobj):
--- a/testing/test_junitxml.py
+++ b/testing/test_junitxml.py
@ -340,7 +340,7 @@ def test_nullbyte_replace(testdir):
    assert '#x0' in text


-def test_invalid_xml_escape(testdir):
+def test_invalid_xml_escape():
    # Test some more invalid xml chars, the full range should be
    # tested really but let's just test the edges of the ranges
    # instead.
@ -355,27 +355,23 @@ def test_invalid_xml_escape(testdir):
    except NameError:
        unichr = chr
    u = py.builtin._totext
-    invalid = (0x1, 0xB, 0xC, 0xE, 0x19,)
-               # 0xD800, 0xDFFF, 0xFFFE, 0x0FFFF) #, 0x110000)
+    invalid = (0x00, 0x1, 0xB, 0xC, 0xE, 0x19,
+                033, # issue #126
+               0xD800, 0xDFFF, 0xFFFE, 0x0FFFF) #, 0x110000)
    valid = (0x9, 0xA, 0x20,) # 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF)
-    all = invalid + valid
-    prints = [u("    sys.stdout.write('''0x%X-->%s<--''')") % (i, unichr(i))
-              for i in all]
-    testdir.makepyfile(u("# -*- coding: UTF-8 -*-"),
-                       u("import sys"),
-                       u("def test_print_bytes():"),
-                       u("\n").join(prints),
-                       u("    assert False"))
-    xmlf = testdir.tmpdir.join('junit.xml')
-    result = testdir.runpytest('--junitxml=%s' % xmlf)
-    text = xmlf.read()
+    
+    from _pytest.junitxml import bin_xml_escape
+
+
    for i in invalid:
+        got = bin_xml_escape(unichr(i))
        if i <= 0xFF:
-            assert '#x%02X' % i in text
+            expected = '#x%02X' % i
        else:
-            assert '#x%04X' % i in text
+            expected = '#x%04X' % i
+        assert got == expected
    for i in valid:
-        assert chr(i) in text
+        assert chr(i) == bin_xml_escape(unichr(i))

 def test_logxml_path_expansion():
    from _pytest.junitxml import LogXML