From ee8d2f9950201fe2af15af5122273d1879a65210 Mon Sep 17 00:00:00 2001
From: Ronny Pfannschmidt <Ronny.Pfannschmidt@gmx.de>
Date: Fri, 9 Mar 2012 13:12:18 +0100
Subject: [PATCH] junitxml: use an exclusive match on the legal ranges of xml
 for binary escaping, fixes issue 126

---
 CHANGELOG                |  6 ++++++
 _pytest/junitxml.py      | 22 ++++++++++++++--------
 testing/test_junitxml.py | 30 +++++++++++++-----------------
 3 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 9ed1859e6..276141d79 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,9 @@
+Changes between 2.2.3 and ...
+-----------------------------------
+
+- fix issue 126: correctly match all invalid xml characters for junitxml
+  binary escape
+
 Changes between 2.2.2 and 2.2.3
 ----------------------------------------
 
diff --git a/_pytest/junitxml.py b/_pytest/junitxml.py
index 1c4bbd82e..2859d2f68 100644
--- a/_pytest/junitxml.py
+++ b/_pytest/junitxml.py
@@ -34,15 +34,21 @@ class Junit(py.xml.Namespace):
 # this dynamically instead of hardcoding it.  The spec range of valid
 # chars is: Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 #                    | [#x10000-#x10FFFF]
-_illegal_unichrs = [(0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x19),
-                   (0xD800, 0xDFFF), (0xFDD0, 0xFFFF)]
-_illegal_ranges = [unicode("%s-%s") % (unichr(low), unichr(high))
-                  for (low, high) in _illegal_unichrs
+_legal_chars = (0x09, 0x0A, 0x0d)
+_legal_ranges = (
+    (0x20, 0xD7FF),
+    (0xE000, 0xFFFD),
+    (0x10000, 0x10FFFF),
+)
+_legal_xml_re = [unicode("%s-%s") % (unichr(low), unichr(high))
+                  for (low, high) in _legal_ranges
                   if low < sys.maxunicode]
-illegal_xml_re = re.compile(unicode('[%s]') %
-                            unicode('').join(_illegal_ranges))
-del _illegal_unichrs
-del _illegal_ranges
+_legal_xml_re = [unichr(x) for x in _legal_chars] + _legal_xml_re
+illegal_xml_re = re.compile(unicode('[^%s]') %
+                            unicode('').join(_legal_xml_re))
+del _legal_chars
+del _legal_ranges
+del _legal_xml_re
 
 def bin_xml_escape(arg):
     def repl(matchobj):
diff --git a/testing/test_junitxml.py b/testing/test_junitxml.py
index 54e440601..c82ee84c6 100644
--- a/testing/test_junitxml.py
+++ b/testing/test_junitxml.py
@@ -340,7 +340,7 @@ def test_nullbyte_replace(testdir):
     assert '#x0' in text
 
 
-def test_invalid_xml_escape(testdir):
+def test_invalid_xml_escape():
     # Test some more invalid xml chars, the full range should be
     # tested really but let's just thest the edges of the ranges
     # intead.
@@ -355,27 +355,23 @@ def test_invalid_xml_escape(testdir):
     except NameError:
         unichr = chr
     u = py.builtin._totext
-    invalid = (0x1, 0xB, 0xC, 0xE, 0x19,)
-               # 0xD800, 0xDFFF, 0xFFFE, 0x0FFFF) #, 0x110000)
+    invalid = (0x00, 0x1, 0xB, 0xC, 0xE, 0x19,
+                033, # issue #126
+               0xD800, 0xDFFF, 0xFFFE, 0x0FFFF) #, 0x110000)
     valid = (0x9, 0xA, 0x20,) # 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF)
-    all = invalid + valid
-    prints = [u("    sys.stdout.write('''0x%X-->%s<--''')") % (i, unichr(i))
-              for i in all]
-    testdir.makepyfile(u("# -*- coding: UTF-8 -*-"),
-                       u("import sys"),
-                       u("def test_print_bytes():"),
-                       u("\n").join(prints),
-                       u("    assert False"))
-    xmlf = testdir.tmpdir.join('junit.xml')
-    result = testdir.runpytest('--junitxml=%s' % xmlf)
-    text = xmlf.read()
+    
+    from _pytest.junitxml import bin_xml_escape
+
+
     for i in invalid:
+        got = bin_xml_escape(unichr(i))
         if i <= 0xFF:
-            assert '#x%02X' % i in text
+            expected = '#x%02X' % i
         else:
-            assert '#x%04X' % i in text
+            expected = '#x%04X' % i
+        assert got == expected
     for i in valid:
-        assert chr(i) in text
+        assert chr(i) == bin_xml_escape(unichr(i))
 
 def test_logxml_path_expansion():
     from _pytest.junitxml import LogXML