From 1c1918eb222ba4ed04aeb221f4e8580e4b62a39a Mon Sep 17 00:00:00 2001
From: Floris Bruynooghe
Date: Sat, 16 Apr 2011 00:09:25 +0100
Subject: [PATCH] Prevent null-characters from appearing in junitxml's output

The Jenkins XML parser does not deal with null-characters inside the
XML.  This replaces every character that is illegal in XML 1.0, the
null character included, with a visible "#xNN" escape in the XML
output.
---
 _pytest/junitxml.py      | 47 ++++++++++++++++++++++++++-
 testing/test_junitxml.py | 68 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/_pytest/junitxml.py b/_pytest/junitxml.py
index d28b19efb..d92842db0 100644
--- a/_pytest/junitxml.py
+++ b/_pytest/junitxml.py
@@ -5,8 +5,42 @@ Based on initial code from Ross Lawley.
 
 import py
 import os
+import re
+import sys
 import time
 
+
+# Python 2.X and 3.X compatibility
+try:
+    unichr(65)
+except NameError:
+    unichr = chr
+try:
+    unicode('A')
+except NameError:
+    unicode = str
+try:
+    long(1)
+except NameError:
+    long = int
+
+
+# We need to get the subset of the invalid unicode ranges according to
+# XML 1.0 which are valid in this python build.  Hence we calculate
+# this dynamically instead of hardcoding it.  The spec range of valid
+# chars is: Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
+#                    | [#x10000-#x10FFFF]
+_illegal_unichrs = [(0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F),
+                   (0xD800, 0xDFFF), (0xFDD0, 0xFFFF)]
+_illegal_ranges = [unicode("%s-%s") % (unichr(low), unichr(high))
+                  for (low, high) in _illegal_unichrs
+                  if low < sys.maxunicode]
+illegal_xml_re = re.compile(unicode('[%s]') %
+                            unicode('').join(_illegal_ranges))
+del _illegal_unichrs
+del _illegal_ranges
+
+
 def pytest_addoption(parser):
     group = parser.getgroup("terminal reporting")
     group.addoption('--junitxml', action="store", dest="xmlpath",
@@ -28,6 +62,7 @@ def pytest_unconfigure(config):
         del config._xml
         config.pluginmanager.unregister(xml)
 
+
 class LogXML(object):
     def __init__(self, logfile, prefix):
         self.logfile = logfile
@@ -55,7 +90,17 @@ class LogXML(object):
         self.test_logs.append("")
 
     def appendlog(self, fmt, *args):
-        args = tuple([py.xml.escape(arg) for arg in args])
+        # Escape XML markup first, then spell out every character that
+        # is illegal in XML 1.0 as visible "#xNN" text, so that strict
+        # parsers such as Jenkins' can still read the report.
+        def repl(matchobj):
+            i = ord(matchobj.group())
+            if i <= 0xFF:
+                return unicode('#x%02X') % i
+            else:
+                return unicode('#x%04X') % i
+        args = tuple([illegal_xml_re.sub(repl, py.xml.escape(arg))
+                      for arg in args])
         self.test_logs.append(fmt % args)
 
     def append_pass(self, report):
diff --git a/testing/test_junitxml.py b/testing/test_junitxml.py
index 4521ff1ac..a19df36eb 100644
--- a/testing/test_junitxml.py
+++ b/testing/test_junitxml.py
@@ -283,3 +283,71 @@ class TestNonPython:
         assert_attr(fnode, message="test failure")
         assert "custom item runtest failed" in fnode.toxml()
 
+
+def test_nullbyte(testdir):
+    # A null byte can not occur in XML (see section 2.2 of the spec)
+    testdir.makepyfile("""
+        import sys
+        def test_print_nullbyte():
+            sys.stdout.write('Here the null -->' + chr(0) + '<--')
+            sys.stdout.write('In repr form -->' + repr(chr(0)) + '<--')
+            assert False
+    """)
+    xmlf = testdir.tmpdir.join('junit.xml')
+    result = testdir.runpytest('--junitxml=%s' % xmlf)
+    text = xmlf.read()
+    assert '\x00' not in text
+    assert '#x00' in text
+
+
+def test_nullbyte_replace(testdir):
+    # Check if the null byte gets replaced
+    testdir.makepyfile("""
+        import sys
+        def test_print_nullbyte():
+            sys.stdout.write('Here the null -->' + chr(0) + '<--')
+            sys.stdout.write('In repr form -->' + repr(chr(0)) + '<--')
+            assert False
+    """)
+    xmlf = testdir.tmpdir.join('junit.xml')
+    result = testdir.runpytest('--junitxml=%s' % xmlf)
+    text = xmlf.read()
+    assert '#x0' in text
+
+
+def test_invalid_xml_escape(testdir):
+    # Test some more invalid xml chars, the full range should be
+    # tested really but let's just test the edges of the ranges
+    # instead.
+    # XXX This only tests low unicode character points for now as
+    #     there are some issues with the testing infrastructure for
+    #     the higher ones.
+    # XXX Testing 0xD (\r) is tricky as it overwrites the just written
+    #     line in the output, so we skip it too.
+    global unichr
+    try:
+        unichr(65)
+    except NameError:
+        unichr = chr
+    u = py.builtin._totext
+    invalid = (0x1, 0xB, 0xC, 0xE, 0x19, 0x1F,)
+               # 0xD800, 0xDFFF, 0xFFFE, 0x0FFFF) #, 0x110000)
+    valid = (0x9, 0xA, 0x20,) # 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF)
+    all = invalid + valid
+    prints = [u(" sys.stdout.write('''0x%X-->%s<--''')") % (i, unichr(i))
+              for i in all]
+    testdir.makepyfile(u("# -*- coding: UTF-8 -*-"),
+                       u("import sys"),
+                       u("def test_print_bytes():"),
+                       u("\n").join(prints),
+                       u(" assert False"))
+    xmlf = testdir.tmpdir.join('junit.xml')
+    result = testdir.runpytest('--junitxml=%s' % xmlf)
+    text = xmlf.read()
+    for i in invalid:
+        if i <= 0xFF:
+            assert '#x%02X' % i in text
+        else:
+            assert '#x%04X' % i in text
+    for i in valid:
+        assert chr(i) in text