From 5212911b1991aa749d828b31177dce99237068ed Mon Sep 17 00:00:00 2001
From: Malcolm Tredinnick <malcolm.tredinnick@gmail.com>
Date: Wed, 4 Apr 2007 06:43:28 +0000
Subject: [PATCH] Made django.utils.html.escape() work with unicode strings
 (and unicode-like objects). Refs #3897.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@4919 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/utils/html.py                          |  3 ++-
 tests/regressiontests/defaultfilters/tests.py | 19 ++++++++++++-------
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/django/utils/html.py b/django/utils/html.py
index baa2fb06fc..d06440b52e 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -1,6 +1,7 @@
 "HTML utilities suitable for global use."
 
 import re, string
+from django.utils.encoding import smart_unicode
 
 # Configuration for urlize() function
 LEADING_PUNCTUATION  = ['(', '<', '&lt;']
@@ -24,7 +25,7 @@ del x # Temporary variable
 def escape(html):
     "Returns the given HTML with ampersands, quotes and carets encoded"
     if not isinstance(html, basestring):
-        html = str(html)
+        html = smart_unicode(html)
     return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
 
 def linebreaks(value):
diff --git a/tests/regressiontests/defaultfilters/tests.py b/tests/regressiontests/defaultfilters/tests.py
index db3f7fab2a..4a2e9432b0 100644
--- a/tests/regressiontests/defaultfilters/tests.py
+++ b/tests/regressiontests/defaultfilters/tests.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 r"""
 >>> floatformat(7.7)
 '7.7'
@@ -87,19 +89,19 @@ u'\xeb'
 >>> truncatewords('A sentence with a few words in it', 'not a number')
 'A sentence with a few words in it'
 
->>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 0) 
+>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 0)
 ''
- 
->>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 2) 
+
+>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 2)
 '<p>one <a href="#">two ...</a></p>'
- 
->>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4) 
+
+>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4)
 '<p>one <a href="#">two - three <br>four ...</a></p>'
 
->>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5) 
+>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5)
 '<p>one <a href="#">two - three <br>four</a> five</p>'
 
->>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 100) 
+>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 100)
 '<p>one <a href="#">two - three <br>four</a> five</p>'
 
 >>> upper('Mixed case input')
@@ -166,6 +168,9 @@ u'\xcb'
 >>> escape('<some html & special characters > here')
 '&lt;some html &amp; special characters &gt; here'
 
+>>> escape(u'<some html & special characters > here ĐÅ€£')
+u'&lt;some html &amp; special characters &gt; here \xc4\x90\xc3\x85\xe2\x82\xac\xc2\xa3'
+
 >>> linebreaks('line 1')
 '<p>line 1</p>'