i18n security fix. Details will be posted shortly to the Django mailing lists and the official weblog.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@6608 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2007-10-26 19:52:42 +00:00 · 2007-10-26 19:52:42 +00:00 · 842a771e05
parent cb6ecfe9d6
commit 842a771e05
2 changed files with 74 additions and 40 deletions
--- a/django/utils/translation/trans_real.py
+++ b/django/utils/translation/trans_real.py
@ -1,8 +1,12 @@
 "Translation helper functions"
-import os, re, sys
+import locale
 import os
 import re
 import sys
 import gettext as gettext_module
 from cStringIO import StringIO
 from django.utils.encoding import force_unicode
 try:
@ -25,15 +29,25 @@ _active = {}
 # The default translation is based on the settings file.
 _default = None
-# This is a cache for accept-header to translation object mappings to prevent
+# This is a cache for normalised accept-header languages to prevent multiple
-# the accept parser to run multiple times for one user.
+# file lookups when checking the same locale on repeated requests.
 _accepted = {}
-def to_locale(language):
+# Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9.
 # NOTE: the dot in the "1.000" q-value alternative is escaped so that only a
 # literal "." is accepted there; an unescaped "." would also let malformed
 # values such as "q=1x0" through as if they were valid.
 accept_language_re = re.compile(r'''
        ([A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*|\*)   # "en", "en-au", "x-y-z", "*"
        (?:;q=(0(?:\.\d{,3})?|1(?:\.0{,3})?))?  # Optional "q=1.00", "q=0.8"
        (?:\s*,\s*|$)                            # Multiple accepts per header.
        ''', re.VERBOSE)
 def to_locale(language, to_lower=False):
    """
    Turns a language name (en-us) into a locale name (en_US).

    If to_lower is true, the part after the first dash is lower-cased
    instead of upper-cased (en-us becomes en_us). A name without a dash is
    simply returned in lower case.
    """
    p = language.find('-')
    if p >= 0:
-        return language[:p].lower()+'_'+language[p+1:].upper()
+        if to_lower:
            return language[:p].lower()+'_'+language[p+1:].lower()
        else:
            return language[:p].lower()+'_'+language[p+1:].upper()
    else:
        return language.lower()
@ -334,46 +348,40 @@ def get_language_from_request(request):
        if lang_code in supported and lang_code is not None and check_for_language(lang_code):
            return lang_code
-    lang_code = request.COOKIES.get('django_language', None)
+    lang_code = request.COOKIES.get('django_language')
-    if lang_code in supported and lang_code is not None and check_for_language(lang_code):
+    if lang_code and lang_code in supported and check_for_language(lang_code):
        return lang_code
-    accept = request.META.get('HTTP_ACCEPT_LANGUAGE', None)
+    accept = request.META.get('HTTP_ACCEPT_LANGUAGE', '')
-    if accept is not None:
+    for lang, unused in parse_accept_lang_header(accept):
        if lang == '*':
            break
-        t = _accepted.get(accept, None)
+        # We have a very restricted form for our language files (no encoding
-        if t is not None:
+        # specifier, since they all must be UTF-8 and only one possible
-            return t
+        # language each time). So we avoid the overhead of gettext.find() and
        # look up the MO file manually.
-        def _parsed(el):
+        normalized = locale.locale_alias.get(to_locale(lang, True))
-            p = el.find(';q=')
+        if not normalized:
-            if p >= 0:
+            continue
                lang = el[:p].strip()
                order = int(float(el[p+3:].strip())*100)
            else:
                lang = el
                order = 100
            p = lang.find('-')
            if p >= 0:
                mainlang = lang[:p]
            else:
                mainlang = lang
            return (lang, mainlang, order)
-        langs = [_parsed(el) for el in accept.split(',')]
+        # Remove the default encoding from locale_alias
-        langs.sort(lambda a,b: -1*cmp(a[2], b[2]))
+        normalized = normalized.split('.')[0]
-        for lang, mainlang, order in langs:
+        if normalized in _accepted:
-            if lang in supported or mainlang in supported:
+            # We've seen this locale before and have an MO file for it, so no
-                langfile = gettext_module.find('django', globalpath, [to_locale(lang)])
+            # need to check again.
-                if langfile:
+            return _accepted[normalized]
-                    # reconstruct the actual language from the language
+
-                    # filename, because otherwise we might incorrectly
+        for lang in (normalized, normalized.split('_')[0]):
-                    # report de_DE if we only have de available, but
+            if lang not in supported:
-                    # did find de_DE because of language normalization
+                continue
-                    lang = langfile[len(globalpath):].split(os.path.sep)[1]
+            langfile = os.path.join(globalpath, lang, 'LC_MESSAGES',
-                    _accepted[accept] = lang
+                    'django.mo')
-                    return lang
+            if os.path.exists(langfile):
                _accepted[normalized] = lang
            return lang
    return settings.LANGUAGE_CODE
@ -505,3 +513,23 @@ def templatize(src):
                out.write(blankout(t.contents, 'X'))
    return out.getvalue()
 def parse_accept_lang_header(lang_string):
    """
    Breaks up lang_string, the value of an HTTP Accept-Language header.

    Returns a list of (language, q-value) tuples sorted by descending
    q-value. A language given without a q-value (or with one equal to zero)
    is recorded with 1.0. If lang_string contains anything that
    accept_language_re cannot consume, an empty list is returned instead.
    """
    # split() yields [junk, lang, q, junk, lang, q, ..., junk]; any non-empty
    # "junk" slot is text the pattern did not match.
    chunks = accept_language_re.split(lang_string)
    if chunks[-1]:
        return []
    parsed = []
    offset = 0
    while offset < len(chunks) - 1:
        junk, lang, q = chunks[offset:offset + 3]
        if junk:
            return []
        weight = 1.0
        if q:
            weight = float(q) or 1.0
        parsed.append((lang, weight))
        offset += 3
    # Highest q-value first; the sort is stable, so ties keep header order.
    parsed.sort(lambda a, b: cmp(b[1], a[1]))
    return parsed
--- a/tests/regressiontests/i18n/tests.py
+++ b/tests/regressiontests/i18n/tests.py
@ -1,6 +1,7 @@
 # coding: utf-8
 import misc
-ur"""
+regressions = ur"""
 Format string interpolation should work with *_lazy objects.
 >>> from django.utils.translation import ugettext_lazy, activate, deactivate, gettext_lazy
@ -39,3 +40,8 @@ unicode(string_concat(...)) should not raise a TypeError - #4796
 >>> unicode(django.utils.translation.string_concat("dja", "ngo"))
 u'django'
 """
 # Module-level mapping in the form the doctest module looks for: each value
 # is searched for doctests and reported under its key. 'misc' presumably
 # holds further doctests defined in the imported misc module -- confirm there.
 __test__ = {
    'regressions': regressions,
    'misc': misc.tests,
 }