i18n security fix. Details will be posted shortly to the Django mailing lists and the official weblog.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@6608 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss 2007-10-26 19:52:42 +00:00
parent cb6ecfe9d6
commit 842a771e05
2 changed files with 74 additions and 40 deletions

View File

@ -1,8 +1,12 @@
"Translation helper functions" "Translation helper functions"
import os, re, sys import locale
import os
import re
import sys
import gettext as gettext_module import gettext as gettext_module
from cStringIO import StringIO from cStringIO import StringIO
from django.utils.encoding import force_unicode from django.utils.encoding import force_unicode
try: try:
@ -25,14 +29,24 @@ _active = {}
# The default translation is based on the settings file. # The default translation is based on the settings file.
_default = None _default = None
# This is a cache for accept-header to translation object mappings to prevent # This is a cache for normalised accept-header languages to prevent multiple
# the accept parser to run multiple times for one user. # file lookups when checking the same locale on repeated requests.
_accepted = {} _accepted = {}
def to_locale(language): # Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9.
accept_language_re = re.compile(r'''
([A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*|\*) # "en", "en-au", "x-y-z", "*"
(?:;q=(0(?:\.\d{,3})?|1(?:.0{,3})?))? # Optional "q=1.00", "q=0.8"
(?:\s*,\s*|$) # Multiple accepts per header.
''', re.VERBOSE)
def to_locale(language, to_lower=False):
"Turns a language name (en-us) into a locale name (en_US)." "Turns a language name (en-us) into a locale name (en_US)."
p = language.find('-') p = language.find('-')
if p >= 0: if p >= 0:
if to_lower:
return language[:p].lower()+'_'+language[p+1:].lower()
else:
return language[:p].lower()+'_'+language[p+1:].upper() return language[:p].lower()+'_'+language[p+1:].upper()
else: else:
return language.lower() return language.lower()
@ -334,45 +348,39 @@ def get_language_from_request(request):
if lang_code in supported and lang_code is not None and check_for_language(lang_code): if lang_code in supported and lang_code is not None and check_for_language(lang_code):
return lang_code return lang_code
lang_code = request.COOKIES.get('django_language', None) lang_code = request.COOKIES.get('django_language')
if lang_code in supported and lang_code is not None and check_for_language(lang_code): if lang_code and lang_code in supported and check_for_language(lang_code):
return lang_code return lang_code
accept = request.META.get('HTTP_ACCEPT_LANGUAGE', None) accept = request.META.get('HTTP_ACCEPT_LANGUAGE', '')
if accept is not None: for lang, unused in parse_accept_lang_header(accept):
if lang == '*':
break
t = _accepted.get(accept, None) # We have a very restricted form for our language files (no encoding
if t is not None: # specifier, since they all must be UTF-8 and only one possible
return t # language each time. So we avoid the overhead of gettext.find() and
# look up the MO file manually.
def _parsed(el): normalized = locale.locale_alias.get(to_locale(lang, True))
p = el.find(';q=') if not normalized:
if p >= 0: continue
lang = el[:p].strip()
order = int(float(el[p+3:].strip())*100)
else:
lang = el
order = 100
p = lang.find('-')
if p >= 0:
mainlang = lang[:p]
else:
mainlang = lang
return (lang, mainlang, order)
langs = [_parsed(el) for el in accept.split(',')] # Remove the default encoding from locale_alias
langs.sort(lambda a,b: -1*cmp(a[2], b[2])) normalized = normalized.split('.')[0]
for lang, mainlang, order in langs: if normalized in _accepted:
if lang in supported or mainlang in supported: # We've seen this locale before and have an MO file for it, so no
langfile = gettext_module.find('django', globalpath, [to_locale(lang)]) # need to check again.
if langfile: return _accepted[normalized]
# reconstruct the actual language from the language
# filename, because otherwise we might incorrectly for lang in (normalized, normalized.split('_')[0]):
# report de_DE if we only have de available, but if lang not in supported:
# did find de_DE because of language normalization continue
lang = langfile[len(globalpath):].split(os.path.sep)[1] langfile = os.path.join(globalpath, lang, 'LC_MESSAGES',
_accepted[accept] = lang 'django.mo')
if os.path.exists(langfile):
_accepted[normalized] = lang
return lang return lang
return settings.LANGUAGE_CODE return settings.LANGUAGE_CODE
@ -505,3 +513,23 @@ def templatize(src):
out.write(blankout(t.contents, 'X')) out.write(blankout(t.contents, 'X'))
return out.getvalue() return out.getvalue()
def parse_accept_lang_header(lang_string):
"""
Parses the lang_string, which is the body of an HTTP Accept-Language
header, and returns a list of (lang, q-value), ordered by 'q' values.
Any format errors in lang_string results in an empty list being returned.
"""
result = []
pieces = accept_language_re.split(lang_string)
if pieces[-1]:
return []
for i in range(0, len(pieces) - 1, 3):
first, lang, priority = pieces[i : i + 3]
if first:
return []
priority = priority and float(priority) or 1.0
result.append((lang, priority))
result.sort(lambda x, y: -cmp(x[1], y[1]))
return result

View File

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
import misc
ur""" regressions = ur"""
Format string interpolation should work with *_lazy objects. Format string interpolation should work with *_lazy objects.
>>> from django.utils.translation import ugettext_lazy, activate, deactivate, gettext_lazy >>> from django.utils.translation import ugettext_lazy, activate, deactivate, gettext_lazy
@ -39,3 +40,8 @@ unicode(string_concat(...)) should not raise a TypeError - #4796
>>> unicode(django.utils.translation.string_concat("dja", "ngo")) >>> unicode(django.utils.translation.string_concat("dja", "ngo"))
u'django' u'django'
""" """
__test__ = {
'regressions': regressions,
'misc': misc.tests,
}