Fixed #21389 -- Accept most valid language codes
By removing the 'supported' keyword argument from the detection methods and relying only on a cached copy of settings.LANGUAGES, the speed of those methods has been improved by around 4x in raw performance. This allows us to stop checking Python's incomplete list of locales and instead rely on a less restrictive regular expression for accepting certain locales. Because HTTP Accept-Language is defined as case-insensitive, the header is now lowercased up front instead of being checked for case differences, which yields further performance improvements.
This commit is contained in:
parent
48a8b714d4
commit
2bab9d6d9e
|
@ -1,7 +1,5 @@
|
|||
"This is the locale selecting middleware that will look at accept headers"
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.urlresolvers import (is_valid_path, get_resolver,
|
||||
LocaleRegexURLResolver)
|
||||
|
@ -21,7 +19,6 @@ class LocaleMiddleware(object):
|
|||
response_redirect_class = HttpResponseRedirect
|
||||
|
||||
def __init__(self):
|
||||
self._supported_languages = OrderedDict(settings.LANGUAGES)
|
||||
self._is_language_prefix_patterns_used = False
|
||||
for url_pattern in get_resolver(None).url_patterns:
|
||||
if isinstance(url_pattern, LocaleRegexURLResolver):
|
||||
|
@ -37,9 +34,7 @@ class LocaleMiddleware(object):
|
|||
|
||||
def process_response(self, request, response):
|
||||
language = translation.get_language()
|
||||
language_from_path = translation.get_language_from_path(
|
||||
request.path_info, supported=self._supported_languages
|
||||
)
|
||||
language_from_path = translation.get_language_from_path(request.path_info)
|
||||
if (response.status_code == 404 and not language_from_path
|
||||
and self.is_language_prefix_patterns_used()):
|
||||
urlconf = getattr(request, 'urlconf', None)
|
||||
|
|
|
@ -187,8 +187,8 @@ def get_language_from_request(request, check_path=False):
|
|||
return _trans.get_language_from_request(request, check_path)
|
||||
|
||||
|
||||
def get_language_from_path(path, supported=None):
|
||||
return _trans.get_language_from_path(path, supported=supported)
|
||||
def get_language_from_path(path):
|
||||
return _trans.get_language_from_path(path)
|
||||
|
||||
|
||||
def templatize(src, origin=None):
|
||||
|
|
|
@ -68,5 +68,5 @@ def get_language_from_request(request, check_path=False):
|
|||
return settings.LANGUAGE_CODE
|
||||
|
||||
|
||||
def get_language_from_path(request, supported=None):
|
||||
def get_language_from_path(request):
|
||||
return None
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from collections import OrderedDict
|
||||
import locale
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
@ -29,9 +28,9 @@ _active = local()
|
|||
# The default translation is based on the settings file.
|
||||
_default = None
|
||||
|
||||
# This is a cache for normalized accept-header languages to prevent multiple
|
||||
# file lookups when checking the same locale on repeated requests.
|
||||
_accepted = {}
|
||||
# This is a cache of settings.LANGUAGES in an OrderedDict for easy lookups by
|
||||
# key
|
||||
_supported = None
|
||||
|
||||
# magic gettext number to separate context from message
|
||||
CONTEXT_SEPARATOR = "\x04"
|
||||
|
@ -63,9 +62,11 @@ def reset_cache(**kwargs):
|
|||
Reset global state when LANGUAGES setting has been changed, as some
|
||||
languages should no longer be accepted.
|
||||
"""
|
||||
if kwargs['setting'] == 'LANGUAGES':
|
||||
global _accepted
|
||||
_accepted = {}
|
||||
if kwargs['setting'] in ('LANGUAGES', 'LANGUAGE_CODE'):
|
||||
global _supported
|
||||
_supported = None
|
||||
check_for_language.cache_clear()
|
||||
get_supported_language_variant.cache_clear()
|
||||
|
||||
|
||||
def to_locale(language, to_lower=False):
|
||||
|
@ -388,7 +389,7 @@ def all_locale_paths():
|
|||
return [globalpath] + list(settings.LOCALE_PATHS)
|
||||
|
||||
|
||||
@lru_cache.lru_cache(maxsize=None)
|
||||
@lru_cache.lru_cache()
|
||||
def check_for_language(lang_code):
|
||||
"""
|
||||
Checks whether there is a global language file for the given language
|
||||
|
@ -404,39 +405,42 @@ def check_for_language(lang_code):
|
|||
return False
|
||||
|
||||
|
||||
def get_supported_language_variant(lang_code, supported=None, strict=False):
|
||||
@lru_cache.lru_cache(maxsize=1000)
|
||||
def get_supported_language_variant(lang_code, strict=False):
|
||||
"""
|
||||
Returns the language-code that's listed in supported languages, possibly
|
||||
selecting a more generic variant. Raises LookupError if nothing found.
|
||||
|
||||
If `strict` is False (the default), the function will look for an alternative
|
||||
country-specific variant when the currently checked is not found.
|
||||
|
||||
lru_cache should have a maxsize to prevent from memory exhaustion attacks,
|
||||
as the provided language codes are taken from the HTTP request. See also
|
||||
<https://www.djangoproject.com/weblog/2007/oct/26/security-fix/>.
|
||||
"""
|
||||
if supported is None:
|
||||
global _supported
|
||||
if _supported is None:
|
||||
from django.conf import settings
|
||||
supported = OrderedDict(settings.LANGUAGES)
|
||||
_supported = OrderedDict(settings.LANGUAGES)
|
||||
if lang_code:
|
||||
# some browsers use deprecated language codes -- #18419
|
||||
replacement = _BROWSERS_DEPRECATED_LOCALES.get(lang_code)
|
||||
if lang_code not in supported and replacement in supported:
|
||||
if lang_code not in _supported and replacement in _supported:
|
||||
return replacement
|
||||
# if fr-CA is not supported, try fr-ca; if that fails, fallback to fr.
|
||||
# if fr-ca is not supported, try fr.
|
||||
generic_lang_code = lang_code.split('-')[0]
|
||||
variants = (lang_code, lang_code.lower(), generic_lang_code,
|
||||
generic_lang_code.lower())
|
||||
for code in variants:
|
||||
if code in supported and check_for_language(code):
|
||||
for code in (lang_code, generic_lang_code):
|
||||
if code in _supported and check_for_language(code):
|
||||
return code
|
||||
if not strict:
|
||||
# if fr-fr is not supported, try fr-ca.
|
||||
for supported_code in supported:
|
||||
if supported_code.startswith((generic_lang_code + '-',
|
||||
generic_lang_code.lower() + '-')):
|
||||
for supported_code in _supported:
|
||||
if supported_code.startswith(generic_lang_code + '-'):
|
||||
return supported_code
|
||||
raise LookupError(lang_code)
|
||||
|
||||
|
||||
def get_language_from_path(path, supported=None, strict=False):
|
||||
def get_language_from_path(path, strict=False):
|
||||
"""
|
||||
Returns the language-code if there is a valid language-code
|
||||
found in the `path`.
|
||||
|
@ -444,15 +448,12 @@ def get_language_from_path(path, supported=None, strict=False):
|
|||
If `strict` is False (the default), the function will look for an alternative
|
||||
country-specific variant when the currently checked is not found.
|
||||
"""
|
||||
if supported is None:
|
||||
from django.conf import settings
|
||||
supported = OrderedDict(settings.LANGUAGES)
|
||||
regex_match = language_code_prefix_re.match(path)
|
||||
if not regex_match:
|
||||
return None
|
||||
lang_code = regex_match.group(1)
|
||||
try:
|
||||
return get_supported_language_variant(lang_code, supported, strict=strict)
|
||||
return get_supported_language_variant(lang_code, strict=strict)
|
||||
except LookupError:
|
||||
return None
|
||||
|
||||
|
@ -467,25 +468,26 @@ def get_language_from_request(request, check_path=False):
|
|||
If check_path is True, the URL path prefix will be checked for a language
|
||||
code, otherwise this is skipped for backwards compatibility.
|
||||
"""
|
||||
global _accepted
|
||||
from django.conf import settings
|
||||
supported = OrderedDict(settings.LANGUAGES)
|
||||
global _supported
|
||||
if _supported is None:
|
||||
_supported = OrderedDict(settings.LANGUAGES)
|
||||
|
||||
if check_path:
|
||||
lang_code = get_language_from_path(request.path_info, supported)
|
||||
lang_code = get_language_from_path(request.path_info)
|
||||
if lang_code is not None:
|
||||
return lang_code
|
||||
|
||||
if hasattr(request, 'session'):
|
||||
# for backwards compatibility django_language is also checked (remove in 1.8)
|
||||
lang_code = request.session.get(LANGUAGE_SESSION_KEY, request.session.get('django_language'))
|
||||
if lang_code in supported and lang_code is not None and check_for_language(lang_code):
|
||||
if lang_code in _supported and lang_code is not None and check_for_language(lang_code):
|
||||
return lang_code
|
||||
|
||||
lang_code = request.COOKIES.get(settings.LANGUAGE_COOKIE_NAME)
|
||||
|
||||
try:
|
||||
return get_supported_language_variant(lang_code, supported)
|
||||
return get_supported_language_variant(lang_code)
|
||||
except LookupError:
|
||||
pass
|
||||
|
||||
|
@ -494,29 +496,16 @@ def get_language_from_request(request, check_path=False):
|
|||
if accept_lang == '*':
|
||||
break
|
||||
|
||||
# 'normalized' is the root name of the locale in POSIX format (which is
|
||||
# the format used for the directories holding the MO files).
|
||||
normalized = locale.locale_alias.get(to_locale(accept_lang, True))
|
||||
if not normalized:
|
||||
if not language_code_re.search(accept_lang):
|
||||
continue
|
||||
# Remove the default encoding from locale_alias.
|
||||
normalized = normalized.split('.')[0]
|
||||
|
||||
if normalized in _accepted:
|
||||
# We've seen this locale before and have an MO file for it, so no
|
||||
# need to check again.
|
||||
return _accepted[normalized]
|
||||
|
||||
try:
|
||||
accept_lang = get_supported_language_variant(accept_lang, supported)
|
||||
return get_supported_language_variant(accept_lang)
|
||||
except LookupError:
|
||||
continue
|
||||
else:
|
||||
_accepted[normalized] = accept_lang
|
||||
return accept_lang
|
||||
|
||||
try:
|
||||
return get_supported_language_variant(settings.LANGUAGE_CODE, supported)
|
||||
return get_supported_language_variant(settings.LANGUAGE_CODE)
|
||||
except LookupError:
|
||||
return settings.LANGUAGE_CODE
|
||||
|
||||
|
@ -732,7 +721,7 @@ def parse_accept_lang_header(lang_string):
|
|||
Any format errors in lang_string results in an empty list being returned.
|
||||
"""
|
||||
result = []
|
||||
pieces = accept_language_re.split(lang_string)
|
||||
pieces = accept_language_re.split(lang_string.lower())
|
||||
if pieces[-1]:
|
||||
return []
|
||||
for i in range(0, len(pieces) - 1, 3):
|
||||
|
|
|
@ -1125,6 +1125,14 @@ Miscellaneous
|
|||
For example, if you use multi-inheritance, you need to define custom primary
|
||||
key fields on parent models, otherwise the default ``id`` fields will clash.
|
||||
|
||||
* :meth:`~django.utils.translation.parse_accept_lang_header` now returns
|
||||
lowercase locales, instead of the case as it was provided. As locales should
|
||||
be treated case-insensitively, this allows us to speed up locale detection.
|
||||
|
||||
* :meth:`~django.utils.translation.get_language_from_path` and
|
||||
:meth:`~django.utils.translation.trans_real.get_supported_language_variant`
|
||||
no longer accept a ``supported`` argument.
|
||||
|
||||
.. _deprecated-features-1.7:
|
||||
|
||||
Features deprecated in 1.7
|
||||
|
|
|
@ -821,10 +821,10 @@ class MiscTests(TestCase):
|
|||
p = trans_real.parse_accept_lang_header
|
||||
# Good headers.
|
||||
self.assertEqual([('de', 1.0)], p('de'))
|
||||
self.assertEqual([('en-AU', 1.0)], p('en-AU'))
|
||||
self.assertEqual([('en-au', 1.0)], p('en-AU'))
|
||||
self.assertEqual([('es-419', 1.0)], p('es-419'))
|
||||
self.assertEqual([('*', 1.0)], p('*;q=1.00'))
|
||||
self.assertEqual([('en-AU', 0.123)], p('en-AU;q=0.123'))
|
||||
self.assertEqual([('en-au', 0.123)], p('en-AU;q=0.123'))
|
||||
self.assertEqual([('en-au', 0.5)], p('en-au;q=0.5'))
|
||||
self.assertEqual([('en-au', 1.0)], p('en-au;q=1.0'))
|
||||
self.assertEqual([('da', 1.0), ('en', 0.5), ('en-gb', 0.25)], p('da, en-gb;q=0.25, en;q=0.5'))
|
||||
|
@ -884,6 +884,24 @@ class MiscTests(TestCase):
|
|||
r.META = {'HTTP_ACCEPT_LANGUAGE': 'zh-cn,de'}
|
||||
self.assertEqual(g(r), 'zh-cn')
|
||||
|
||||
r.META = {'HTTP_ACCEPT_LANGUAGE': 'NL'}
|
||||
self.assertEqual('nl', g(r))
|
||||
|
||||
r.META = {'HTTP_ACCEPT_LANGUAGE': 'fy'}
|
||||
self.assertEqual('fy', g(r))
|
||||
|
||||
r.META = {'HTTP_ACCEPT_LANGUAGE': 'ia'}
|
||||
self.assertEqual('ia', g(r))
|
||||
|
||||
r.META = {'HTTP_ACCEPT_LANGUAGE': 'sr-latn'}
|
||||
self.assertEqual('sr-latn', g(r))
|
||||
|
||||
r.META = {'HTTP_ACCEPT_LANGUAGE': 'zh-hans'}
|
||||
self.assertEqual('zh-hans', g(r))
|
||||
|
||||
r.META = {'HTTP_ACCEPT_LANGUAGE': 'zh-hant'}
|
||||
self.assertEqual('zh-hant', g(r))
|
||||
|
||||
@override_settings(
|
||||
LANGUAGES=(
|
||||
('en', 'English'),
|
||||
|
|
Loading…
Reference in New Issue