Refs #23919 -- Removed re.U and re.UNICODE (default on Python 3).

Mariusz Felisiak 2017-01-18 22:52:25 +01:00 committed by Tim Graham
parent 990ce6386c
commit c22212220a
7 changed files with 23 additions and 19 deletions
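On Python 3, str patterns are compiled with re.UNICODE implicitly, so the explicit flags removed in the diffs below are redundant. A minimal illustration using only the standard re module (nothing Django-specific):

    import re

    # re.UNICODE is already part of the default flags for str patterns on Python 3 ...
    assert re.compile(r'^\w+$').flags == re.compile(r'^\w+$', re.UNICODE).flags
    # ... so \w matches non-ASCII word characters even without re.U.
    assert re.match(r'^\w+$', 'żółć')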

View File

@@ -241,7 +241,7 @@ validate_slug = RegexValidator(
'invalid'
)
-slug_unicode_re = _lazy_re_compile(r'^[-\w]+\Z', re.U)
+slug_unicode_re = _lazy_re_compile(r'^[-\w]+\Z')
validate_unicode_slug = RegexValidator(
slug_unicode_re,
_("Enter a valid 'slug' consisting of Unicode letters, numbers, underscores, or hyphens."),

View File

@@ -5,6 +5,7 @@ import decimal
import enum
import functools
import math
+import re
import types
import uuid
from importlib import import_module
@@ -241,11 +242,14 @@ class RegexSerializer(BaseSerializer):
def serialize(self):
imports = {"import re"}
regex_pattern, pattern_imports = serializer_factory(self.value.pattern).serialize()
-regex_flags, flag_imports = serializer_factory(self.value.flags).serialize()
+# Turn off default implicit flags (e.g. re.U) because regexes with the
+# same implicit and explicit flags aren't equal.
+flags = self.value.flags ^ re.compile('').flags
+regex_flags, flag_imports = serializer_factory(flags).serialize()
imports.update(pattern_imports)
imports.update(flag_imports)
args = [regex_pattern]
-if self.value.flags:
+if flags:
args.append(regex_flags)
return "re.compile(%s)" % ', '.join(args), imports

View File

@@ -522,7 +522,7 @@ class RegexField(CharField):
def _set_regex(self, regex):
if isinstance(regex, str):
-regex = re.compile(regex, re.UNICODE)
+regex = re.compile(regex)
self._regex = regex
if hasattr(self, '_regex_validator') and self._regex_validator in self.validators:
self.validators.remove(self._regex_validator)

View File

@@ -621,7 +621,7 @@ filter_raw_string = r"""
'arg_sep': re.escape(FILTER_ARGUMENT_SEPARATOR),
}
-filter_re = re.compile(filter_raw_string, re.UNICODE | re.VERBOSE)
+filter_re = re.compile(filter_raw_string, re.VERBOSE)
class FilterExpression:

View File

@@ -99,7 +99,7 @@ class LocaleRegexDescriptor:
Compile and return the given regular expression.
"""
try:
-return re.compile(regex, re.UNICODE)
+return re.compile(regex)
except re.error as e:
raise ImproperlyConfigured(
'"%s" is not a valid regular expression: %s' % (regex, e)
@@ -453,7 +453,7 @@ class RegexURLResolver(LocaleRegexProvider):
# Then, if we have a match, redo the substitution with quoted
# arguments in order to return a properly encoded URL.
candidate_pat = _prefix.replace('%', '%%') + result
-if re.search('^%s%s' % (re.escape(_prefix), pattern), candidate_pat % candidate_subs, re.UNICODE):
+if re.search('^%s%s' % (re.escape(_prefix), pattern), candidate_pat % candidate_subs):
# safe characters from `pchar` definition of RFC 3986
url = urlquote(candidate_pat % candidate_subs, safe=RFC3986_SUBDELIMS + str('/~:@'))
# Don't allow construction of scheme relative urls.
@@ -513,5 +513,5 @@ class LocaleRegexURLResolver(RegexURLResolver):
regex_string = ''
else:
regex_string = '^%s/' % language_code
-self._regex_dict[language_code] = re.compile(regex_string, re.UNICODE)
+self._regex_dict[language_code] = re.compile(regex_string)
return self._regex_dict[language_code]

View File

@@ -19,8 +19,8 @@ def capfirst(x):
# Set up regular expressions
-re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U | re.S)
-re_chars = re.compile(r'<.*?>|(.)', re.U | re.S)
+re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.S)
+re_chars = re.compile(r'<.*?>|(.)', re.S)
re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)
re_newlines = re.compile(r'\r\n|\r') # Used in normalize_newlines
re_camel_case = re.compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))')
@@ -417,8 +417,8 @@ def slugify(value, allow_unicode=False):
value = force_text(value)
if allow_unicode:
value = unicodedata.normalize('NFKC', value)
-value = re.sub(r'[^\w\s-]', '', value, flags=re.U).strip().lower()
-return mark_safe(re.sub(r'[-\s]+', '-', value, flags=re.U))
+value = re.sub(r'[^\w\s-]', '', value).strip().lower()
+return mark_safe(re.sub(r'[-\s]+', '-', value))
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
value = re.sub(r'[^\w\s-]', '', value).strip().lower()
return mark_safe(re.sub(r'[-\s]+', '-', value))
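In slugify() above, dropping flags=re.U doesn't change behaviour, because \w and \s are already Unicode-aware for str patterns. A small standalone sketch of the allow_unicode branch (without Django's force_text/mark_safe wrappers):

    import re
    import unicodedata

    value = unicodedata.normalize('NFKC', 'Hello Wörld!')
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    print(re.sub(r'[-\s]+', '-', value))  # hello-wörld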

View File

@@ -247,7 +247,7 @@ class WriterTests(SimpleTestCase):
)
def test_serialize_lazy_objects(self):
-pattern = re.compile(r'^foo$', re.UNICODE)
+pattern = re.compile(r'^foo$')
lazy_pattern = SimpleLazyObject(lambda: pattern)
self.assertEqual(self.serialize_round_trip(lazy_pattern), pattern)
@@ -403,7 +403,7 @@ class WriterTests(SimpleTestCase):
"""
Make sure compiled regex can be serialized.
"""
-regex = re.compile(r'^\w+$', re.U)
+regex = re.compile(r'^\w+$')
self.assertSerializedEqual(regex)
def test_serialize_class_based_validators(self):
@@ -417,18 +417,18 @@
self.serialize_round_trip(validator)
# Test with a compiled regex.
-validator = RegexValidator(regex=re.compile(r'^\w+$', re.U))
+validator = RegexValidator(regex=re.compile(r'^\w+$'))
string = MigrationWriter.serialize(validator)[0]
self.assertEqual(string, "django.core.validators.RegexValidator(regex=re.compile('^\\\\w+$', 32))")
self.assertEqual(string, "django.core.validators.RegexValidator(regex=re.compile('^\\\\w+$'))")
self.serialize_round_trip(validator)
# Test a string regex with flag
-validator = RegexValidator(r'^[0-9]+$', flags=re.U)
+validator = RegexValidator(r'^[0-9]+$', flags=re.S)
string = MigrationWriter.serialize(validator)[0]
if PY36:
self.assertEqual(string, "django.core.validators.RegexValidator('^[0-9]+$', flags=re.RegexFlag(32))")
self.assertEqual(string, "django.core.validators.RegexValidator('^[0-9]+$', flags=re.RegexFlag(16))")
else:
self.assertEqual(string, "django.core.validators.RegexValidator('^[0-9]+$', flags=32)")
self.assertEqual(string, "django.core.validators.RegexValidator('^[0-9]+$', flags=16)")
self.serialize_round_trip(validator)
# Test message and code