mirror of https://github.com/django/django.git
Fixed 35467 -- Replaced urlparse with urlsplit where appropriate.
This work should not generate any change of functionality, and `urlsplit` is approximately 6x faster. Most use cases of `urlparse` didn't touch the path, so they can be converted to `urlsplit` without any issue. Most of those which do use `.path`, simply parse the URL, mutate the querystring, then put them back together, which is also fine (so long as urlunsplit is used).
This commit is contained in:
parent
02dab94c7b
commit
ff308a0604
|
@ -6,7 +6,7 @@ import warnings
|
|||
from functools import partial, update_wrapper
|
||||
from urllib.parse import parse_qsl
|
||||
from urllib.parse import quote as urlquote
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from django import forms
|
||||
from django.conf import settings
|
||||
|
@ -1384,7 +1384,7 @@ class ModelAdmin(BaseModelAdmin):
|
|||
)
|
||||
|
||||
def _get_preserved_qsl(self, request, preserved_filters):
|
||||
query_string = urlparse(request.build_absolute_uri()).query
|
||||
query_string = urlsplit(request.build_absolute_uri()).query
|
||||
return parse_qsl(query_string.replace(preserved_filters, ""))
|
||||
|
||||
def response_add(self, request, obj, post_url_continue=None):
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from urllib.parse import parse_qsl, unquote, urlparse, urlunparse
|
||||
from urllib.parse import parse_qsl, unquote, urlsplit, urlunsplit
|
||||
|
||||
from django import template
|
||||
from django.contrib.admin.utils import quote
|
||||
|
@ -24,8 +24,8 @@ def add_preserved_filters(context, url, popup=False, to_field=None):
|
|||
preserved_filters = context.get("preserved_filters")
|
||||
preserved_qsl = context.get("preserved_qsl")
|
||||
|
||||
parsed_url = list(urlparse(url))
|
||||
parsed_qs = dict(parse_qsl(parsed_url[4]))
|
||||
parsed_url = list(urlsplit(url))
|
||||
parsed_qs = dict(parse_qsl(parsed_url[3]))
|
||||
merged_qs = {}
|
||||
|
||||
if preserved_qsl:
|
||||
|
@ -66,5 +66,5 @@ def add_preserved_filters(context, url, popup=False, to_field=None):
|
|||
|
||||
merged_qs.update(parsed_qs)
|
||||
|
||||
parsed_url[4] = urlencode(merged_qs)
|
||||
return urlunparse(parsed_url)
|
||||
parsed_url[3] = urlencode(merged_qs)
|
||||
return urlunsplit(parsed_url)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import asyncio
|
||||
from functools import wraps
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from asgiref.sync import async_to_sync, sync_to_async
|
||||
|
||||
|
@ -25,8 +25,8 @@ def user_passes_test(
|
|||
resolved_login_url = resolve_url(login_url or settings.LOGIN_URL)
|
||||
# If the login url is the same scheme and net location then just
|
||||
# use the path as the "next" url.
|
||||
login_scheme, login_netloc = urlparse(resolved_login_url)[:2]
|
||||
current_scheme, current_netloc = urlparse(path)[:2]
|
||||
login_scheme, login_netloc = urlsplit(resolved_login_url)[:2]
|
||||
current_scheme, current_netloc = urlsplit(path)[:2]
|
||||
if (not login_scheme or login_scheme == current_scheme) and (
|
||||
not login_netloc or login_netloc == current_netloc
|
||||
):
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from functools import partial
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib import auth
|
||||
|
@ -74,8 +74,8 @@ class LoginRequiredMiddleware(MiddlewareMixin):
|
|||
resolved_login_url = resolve_url(self.get_login_url(view_func))
|
||||
# If the login url is the same scheme and net location then use the
|
||||
# path as the "next" url.
|
||||
login_scheme, login_netloc = urlparse(resolved_login_url)[:2]
|
||||
current_scheme, current_netloc = urlparse(path)[:2]
|
||||
login_scheme, login_netloc = urlsplit(resolved_login_url)[:2]
|
||||
current_scheme, current_netloc = urlsplit(path)[:2]
|
||||
if (not login_scheme or login_scheme == current_scheme) and (
|
||||
not login_netloc or login_netloc == current_netloc
|
||||
):
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth import REDIRECT_FIELD_NAME
|
||||
|
@ -51,8 +51,8 @@ class AccessMixin:
|
|||
resolved_login_url = resolve_url(self.get_login_url())
|
||||
# If the login url is the same scheme and net location then use the
|
||||
# path as the "next" url.
|
||||
login_scheme, login_netloc = urlparse(resolved_login_url)[:2]
|
||||
current_scheme, current_netloc = urlparse(path)[:2]
|
||||
login_scheme, login_netloc = urlsplit(resolved_login_url)[:2]
|
||||
current_scheme, current_netloc = urlsplit(path)[:2]
|
||||
if (not login_scheme or login_scheme == current_scheme) and (
|
||||
not login_netloc or login_netloc == current_netloc
|
||||
):
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from urllib.parse import urlparse, urlunparse
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
|
@ -183,13 +183,13 @@ def redirect_to_login(next, login_url=None, redirect_field_name=REDIRECT_FIELD_N
|
|||
"""
|
||||
resolved_url = resolve_url(login_url or settings.LOGIN_URL)
|
||||
|
||||
login_url_parts = list(urlparse(resolved_url))
|
||||
login_url_parts = list(urlsplit(resolved_url))
|
||||
if redirect_field_name:
|
||||
querystring = QueryDict(login_url_parts[4], mutable=True)
|
||||
querystring = QueryDict(login_url_parts[3], mutable=True)
|
||||
querystring[redirect_field_name] = next
|
||||
login_url_parts[4] = querystring.urlencode(safe="/")
|
||||
login_url_parts[3] = querystring.urlencode(safe="/")
|
||||
|
||||
return HttpResponseRedirect(urlunparse(login_url_parts))
|
||||
return HttpResponseRedirect(urlunsplit(login_url_parts))
|
||||
|
||||
|
||||
# Class-based password reset views
|
||||
|
|
|
@ -36,13 +36,13 @@ class StaticFilesHandlerMixin:
|
|||
* the host is provided as part of the base_url
|
||||
* the request's path isn't under the media path (or equal)
|
||||
"""
|
||||
return path.startswith(self.base_url[2]) and not self.base_url[1]
|
||||
return path.startswith(self.base_url.path) and not self.base_url.netloc
|
||||
|
||||
def file_path(self, url):
|
||||
"""
|
||||
Return the relative path to the media file on disk for the given URL.
|
||||
"""
|
||||
relative_url = url.removeprefix(self.base_url[2])
|
||||
relative_url = url.removeprefix(self.base_url.path)
|
||||
return url2pathname(relative_url)
|
||||
|
||||
def serve(self, request):
|
||||
|
|
|
@ -792,13 +792,13 @@ class URLField(CharField):
|
|||
def to_python(self, value):
|
||||
def split_url(url):
|
||||
"""
|
||||
Return a list of url parts via urlparse.urlsplit(), or raise
|
||||
Return a list of url parts via urlsplit(), or raise
|
||||
ValidationError for some malformed URLs.
|
||||
"""
|
||||
try:
|
||||
return list(urlsplit(url))
|
||||
except ValueError:
|
||||
# urlparse.urlsplit can raise a ValueError with some
|
||||
# urlsplit can raise a ValueError with some
|
||||
# misformatted URLs.
|
||||
raise ValidationError(self.error_messages["invalid"], code="invalid")
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ import time
|
|||
import warnings
|
||||
from email.header import Header
|
||||
from http.client import responses
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from asgiref.sync import async_to_sync, sync_to_async
|
||||
|
||||
|
@ -616,7 +616,7 @@ class HttpResponseRedirectBase(HttpResponse):
|
|||
def __init__(self, redirect_to, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self["Location"] = iri_to_uri(redirect_to)
|
||||
parsed = urlparse(str(redirect_to))
|
||||
parsed = urlsplit(str(redirect_to))
|
||||
if parsed.scheme and parsed.scheme not in self.allowed_schemes:
|
||||
raise DisallowedRedirect(
|
||||
"Unsafe redirect to URL with protocol '%s'" % parsed.scheme
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import re
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import PermissionDenied
|
||||
|
@ -171,7 +171,7 @@ class BrokenLinkEmailsMiddleware(MiddlewareMixin):
|
|||
|
||||
# The referer is equal to the current URL, ignoring the scheme (assumed
|
||||
# to be a poorly implemented bot).
|
||||
parsed_referer = urlparse(referer)
|
||||
parsed_referer = urlsplit(referer)
|
||||
if parsed_referer.netloc in ["", domain] and parsed_referer.path == uri:
|
||||
return True
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ against request forgeries from other sites.
|
|||
import logging
|
||||
import string
|
||||
from collections import defaultdict
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import DisallowedHost, ImproperlyConfigured
|
||||
|
@ -174,7 +174,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
|
|||
@cached_property
|
||||
def csrf_trusted_origins_hosts(self):
|
||||
return [
|
||||
urlparse(origin).netloc.lstrip("*")
|
||||
urlsplit(origin).netloc.lstrip("*")
|
||||
for origin in settings.CSRF_TRUSTED_ORIGINS
|
||||
]
|
||||
|
||||
|
@ -190,7 +190,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
|
|||
"""
|
||||
allowed_origin_subdomains = defaultdict(list)
|
||||
for parsed in (
|
||||
urlparse(origin)
|
||||
urlsplit(origin)
|
||||
for origin in settings.CSRF_TRUSTED_ORIGINS
|
||||
if "*" in origin
|
||||
):
|
||||
|
@ -284,7 +284,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
|
|||
if request_origin in self.allowed_origins_exact:
|
||||
return True
|
||||
try:
|
||||
parsed_origin = urlparse(request_origin)
|
||||
parsed_origin = urlsplit(request_origin)
|
||||
except ValueError:
|
||||
return False
|
||||
request_scheme = parsed_origin.scheme
|
||||
|
@ -300,7 +300,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
|
|||
raise RejectRequest(REASON_NO_REFERER)
|
||||
|
||||
try:
|
||||
referer = urlparse(referer)
|
||||
referer = urlsplit(referer)
|
||||
except ValueError:
|
||||
raise RejectRequest(REASON_MALFORMED_REFERER)
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ from functools import partial
|
|||
from http import HTTPStatus
|
||||
from importlib import import_module
|
||||
from io import BytesIO, IOBase
|
||||
from urllib.parse import unquote_to_bytes, urljoin, urlparse, urlsplit
|
||||
from urllib.parse import unquote_to_bytes, urljoin, urlsplit
|
||||
|
||||
from asgiref.sync import sync_to_async
|
||||
|
||||
|
@ -458,11 +458,7 @@ class RequestFactory:
|
|||
return json.dumps(data, cls=self.json_encoder) if should_encode else data
|
||||
|
||||
def _get_path(self, parsed):
|
||||
path = parsed.path
|
||||
# If there are parameters, add them
|
||||
if parsed.params:
|
||||
path += ";" + parsed.params
|
||||
path = unquote_to_bytes(path)
|
||||
path = unquote_to_bytes(parsed.path)
|
||||
# Replace the behavior where non-ASCII values in the WSGI environ are
|
||||
# arbitrarily decoded with ISO-8859-1.
|
||||
# Refs comment in `get_bytes_from_wsgi()`.
|
||||
|
@ -647,7 +643,7 @@ class RequestFactory:
|
|||
**extra,
|
||||
):
|
||||
"""Construct an arbitrary HTTP request."""
|
||||
parsed = urlparse(str(path)) # path can be lazy
|
||||
parsed = urlsplit(str(path)) # path can be lazy
|
||||
data = force_bytes(data, settings.DEFAULT_CHARSET)
|
||||
r = {
|
||||
"PATH_INFO": self._get_path(parsed),
|
||||
|
@ -671,8 +667,7 @@ class RequestFactory:
|
|||
# If QUERY_STRING is absent or empty, we want to extract it from the URL.
|
||||
if not r.get("QUERY_STRING"):
|
||||
# WSGI requires latin-1 encoded strings. See get_path_info().
|
||||
query_string = parsed[4].encode().decode("iso-8859-1")
|
||||
r["QUERY_STRING"] = query_string
|
||||
r["QUERY_STRING"] = parsed.query.encode().decode("iso-8859-1")
|
||||
return self.request(**r)
|
||||
|
||||
|
||||
|
@ -748,7 +743,7 @@ class AsyncRequestFactory(RequestFactory):
|
|||
**extra,
|
||||
):
|
||||
"""Construct an arbitrary HTTP request."""
|
||||
parsed = urlparse(str(path)) # path can be lazy.
|
||||
parsed = urlsplit(str(path)) # path can be lazy.
|
||||
data = force_bytes(data, settings.DEFAULT_CHARSET)
|
||||
s = {
|
||||
"method": method,
|
||||
|
@ -772,7 +767,7 @@ class AsyncRequestFactory(RequestFactory):
|
|||
else:
|
||||
# If QUERY_STRING is absent or empty, we want to extract it from
|
||||
# the URL.
|
||||
s["query_string"] = parsed[4]
|
||||
s["query_string"] = parsed.query
|
||||
if headers:
|
||||
extra.update(HttpHeaders.to_asgi_names(headers))
|
||||
s["headers"] += [
|
||||
|
|
|
@ -21,7 +21,7 @@ from urllib.parse import (
|
|||
urljoin,
|
||||
urlparse,
|
||||
urlsplit,
|
||||
urlunparse,
|
||||
urlunsplit,
|
||||
)
|
||||
from urllib.request import url2pathname
|
||||
|
||||
|
@ -541,11 +541,9 @@ class SimpleTestCase(unittest.TestCase):
|
|||
def normalize(url):
|
||||
"""Sort the URL's query string parameters."""
|
||||
url = str(url) # Coerce reverse_lazy() URLs.
|
||||
scheme, netloc, path, params, query, fragment = urlparse(url)
|
||||
scheme, netloc, path, query, fragment = urlsplit(url)
|
||||
query_parts = sorted(parse_qsl(query))
|
||||
return urlunparse(
|
||||
(scheme, netloc, path, params, urlencode(query_parts), fragment)
|
||||
)
|
||||
return urlunsplit((scheme, netloc, path, urlencode(query_parts), fragment))
|
||||
|
||||
if msg_prefix:
|
||||
msg_prefix += ": "
|
||||
|
@ -1637,11 +1635,11 @@ class FSFilesHandler(WSGIHandler):
|
|||
* the host is provided as part of the base_url
|
||||
* the request's path isn't under the media path (or equal)
|
||||
"""
|
||||
return path.startswith(self.base_url[2]) and not self.base_url[1]
|
||||
return path.startswith(self.base_url.path) and not self.base_url.netloc
|
||||
|
||||
def file_path(self, url):
|
||||
"""Return the relative path to the file on disk for the given URL."""
|
||||
relative_url = url.removeprefix(self.base_url[2])
|
||||
relative_url = url.removeprefix(self.base_url.path)
|
||||
return url2pathname(relative_url)
|
||||
|
||||
def get_response(self, request):
|
||||
|
|
|
@ -6,7 +6,7 @@ from datetime import datetime, timezone
|
|||
from email.utils import formatdate
|
||||
from urllib.parse import quote, unquote
|
||||
from urllib.parse import urlencode as original_urlencode
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from django.utils.datastructures import MultiValueDict
|
||||
from django.utils.regex_helper import _lazy_re_compile
|
||||
|
@ -271,11 +271,11 @@ def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
|
|||
|
||||
def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
|
||||
# Chrome considers any URL with more than two slashes to be absolute, but
|
||||
# urlparse is not so flexible. Treat any url with three slashes as unsafe.
|
||||
# urlsplit is not so flexible. Treat any url with three slashes as unsafe.
|
||||
if url.startswith("///"):
|
||||
return False
|
||||
try:
|
||||
url_info = urlparse(url)
|
||||
url_info = urlsplit(url)
|
||||
except ValueError: # e.g. invalid IPv6 addresses
|
||||
return False
|
||||
# Forbid URLs like http:///example.com - with a scheme, but without a hostname.
|
||||
|
|
|
@ -203,7 +203,7 @@ A :class:`ResolverMatch` object can also be assigned to a triple::
|
|||
One possible use of :func:`~django.urls.resolve` would be to test whether a
|
||||
view would raise a ``Http404`` error before redirecting to it::
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlsplit
|
||||
from django.urls import resolve
|
||||
from django.http import Http404, HttpResponseRedirect
|
||||
|
||||
|
@ -215,7 +215,7 @@ view would raise a ``Http404`` error before redirecting to it::
|
|||
# modify the request and response as required, e.g. change locale
|
||||
# and set corresponding locale cookie
|
||||
|
||||
view, args, kwargs = resolve(urlparse(next)[2])
|
||||
view, args, kwargs = resolve(urlsplit(next).path)
|
||||
kwargs["request"] = request
|
||||
try:
|
||||
view(*args, **kwargs)
|
||||
|
|
|
@ -4,7 +4,7 @@ import re
|
|||
import unittest
|
||||
import zoneinfo
|
||||
from unittest import mock
|
||||
from urllib.parse import parse_qsl, urljoin, urlparse
|
||||
from urllib.parse import parse_qsl, urljoin, urlsplit
|
||||
|
||||
from django import forms
|
||||
from django.contrib import admin
|
||||
|
@ -357,7 +357,7 @@ class AdminViewBasicTest(AdminViewBasicTestCase):
|
|||
**save_option,
|
||||
},
|
||||
)
|
||||
parsed_url = urlparse(response.url)
|
||||
parsed_url = urlsplit(response.url)
|
||||
self.assertEqual(parsed_url.query, qsl)
|
||||
|
||||
def test_change_query_string_persists(self):
|
||||
|
@ -386,7 +386,7 @@ class AdminViewBasicTest(AdminViewBasicTestCase):
|
|||
**save_option,
|
||||
},
|
||||
)
|
||||
parsed_url = urlparse(response.url)
|
||||
parsed_url = urlsplit(response.url)
|
||||
self.assertEqual(parsed_url.query, qsl)
|
||||
|
||||
def test_basic_edit_GET(self):
|
||||
|
@ -8032,11 +8032,11 @@ class AdminKeepChangeListFiltersTests(TestCase):
|
|||
Assert that two URLs are equal despite the ordering
|
||||
of their querystring. Refs #22360.
|
||||
"""
|
||||
parsed_url1 = urlparse(url1)
|
||||
parsed_url1 = urlsplit(url1)
|
||||
path1 = parsed_url1.path
|
||||
parsed_qs1 = dict(parse_qsl(parsed_url1.query))
|
||||
|
||||
parsed_url2 = urlparse(url2)
|
||||
parsed_url2 = urlsplit(url2)
|
||||
path2 = parsed_url2.path
|
||||
parsed_qs2 = dict(parse_qsl(parsed_url2.query))
|
||||
|
||||
|
|
|
@ -709,25 +709,21 @@ class CsrfViewMiddlewareTestMixin(CsrfFunctionTestMixin):
|
|||
response = mw.process_view(req, post_form_view, (), {})
|
||||
self.assertContains(response, malformed_referer_msg, status_code=403)
|
||||
# missing scheme
|
||||
# >>> urlparse('//example.com/')
|
||||
# ParseResult(
|
||||
# scheme='', netloc='example.com', path='/', params='', query='', fragment='',
|
||||
# )
|
||||
# >>> urlsplit('//example.com/')
|
||||
# SplitResult(scheme='', netloc='example.com', path='/', query='', fragment='')
|
||||
req.META["HTTP_REFERER"] = "//example.com/"
|
||||
self._check_referer_rejects(mw, req)
|
||||
response = mw.process_view(req, post_form_view, (), {})
|
||||
self.assertContains(response, malformed_referer_msg, status_code=403)
|
||||
# missing netloc
|
||||
# >>> urlparse('https://')
|
||||
# ParseResult(
|
||||
# scheme='https', netloc='', path='', params='', query='', fragment='',
|
||||
# )
|
||||
# >>> urlsplit('https://')
|
||||
# SplitResult(scheme='https', netloc='', path='', query='', fragment='')
|
||||
req.META["HTTP_REFERER"] = "https://"
|
||||
self._check_referer_rejects(mw, req)
|
||||
response = mw.process_view(req, post_form_view, (), {})
|
||||
self.assertContains(response, malformed_referer_msg, status_code=403)
|
||||
# Invalid URL
|
||||
# >>> urlparse('https://[')
|
||||
# >>> urlsplit('https://[')
|
||||
# ValueError: Invalid IPv6 URL
|
||||
req.META["HTTP_REFERER"] = "https://["
|
||||
self._check_referer_rejects(mw, req)
|
||||
|
@ -979,7 +975,7 @@ class CsrfViewMiddlewareTestMixin(CsrfFunctionTestMixin):
|
|||
@override_settings(ALLOWED_HOSTS=["www.example.com"])
|
||||
def test_bad_origin_cannot_be_parsed(self):
|
||||
"""
|
||||
A POST request with an origin that can't be parsed by urlparse() is
|
||||
A POST request with an origin that can't be parsed by urlsplit() is
|
||||
rejected.
|
||||
"""
|
||||
req = self._get_POST_request_with_token()
|
||||
|
|
Loading…
Reference in New Issue