mirror of https://github.com/django/django.git
Fixed #8149 -- Made File.__iter__() support universal newlines.
The following are recognized as ending a line: the Unix end-of-line convention '\n', the Windows convention '\r\n', and the old Macintosh convention '\r'. See PEP 278 (http://www.python.org/dev/peps/pep-0278) for details. Thanks tchaumeny for review.
This commit is contained in:
parent
eab3dc195e
commit
eb4f6de980
|
@ -102,16 +102,22 @@ class File(FileProxyMixin):
|
||||||
# Iterate over this file-like object by newlines
|
# Iterate over this file-like object by newlines
|
||||||
buffer_ = None
|
buffer_ = None
|
||||||
for chunk in self.chunks():
|
for chunk in self.chunks():
|
||||||
chunk_buffer = BytesIO(chunk)
|
for line in chunk.splitlines(True):
|
||||||
|
|
||||||
for line in chunk_buffer:
|
|
||||||
if buffer_:
|
if buffer_:
|
||||||
line = buffer_ + line
|
if endswith_cr(buffer_) and not equals_lf(line):
|
||||||
|
# Line split after a \r newline; yield buffer_.
|
||||||
|
yield buffer_
|
||||||
|
# Continue with line.
|
||||||
|
else:
|
||||||
|
# Line either split without a newline (line
|
||||||
|
# continues after buffer_) or with \r\n
|
||||||
|
# newline (line == b'\n').
|
||||||
|
line = buffer_ + line
|
||||||
|
# buffer_ handled, clear it.
|
||||||
buffer_ = None
|
buffer_ = None
|
||||||
|
|
||||||
# If this is the end of a line, yield
|
# If this is the end of a \n or \r\n line, yield.
|
||||||
# otherwise, wait for the next round
|
if endswith_lf(line):
|
||||||
if line[-1:] in (b'\n', b'\r'):
|
|
||||||
yield line
|
yield line
|
||||||
else:
|
else:
|
||||||
buffer_ = line
|
buffer_ = line
|
||||||
|
@ -165,3 +171,24 @@ class ContentFile(File):
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def endswith_cr(line):
|
||||||
|
"""
|
||||||
|
Return True if line (a text or byte string) ends with '\r'.
|
||||||
|
"""
|
||||||
|
return line.endswith('\r' if isinstance(line, six.text_type) else b'\r')
|
||||||
|
|
||||||
|
|
||||||
|
def endswith_lf(line):
|
||||||
|
"""
|
||||||
|
Return True if line (a text or byte string) ends with '\n'.
|
||||||
|
"""
|
||||||
|
return line.endswith('\n' if isinstance(line, six.text_type) else b'\n')
|
||||||
|
|
||||||
|
|
||||||
|
def equals_lf(line):
|
||||||
|
"""
|
||||||
|
Return True if line (a text or byte string) equals '\n'.
|
||||||
|
"""
|
||||||
|
return line == ('\n' if isinstance(line, six.text_type) else b'\n')
|
||||||
|
|
|
@ -53,6 +53,15 @@ The ``File`` Class
|
||||||
|
|
||||||
Iterate over the file yielding one line at a time.
|
Iterate over the file yielding one line at a time.
|
||||||
|
|
||||||
|
.. versionchanged:: 1.8
|
||||||
|
|
||||||
|
``File`` now uses `universal newlines`_. The following are
|
||||||
|
recognized as ending a line: the Unix end-of-line convention
|
||||||
|
``'\n'``, the Windows convention ``'\r\n'``, and the old Macintosh
|
||||||
|
convention ``'\r'``.
|
||||||
|
|
||||||
|
.. _universal newlines: http://www.python.org/dev/peps/pep-0278
|
||||||
|
|
||||||
.. method:: chunks([chunk_size=None])
|
.. method:: chunks([chunk_size=None])
|
||||||
|
|
||||||
Iterate over the file yielding "chunks" of a given size. ``chunk_size``
|
Iterate over the file yielding "chunks" of a given size. ``chunk_size``
|
||||||
|
|
|
@ -82,10 +82,15 @@ Here are some useful attributes of ``UploadedFile``:
|
||||||
for line in uploadedfile:
|
for line in uploadedfile:
|
||||||
do_something_with(line)
|
do_something_with(line)
|
||||||
|
|
||||||
However, *unlike* standard Python files, :class:`UploadedFile` only
|
Lines are split using `universal newlines`_. The following are recognized
|
||||||
understands ``\n`` (also known as "Unix-style") line endings. If you know
|
as ending a line: the Unix end-of-line convention ``'\n'``, the Windows
|
||||||
that you need to handle uploaded files with different line endings, you'll
|
convention ``'\r\n'``, and the old Macintosh convention ``'\r'``.
|
||||||
need to do so in your view.
|
|
||||||
|
.. _universal newlines: http://www.python.org/dev/peps/pep-0278
|
||||||
|
|
||||||
|
.. versionchanged:: 1.8
|
||||||
|
|
||||||
|
Previously, lines were only split on the Unix end-of-line convention ``'\n'``.
|
||||||
|
|
||||||
Subclasses of ``UploadedFile`` include:
|
Subclasses of ``UploadedFile`` include:
|
||||||
|
|
||||||
|
|
|
@ -659,6 +659,13 @@ Miscellaneous
|
||||||
* By default, :ref:`call_command <call-command>` now always skips the check
|
* By default, :ref:`call_command <call-command>` now always skips the check
|
||||||
framework (unless you pass it ``skip_checks=False``).
|
framework (unless you pass it ``skip_checks=False``).
|
||||||
|
|
||||||
|
* When iterating over lines, :class:`~django.core.files.File` now uses
|
||||||
|
`universal newlines`_. The following are recognized as ending a line: the
|
||||||
|
Unix end-of-line convention ``'\n'``, the Windows convention ``'\r\n'``, and
|
||||||
|
the old Macintosh convention ``'\r'``.
|
||||||
|
|
||||||
|
.. _universal newlines: http://www.python.org/dev/peps/pep-0278
|
||||||
|
|
||||||
.. _deprecated-features-1.8:
|
.. _deprecated-features-1.8:
|
||||||
|
|
||||||
Features deprecated in 1.8
|
Features deprecated in 1.8
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from io import BytesIO
|
from io import BytesIO, StringIO
|
||||||
import os
|
import os
|
||||||
import gzip
|
import gzip
|
||||||
import tempfile
|
import tempfile
|
||||||
|
@ -72,6 +72,54 @@ class FileTests(unittest.TestCase):
|
||||||
file = File(BytesIO(b'one\ntwo\nthree'))
|
file = File(BytesIO(b'one\ntwo\nthree'))
|
||||||
self.assertEqual(list(file), [b'one\n', b'two\n', b'three'])
|
self.assertEqual(list(file), [b'one\n', b'two\n', b'three'])
|
||||||
|
|
||||||
|
def test_file_iteration_windows_newlines(self):
|
||||||
|
"""
|
||||||
|
#8149 - File objects with \r\n line endings should yield lines
|
||||||
|
when iterated over.
|
||||||
|
"""
|
||||||
|
f = File(BytesIO(b'one\r\ntwo\r\nthree'))
|
||||||
|
self.assertEqual(list(f), [b'one\r\n', b'two\r\n', b'three'])
|
||||||
|
|
||||||
|
def test_file_iteration_mac_newlines(self):
|
||||||
|
"""
|
||||||
|
#8149 - File objects with \r line endings should yield lines
|
||||||
|
when iterated over.
|
||||||
|
"""
|
||||||
|
f = File(BytesIO(b'one\rtwo\rthree'))
|
||||||
|
self.assertEqual(list(f), [b'one\r', b'two\r', b'three'])
|
||||||
|
|
||||||
|
def test_file_iteration_mixed_newlines(self):
|
||||||
|
f = File(BytesIO(b'one\rtwo\nthree\r\nfour'))
|
||||||
|
self.assertEqual(list(f), [b'one\r', b'two\n', b'three\r\n', b'four'])
|
||||||
|
|
||||||
|
def test_file_iteration_with_unix_newline_at_chunk_boundary(self):
|
||||||
|
f = File(BytesIO(b'one\ntwo\nthree'))
|
||||||
|
# Set chunk size to create a boundary after \n:
|
||||||
|
# b'one\n...
|
||||||
|
# ^
|
||||||
|
f.DEFAULT_CHUNK_SIZE = 4
|
||||||
|
self.assertEqual(list(f), [b'one\n', b'two\n', b'three'])
|
||||||
|
|
||||||
|
def test_file_iteration_with_windows_newline_at_chunk_boundary(self):
|
||||||
|
f = File(BytesIO(b'one\r\ntwo\r\nthree'))
|
||||||
|
# Set chunk size to create a boundary between \r and \n:
|
||||||
|
# b'one\r\n...
|
||||||
|
# ^
|
||||||
|
f.DEFAULT_CHUNK_SIZE = 4
|
||||||
|
self.assertEqual(list(f), [b'one\r\n', b'two\r\n', b'three'])
|
||||||
|
|
||||||
|
def test_file_iteration_with_mac_newline_at_chunk_boundary(self):
|
||||||
|
f = File(BytesIO(b'one\rtwo\rthree'))
|
||||||
|
# Set chunk size to create a boundary after \r:
|
||||||
|
# b'one\r...
|
||||||
|
# ^
|
||||||
|
f.DEFAULT_CHUNK_SIZE = 4
|
||||||
|
self.assertEqual(list(f), [b'one\r', b'two\r', b'three'])
|
||||||
|
|
||||||
|
def test_file_iteration_with_text(self):
|
||||||
|
f = File(StringIO('one\ntwo\nthree'))
|
||||||
|
self.assertEqual(list(f), ['one\n', 'two\n', 'three'])
|
||||||
|
|
||||||
|
|
||||||
class NoNameFileTestCase(unittest.TestCase):
|
class NoNameFileTestCase(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue