Fixed #8149 -- Made File.__iter__() support universal newlines.
The following are recognized as ending a line: the Unix end-of-line convention '\n', the Windows convention '\r\n', and the old Macintosh convention '\r'. See PEP 278 (http://www.python.org/dev/peps/pep-0278) for details. Thanks tchaumeny for the review.
This commit is contained in:
parent
eab3dc195e
commit
eb4f6de980
|
@ -102,16 +102,22 @@ class File(FileProxyMixin):
|
|||
# Iterate over this file-like object by newlines
|
||||
buffer_ = None
|
||||
for chunk in self.chunks():
|
||||
chunk_buffer = BytesIO(chunk)
|
||||
|
||||
for line in chunk_buffer:
|
||||
for line in chunk.splitlines(True):
|
||||
if buffer_:
|
||||
if endswith_cr(buffer_) and not equals_lf(line):
|
||||
# Line split after a \r newline; yield buffer_.
|
||||
yield buffer_
|
||||
# Continue with line.
|
||||
else:
|
||||
# Line either split without a newline (line
|
||||
# continues after buffer_) or with \r\n
|
||||
# newline (line == b'\n').
|
||||
line = buffer_ + line
|
||||
# buffer_ handled, clear it.
|
||||
buffer_ = None
|
||||
|
||||
# If this is the end of a line, yield
|
||||
# otherwise, wait for the next round
|
||||
if line[-1:] in (b'\n', b'\r'):
|
||||
# If this is the end of a \n or \r\n line, yield.
|
||||
if endswith_lf(line):
|
||||
yield line
|
||||
else:
|
||||
buffer_ = line
|
||||
|
@ -165,3 +171,24 @@ class ContentFile(File):
|
|||
|
||||
def close(self):
    # Intentionally does nothing: calling close() has no effect here.
    return None
|
||||
|
||||
|
||||
def endswith_cr(line):
    """
    Tell whether line (a text or byte string) finishes with a carriage
    return (CR) character.
    """
    # Pick a suffix of the same string type as line so that endswith()
    # never mixes text and bytes.
    if isinstance(line, six.text_type):
        suffix = '\r'
    else:
        suffix = b'\r'
    return line.endswith(suffix)
|
||||
|
||||
|
||||
def endswith_lf(line):
    """
    Tell whether line (a text or byte string) finishes with a line feed
    (LF) character.
    """
    # Pick a suffix of the same string type as line so that endswith()
    # never mixes text and bytes.
    if isinstance(line, six.text_type):
        suffix = '\n'
    else:
        suffix = b'\n'
    return line.endswith(suffix)
|
||||
|
||||
|
||||
def equals_lf(line):
    """
    Tell whether line (a text or byte string) consists of exactly one
    line feed (LF) character.
    """
    # Compare against a newline of the same string type as line.
    if isinstance(line, six.text_type):
        newline = '\n'
    else:
        newline = b'\n'
    return line == newline
|
||||
|
|
|
@ -53,6 +53,15 @@ The ``File`` Class
|
|||
|
||||
Iterate over the file yielding one line at a time.
|
||||
|
||||
.. versionchanged:: 1.8
|
||||
|
||||
``File`` now uses `universal newlines`_. The following are
|
||||
recognized as ending a line: the Unix end-of-line convention
|
||||
``'\n'``, the Windows convention ``'\r\n'``, and the old Macintosh
|
||||
convention ``'\r'``.
|
||||
|
||||
.. _universal newlines: http://www.python.org/dev/peps/pep-0278
|
||||
|
||||
.. method:: chunks([chunk_size=None])
|
||||
|
||||
Iterate over the file yielding "chunks" of a given size. ``chunk_size``
|
||||
|
|
|
@ -82,10 +82,15 @@ Here are some useful attributes of ``UploadedFile``:
|
|||
for line in uploadedfile:
|
||||
do_something_with(line)
|
||||
|
||||
However, *unlike* standard Python files, :class:`UploadedFile` only
|
||||
understands ``\n`` (also known as "Unix-style") line endings. If you know
|
||||
that you need to handle uploaded files with different line endings, you'll
|
||||
need to do so in your view.
|
||||
Lines are split using `universal newlines`_. The following are recognized
|
||||
as ending a line: the Unix end-of-line convention ``'\n'``, the Windows
|
||||
convention ``'\r\n'``, and the old Macintosh convention ``'\r'``.
|
||||
|
||||
.. _universal newlines: http://www.python.org/dev/peps/pep-0278
|
||||
|
||||
.. versionchanged:: 1.8
|
||||
|
||||
Previously lines were only split on the Unix end-of-line ``'\n'``.
|
||||
|
||||
Subclasses of ``UploadedFile`` include:
|
||||
|
||||
|
|
|
@ -659,6 +659,13 @@ Miscellaneous
|
|||
* By default, :ref:`call_command <call-command>` now always skips the check
|
||||
framework (unless you pass it ``skip_checks=False``).
|
||||
|
||||
* When iterating over lines, :class:`~django.core.files.File` now uses
|
||||
`universal newlines`_. The following are recognized as ending a line: the
|
||||
Unix end-of-line convention ``'\n'``, the Windows convention ``'\r\n'``, and
|
||||
the old Macintosh convention ``'\r'``.
|
||||
|
||||
.. _universal newlines: http://www.python.org/dev/peps/pep-0278
|
||||
|
||||
.. _deprecated-features-1.8:
|
||||
|
||||
Features deprecated in 1.8
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from io import BytesIO
|
||||
from io import BytesIO, StringIO
|
||||
import os
|
||||
import gzip
|
||||
import tempfile
|
||||
|
@ -72,6 +72,54 @@ class FileTests(unittest.TestCase):
|
|||
file = File(BytesIO(b'one\ntwo\nthree'))
|
||||
self.assertEqual(list(file), [b'one\n', b'two\n', b'three'])
|
||||
|
||||
def test_file_iteration_windows_newlines(self):
    """
    #8149 - Iterating over a File whose content uses Windows (CRLF)
    line endings should yield the individual lines.
    """
    payload = b'one\r\ntwo\r\nthree'
    expected_lines = [b'one\r\n', b'two\r\n', b'three']
    self.assertEqual(list(File(BytesIO(payload))), expected_lines)
|
||||
|
||||
def test_file_iteration_mac_newlines(self):
    """
    #8149 - Iterating over a File whose content uses old Macintosh (CR)
    line endings should yield the individual lines.
    """
    payload = b'one\rtwo\rthree'
    expected_lines = [b'one\r', b'two\r', b'three']
    self.assertEqual(list(File(BytesIO(payload))), expected_lines)
|
||||
|
||||
def test_file_iteration_mixed_newlines(self):
    # A single stream that mixes CR, LF and CRLF terminators must be
    # split after each of them, keeping the terminator on its line.
    payload = b'one\rtwo\nthree\r\nfour'
    expected_lines = [b'one\r', b'two\n', b'three\r\n', b'four']
    self.assertEqual(list(File(BytesIO(payload))), expected_lines)
|
||||
|
||||
def test_file_iteration_with_unix_newline_at_chunk_boundary(self):
    wrapped = File(BytesIO(b'one\ntwo\nthree'))
    # Choose a chunk size that places a chunk boundary immediately
    # after the first \n, i.e. right behind b'one\n'.
    wrapped.DEFAULT_CHUNK_SIZE = 4
    lines = list(wrapped)
    self.assertEqual(lines, [b'one\n', b'two\n', b'three'])
|
||||
|
||||
def test_file_iteration_with_windows_newline_at_chunk_boundary(self):
    wrapped = File(BytesIO(b'one\r\ntwo\r\nthree'))
    # Choose a chunk size that places a chunk boundary between the \r
    # and the \n of the first line ending, i.e. right behind b'one\r'.
    wrapped.DEFAULT_CHUNK_SIZE = 4
    lines = list(wrapped)
    self.assertEqual(lines, [b'one\r\n', b'two\r\n', b'three'])
|
||||
|
||||
def test_file_iteration_with_mac_newline_at_chunk_boundary(self):
    wrapped = File(BytesIO(b'one\rtwo\rthree'))
    # Choose a chunk size that places a chunk boundary immediately
    # after the first \r, i.e. right behind b'one\r'.
    wrapped.DEFAULT_CHUNK_SIZE = 4
    lines = list(wrapped)
    self.assertEqual(lines, [b'one\r', b'two\r', b'three'])
|
||||
|
||||
def test_file_iteration_with_text(self):
    # Same line-splitting check as the bytes tests, but wrapping a text
    # stream so the yielded lines are text strings.
    payload = 'one\ntwo\nthree'
    expected_lines = ['one\n', 'two\n', 'three']
    self.assertEqual(list(File(StringIO(payload))), expected_lines)
|
||||
|
||||
|
||||
class NoNameFileTestCase(unittest.TestCase):
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue