Refs #30190 -- Minor edits to JSONL serializer.

Follow up to e29637681b.
This commit is contained in:
Mariusz Felisiak 2020-06-17 07:59:40 +02:00 committed by GitHub
parent e36028fb9c
commit 78c811334c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 104 deletions

View File

@ -50,7 +50,7 @@ def Deserializer(stream_or_string, **options):
if not line.strip(): if not line.strip():
continue continue
try: try:
yield list(PythonDeserializer([json.loads(line), ], **options))[0] yield from PythonDeserializer([json.loads(line)], **options)
except (GeneratorExit, DeserializationError): except (GeneratorExit, DeserializationError):
raise raise
except Exception as exc: except Exception as exc:

View File

@ -318,12 +318,12 @@ JSONL
.. versionadded:: 3.2 .. versionadded:: 3.2
*JSONL* stands for *JSON Lines*. With this format, objects are separated by new *JSONL* stands for *JSON Lines*. With this format, objects are separated by new
lines, and each line contains a valid JSON object. JSONL serialized data look lines, and each line contains a valid JSON object. JSONL serialized data looks
like this:: like this::
{ "pk": "4b678b301dfd8a4e0dad910de3ae245b", "model": "sessions.session", "fields": { ... }} {"pk": "4b678b301dfd8a4e0dad910de3ae245b", "model": "sessions.session", "fields": {...}}
{ "pk": "88bea72c02274f3c9bf1cb2bb8cee4fc", "model": "sessions.session", "fields": { ... }} {"pk": "88bea72c02274f3c9bf1cb2bb8cee4fc", "model": "sessions.session", "fields": {...}}
{ "pk": "9cf0e26691b64147a67e2a9f06ad7a53", "model": "sessions.session", "fields": { ... }} {"pk": "9cf0e26691b64147a67e2a9f06ad7a53", "model": "sessions.session", "fields": {...}}
JSONL can be useful for populating large databases, since the data can be JSONL can be useful for populating large databases, since the data can be
processed line by line, rather than being loaded into memory all at once. processed line by line, rather than being loaded into memory all at once.

View File

@ -15,32 +15,20 @@ from .tests import SerializersTestBase, SerializersTransactionTestBase
class JsonlSerializerTestCase(SerializersTestBase, TestCase): class JsonlSerializerTestCase(SerializersTestBase, TestCase):
serializer_name = "jsonl" serializer_name = "jsonl"
pkless_str = [ pkless_str = [
"""{ '{"pk": null,"model": "serializers.category","fields": {"name": "Reference"}}',
"pk": null, '{"model": "serializers.category","fields": {"name": "Non-fiction"}}',
"model": "serializers.category",
"fields": {"name": "Reference"}
}""",
"""{
"model": "serializers.category",
"fields": {"name": "Non-fiction"}
}"""
] ]
pkless_str = "\n".join([s.replace("\n", "") for s in pkless_str]) pkless_str = "\n".join([s.replace("\n", "") for s in pkless_str])
mapping_ordering_str = """{ mapping_ordering_str = (
"model": "serializers.article", '{"model": "serializers.article","pk": %(article_pk)s,'
"pk": %(article_pk)s, '"fields": {'
"fields": { '"author": %(author_pk)s,'
"author": %(author_pk)s, '"headline": "Poker has no place on ESPN",'
"headline": "Poker has no place on ESPN", '"pub_date": "2006-06-16T11:00:00",'
"pub_date": "2006-06-16T11:00:00", '"categories": [%(first_category_pk)s,%(second_category_pk)s],'
"categories": [ '"meta_data": []}}\n'
%(first_category_pk)s, )
%(second_category_pk)s
],
"meta_data": []
}
}""".replace("\n", "") + "\n"
@staticmethod @staticmethod
def _validate_output(serial_str): def _validate_output(serial_str):
@ -82,7 +70,7 @@ class JsonlSerializerTestCase(SerializersTestBase, TestCase):
s = serializers.jsonl.Serializer() s = serializers.jsonl.Serializer()
json_data = s.serialize( json_data = s.serialize(
[ScoreDecimal(score=decimal.Decimal(1.0))], cls=CustomJSONEncoder [ScoreDecimal(score=decimal.Decimal(1.0))], cls=CustomJSONEncoder,
) )
self.assertIn('"fields": {"score": "1"}', json_data) self.assertIn('"fields": {"score": "1"}', json_data)
@ -93,55 +81,40 @@ class JsonlSerializerTestCase(SerializersTestBase, TestCase):
def test_helpful_error_message_invalid_pk(self): def test_helpful_error_message_invalid_pk(self):
""" """
If there is an invalid primary key, the error message should contain If there is an invalid primary key, the error message contains the
the model associated with it. model associated with it.
""" """
test_string = """{ test_string = (
"pk": "badpk", '{"pk": "badpk","model": "serializers.player",'
"model": "serializers.player", '"fields": {"name": "Bob","rank": 1,"team": "Team"}}'
"fields": { )
"name": "Bob",
"rank": 1,
"team": "Team"
}
}""".replace("\n", "")
with self.assertRaisesMessage(DeserializationError, "(serializers.player:pk=badpk)"): with self.assertRaisesMessage(DeserializationError, "(serializers.player:pk=badpk)"):
list(serializers.deserialize('jsonl', test_string)) list(serializers.deserialize('jsonl', test_string))
def test_helpful_error_message_invalid_field(self): def test_helpful_error_message_invalid_field(self):
""" """
If there is an invalid field value, the error message should contain If there is an invalid field value, the error message contains the
the model associated with it. model associated with it.
""" """
test_string = """{ test_string = (
"pk": "1", '{"pk": "1","model": "serializers.player",'
"model": "serializers.player", '"fields": {"name": "Bob","rank": "invalidint","team": "Team"}}'
"fields": { )
"name": "Bob",
"rank": "invalidint",
"team": "Team"
}
}""".replace("\n", "")
expected = "(serializers.player:pk=1) field_value was 'invalidint'" expected = "(serializers.player:pk=1) field_value was 'invalidint'"
with self.assertRaisesMessage(DeserializationError, expected): with self.assertRaisesMessage(DeserializationError, expected):
list(serializers.deserialize('jsonl', test_string)) list(serializers.deserialize('jsonl', test_string))
def test_helpful_error_message_for_foreign_keys(self): def test_helpful_error_message_for_foreign_keys(self):
""" """
Invalid foreign keys with a natural key should throw a helpful error Invalid foreign keys with a natural key throw a helpful error message,
message, such as what the failing key is. such as what the failing key is.
""" """
test_string = """{ test_string = (
"pk": 1, '{"pk": 1, "model": "serializers.category",'
"model": "serializers.category", '"fields": {'
"fields": { '"name": "Unknown foreign key",'
"name": "Unknown foreign key", '"meta_data": ["doesnotexist","metadata"]}}'
"meta_data": [ )
"doesnotexist",
"metadata"
]
}
}""".replace("\n", "")
key = ["doesnotexist", "metadata"] key = ["doesnotexist", "metadata"]
expected = "(serializers.category:pk=1) field_value was '%r'" % key expected = "(serializers.category:pk=1) field_value was '%r'" % key
with self.assertRaisesMessage(DeserializationError, expected): with self.assertRaisesMessage(DeserializationError, expected):
@ -149,7 +122,7 @@ class JsonlSerializerTestCase(SerializersTestBase, TestCase):
def test_helpful_error_message_for_many2many_non_natural(self): def test_helpful_error_message_for_many2many_non_natural(self):
""" """
Invalid many-to-many keys should throw a helpful error message. Invalid many-to-many keys throw a helpful error message.
""" """
test_strings = [ test_strings = [
"""{ """{
@ -165,16 +138,12 @@ class JsonlSerializerTestCase(SerializersTestBase, TestCase):
"""{ """{
"pk": 1, "pk": 1,
"model": "serializers.author", "model": "serializers.author",
"fields": { "fields": {"name": "Agnes"}
"name": "Agnes"
}
}""", }""",
"""{ """{
"pk": 1, "pk": 1,
"model": "serializers.category", "model": "serializers.category",
"fields": { "fields": {"name": "Reference"}
"name": "Reference"
}
}""" }"""
] ]
test_string = "\n".join([s.replace("\n", "") for s in test_strings]) test_string = "\n".join([s.replace("\n", "") for s in test_strings])
@ -184,18 +153,14 @@ class JsonlSerializerTestCase(SerializersTestBase, TestCase):
def test_helpful_error_message_for_many2many_natural1(self): def test_helpful_error_message_for_many2many_natural1(self):
""" """
Invalid many-to-many keys should throw a helpful error message. Invalid many-to-many keys throw a helpful error message where one of a
This tests the code path where one of a list of natural keys is invalid. list of natural keys is invalid.
""" """
test_strings = [ test_strings = [
"""{ """{
"pk": 1, "pk": 1,
"model": "serializers.categorymetadata", "model": "serializers.categorymetadata",
"fields": { "fields": {"kind": "author","name": "meta1","value": "Agnes"}
"kind": "author",
"name": "meta1",
"value": "Agnes"
}
}""", }""",
"""{ """{
"pk": 1, "pk": 1,
@ -214,9 +179,7 @@ class JsonlSerializerTestCase(SerializersTestBase, TestCase):
"""{ """{
"pk": 1, "pk": 1,
"model": "serializers.author", "model": "serializers.author",
"fields": { "fields": {"name": "Agnes"}
"name": "Agnes"
}
}""" }"""
] ]
test_string = "\n".join([s.replace("\n", "") for s in test_strings]) test_string = "\n".join([s.replace("\n", "") for s in test_strings])
@ -228,9 +191,8 @@ class JsonlSerializerTestCase(SerializersTestBase, TestCase):
def test_helpful_error_message_for_many2many_natural2(self): def test_helpful_error_message_for_many2many_natural2(self):
""" """
Invalid many-to-many keys should throw a helpful error message. This Invalid many-to-many keys throw a helpful error message where a
tests the code path where a natural many-to-many key has only a single natural many-to-many key has only a single value.
value.
""" """
test_strings = [ test_strings = [
"""{ """{
@ -246,18 +208,12 @@ class JsonlSerializerTestCase(SerializersTestBase, TestCase):
"""{ """{
"pk": 1, "pk": 1,
"model": "serializers.categorymetadata", "model": "serializers.categorymetadata",
"fields": { "fields": {"kind": "author","name": "meta1","value": "Agnes"}
"kind": "author",
"name": "meta1",
"value": "Agnes"
}
}""", }""",
"""{ """{
"pk": 1, "pk": 1,
"model": "serializers.author", "model": "serializers.author",
"fields": { "fields": {"name": "Agnes"}
"name": "Agnes"
}
}""" }"""
] ]
test_string = "\n".join([s.replace("\n", "") for s in test_strings]) test_string = "\n".join([s.replace("\n", "") for s in test_strings])
@ -270,12 +226,9 @@ class JsonlSerializerTestCase(SerializersTestBase, TestCase):
""" """
Not iterable many-to-many field value throws a helpful error message. Not iterable many-to-many field value throws a helpful error message.
""" """
test_string = """{ test_string = (
"pk": 1, '{"pk": 1,"model": "serializers.m2mdata","fields": {"data": null}}'
"model": "serializers.m2mdata", )
"fields": {"data": null}
}""".replace("\n", "")
expected = "(serializers.m2mdata:pk=1) field_value was 'None'" expected = "(serializers.m2mdata:pk=1) field_value was 'None'"
with self.assertRaisesMessage(DeserializationError, expected): with self.assertRaisesMessage(DeserializationError, expected):
next(serializers.deserialize('jsonl', test_string, ignore=False)) next(serializers.deserialize('jsonl', test_string, ignore=False))
@ -297,16 +250,12 @@ class JsonSerializerTransactionTestCase(SerializersTransactionTestBase, Transact
"""{ """{
"pk": 1, "pk": 1,
"model": "serializers.category", "model": "serializers.category",
"fields": { "fields": {"name": "Reference"}
"name": "Reference"
}
}""", }""",
"""{ """{
"pk": 1, "pk": 1,
"model": "serializers.author", "model": "serializers.author",
"fields": { "fields": {"name": "Agnes"}
"name": "Agnes"
}
}""" }"""
] ]
fwd_ref_str = "\n".join([s.replace("\n", "") for s in fwd_ref_str]) fwd_ref_str = "\n".join([s.replace("\n", "") for s in fwd_ref_str])