from math import ceil
from operator import attrgetter

from django.core.exceptions import FieldDoesNotExist
from django.db import (
    IntegrityError,
    NotSupportedError,
    OperationalError,
    ProgrammingError,
    connection,
)
from django.db.models import FileField, Value
from django.db.models.functions import Lower
from django.test import (
    TestCase,
    override_settings,
    skipIfDBFeature,
    skipUnlessDBFeature,
)

from .models import (
    BigAutoFieldModel,
    Country,
    NoFields,
    NullableFields,
    Pizzeria,
    ProxyCountry,
    ProxyMultiCountry,
    ProxyMultiProxyCountry,
    ProxyProxyCountry,
    RelatedModel,
    Restaurant,
    SmallAutoFieldModel,
    State,
    TwoFields,
    UpsertConflict,
)


class BulkCreateTests(TestCase):
    """Tests for ``bulk_create()`` on the managers of this app's models."""

    def setUp(self):
        """Prepare four unsaved ``Country`` instances as ``self.data``."""
        names_and_codes = (
            ("United States of America", "US"),
            ("The Netherlands", "NL"),
            ("Germany", "DE"),
            ("Czech Republic", "CZ"),
        )
        self.data = [
            Country(name=name, iso_two_letter=code) for name, code in names_and_codes
        ]

    def test_simple(self):
        """The call returns its input; an empty list creates no rows."""
        returned = Country.objects.bulk_create(self.data)
        self.assertEqual(returned, self.data)
        names_descending = [
            "United States of America",
            "The Netherlands",
            "Germany",
            "Czech Republic",
        ]
        self.assertQuerysetEqual(
            Country.objects.order_by("-name"),
            names_descending,
            transform=attrgetter("name"),
        )

        returned = Country.objects.bulk_create([])
        self.assertEqual(returned, [])
        self.assertEqual(Country.objects.count(), 4)

    @skipUnlessDBFeature("has_bulk_insert")
    def test_efficiency(self):
        """Creating the four countries issues exactly one query."""
        with self.assertNumQueries(1):
            Country.objects.bulk_create(self.data)

    @skipUnlessDBFeature("has_bulk_insert")
    def test_long_non_ascii_text(self):
        """
        Non-ASCII values between 2001 and 4000 characters long (4002 to 8000
        bytes) must be set as a CLOB on Oracle (#22144).
        """
        long_description = "Ж" * 3000
        Country.objects.bulk_create([Country(description=long_description)])
        self.assertEqual(Country.objects.count(), 1)

    @skipUnlessDBFeature("has_bulk_insert")
    def test_long_and_short_text(self):
        """Long and short ASCII/non-ASCII descriptions share one bulk insert."""
        descriptions_and_codes = (
            ("a" * 4001, "A"),
            ("a", "B"),
            ("Ж" * 2001, "C"),
            ("Ж", "D"),
        )
        Country.objects.bulk_create(
            [
                Country(description=text, iso_two_letter=code)
                for text, code in descriptions_and_codes
            ]
        )
        self.assertEqual(Country.objects.count(), 4)

    def test_multi_table_inheritance_unsupported(self):
        """Multi-table inherited models, and proxies of them, are rejected."""
        expected_message = "Can't bulk create a multi-table inherited model"
        fillory = {"name": "Fillory", "iso_two_letter": "FL"}
        rejected = (
            (Pizzeria, {"name": "The Art of Pizza"}),
            (ProxyMultiCountry, fillory),
            (ProxyMultiProxyCountry, fillory),
        )
        for model, init_kwargs in rejected:
            with self.assertRaisesMessage(ValueError, expected_message):
                model.objects.bulk_create([model(**init_kwargs)])

    def test_proxy_inheritance_supported(self):
        """Proxy models, and proxies of proxies, can be bulk created."""
        first_batch = [
            ProxyCountry(name="Qwghlm", iso_two_letter="QW"),
            Country(name="Tortall", iso_two_letter="TA"),
        ]
        ProxyCountry.objects.bulk_create(first_batch)
        self.assertQuerysetEqual(
            ProxyCountry.objects.all(),
            {"Qwghlm", "Tortall"},
            transform=attrgetter("name"),
            ordered=False,
        )

        second_batch = [ProxyProxyCountry(name="Netherlands", iso_two_letter="NT")]
        ProxyProxyCountry.objects.bulk_create(second_batch)
        self.assertQuerysetEqual(
            ProxyProxyCountry.objects.all(),
            {"Qwghlm", "Tortall", "Netherlands"},
            transform=attrgetter("name"),
            ordered=False,
        )

    def test_non_auto_increment_pk(self):
        """``State`` rows given explicit two-letter codes are all stored."""
        codes = ["IL", "NY", "CA", "ME"]
        State.objects.bulk_create([State(two_letter_code=code) for code in codes])
        self.assertQuerysetEqual(
            State.objects.order_by("two_letter_code"),
            ["CA", "IL", "ME", "NY"],
            transform=attrgetter("two_letter_code"),
        )

    @skipUnlessDBFeature("has_bulk_insert")
    def test_non_auto_increment_pk_efficiency(self):
        """Creating the four ``State`` rows issues exactly one query."""
        states = [State(two_letter_code=code) for code in ["IL", "NY", "CA", "ME"]]
        with self.assertNumQueries(1):
            State.objects.bulk_create(states)
        # The verification query runs outside the counted block.
        self.assertQuerysetEqual(
            State.objects.order_by("two_letter_code"),
            ["CA", "IL", "ME", "NY"],
            transform=attrgetter("two_letter_code"),
        )

    @skipIfDBFeature("allows_auto_pk_0")
    def test_zero_as_autoval(self):
        """
        Zero as id for AutoField should raise an exception in MySQL, because
        MySQL does not allow zero for an automatic primary key unless the
        NO_AUTO_VALUE_ON_ZERO SQL mode is enabled.
        """
        msg = "The database backend does not accept 0 as a value for AutoField."
        accepted = Country(name="Germany", iso_two_letter="DE")
        zero_pk = Country(id=0, name="Poland", iso_two_letter="PL")
        with self.assertRaisesMessage(ValueError, msg):
            Country.objects.bulk_create([accepted, zero_pk])

    def test_batch_same_vals(self):
        """Two instances with identical values produce two rows."""
        # SQLite had a problem where all the same-valued models were
        # collapsed to one insert.
        duplicates = [Restaurant(name="foo") for _ in range(2)]
        Restaurant.objects.bulk_create(duplicates)
        self.assertEqual(Restaurant.objects.count(), 2)

    def test_large_batch(self):
        """All 1001 ``TwoFields`` rows are stored with the expected values."""
        rows = [TwoFields(f1=n, f2=n + 1) for n in range(1001)]
        TwoFields.objects.bulk_create(rows)
        self.assertEqual(TwoFields.objects.count(), 1001)
        mid_range = TwoFields.objects.filter(f1__gte=450, f1__lte=550)
        self.assertEqual(mid_range.count(), 101)
        upper_range = TwoFields.objects.filter(f2__gte=901)
        self.assertEqual(upper_range.count(), 101)

    @skipUnlessDBFeature("has_bulk_insert")
    def test_large_single_field_batch(self):
        """Bulk creating 501 field-less ``Restaurant`` instances must not fail."""
        # SQLite had a problem with more than 500 UNIONed selects in single
        # query.
        Restaurant.objects.bulk_create([Restaurant() for _ in range(501)])

    @skipUnlessDBFeature("has_bulk_insert")
    def test_large_batch_efficiency(self):
        """Inserting 1001 rows is logged as fewer than ten queries."""
        rows = [TwoFields(f1=n, f2=n + 1) for n in range(1001)]
        with override_settings(DEBUG=True):
            connection.queries_log.clear()
            TwoFields.objects.bulk_create(rows)
            self.assertLess(len(connection.queries), 10)

    def test_large_batch_mixed(self):
        """
        Insert a large batch in which objects with an explicit primary key are
        interleaved with objects that have none.
        """
        rows = [
            TwoFields(id=None if n % 2 else n, f1=n, f2=n + 1)
            for n in range(100000, 101000)
        ]
        TwoFields.objects.bulk_create(rows)
        self.assertEqual(TwoFields.objects.count(), 1000)
        # We can't assume much about the ID's created, except that the above
        # created IDs must exist.
        explicit_ids = range(100000, 101000, 2)
        with_explicit_id = TwoFields.objects.filter(id__in=explicit_ids)
        self.assertEqual(with_explicit_id.count(), 500)
        without_explicit_id = TwoFields.objects.exclude(id__in=explicit_ids)
        self.assertEqual(without_explicit_id.count(), 500)

    @skipUnlessDBFeature("has_bulk_insert")
    def test_large_batch_mixed_efficiency(self):
        """
        Insert a large batch mixing objects with and without an explicit
        primary key, and check that fewer than ten queries are logged.
        """
        rows = [
            TwoFields(id=None if n % 2 else n, f1=n, f2=n + 1)
            for n in range(100000, 101000)
        ]
        with override_settings(DEBUG=True):
            connection.queries_log.clear()
            TwoFields.objects.bulk_create(rows)
            self.assertLess(len(connection.queries), 10)

    def test_explicit_batch_size(self):
        """Every row is stored for batch sizes 1, 2, 3 and the full length."""
        objs = [TwoFields(f1=n, f2=n) for n in range(4)]
        num_objs = len(objs)
        for attempt, batch_size in enumerate((1, 2, 3, num_objs)):
            if attempt:
                # Clear the rows left behind by the previous batch size.
                TwoFields.objects.all().delete()
            TwoFields.objects.bulk_create(objs, batch_size=batch_size)
            self.assertEqual(TwoFields.objects.count(), num_objs)

    def test_empty_model(self):
        """A model constructed without any field values can be bulk created."""
        NoFields.objects.bulk_create([NoFields() for _ in range(2)])
        self.assertEqual(NoFields.objects.count(), 2)

    @skipUnlessDBFeature("has_bulk_insert")
    def test_explicit_batch_size_efficiency(self):
        """100 objects take two queries at batch size 50 and one at 100."""
        objs = [TwoFields(f1=n, f2=n) for n in range(100)]
        with self.assertNumQueries(2):
            TwoFields.objects.bulk_create(objs, 50)
        # Reset outside the counted blocks so the delete is not counted.
        TwoFields.objects.all().delete()
        with self.assertNumQueries(1):
            TwoFields.objects.bulk_create(objs, len(objs))

    @skipUnlessDBFeature("has_bulk_insert")
    def test_explicit_batch_size_respects_max_batch_size(self):
        """A batch_size above the backend maximum is capped at that maximum."""
        objs = [Country(name=f"Country {n}") for n in range(1000)]
        fields = ["name", "iso_two_letter", "description"]
        max_batch_size = max(connection.ops.bulk_batch_size(fields, objs), 1)
        expected_queries = ceil(len(objs) / max_batch_size)
        with self.assertNumQueries(expected_queries):
            Country.objects.bulk_create(objs, batch_size=max_batch_size + 1)

    @skipUnlessDBFeature("has_bulk_insert")
    def test_bulk_insert_expressions(self):
        """A database expression can be used as a field value."""
        plain = Restaurant(name="Sam's Shake Shack")
        lowered = Restaurant(name=Lower(Value("Betty's Beetroot Bar")))
        Restaurant.objects.bulk_create([plain, lowered])
        bbb = Restaurant.objects.filter(name="betty's beetroot bar")
        self.assertEqual(bbb.count(), 1)

    @skipUnlessDBFeature("has_bulk_insert")
    def test_bulk_insert_nullable_fields(self):
        """Insert one row per field, each with just that field set to None."""
        related_rows = {
            "auto_field": NoFields.objects.create(),
            "small_auto_field": SmallAutoFieldModel.objects.create(),
            "big_auto_field": BigAutoFieldModel.objects.create(),
        }
        # NULL can be mixed with other values in nullable fields
        nullable_fields = [
            field for field in NullableFields._meta.get_fields() if field.name != "id"
        ]
        rows = []
        for field in nullable_fields:
            init_kwargs = dict(related_rows)
            init_kwargs[field.name] = None
            rows.append(NullableFields(**init_kwargs))
        NullableFields.objects.bulk_create(rows)
        self.assertEqual(NullableFields.objects.count(), len(nullable_fields))
        for field in nullable_fields:
            with self.subTest(field=field):
                # File fields are looked up by "" rather than None.
                if isinstance(field, FileField):
                    stored_value = ""
                else:
                    stored_value = None
                matching = NullableFields.objects.filter(**{field.name: stored_value})
                self.assertEqual(matching.count(), 1)

    @skipUnlessDBFeature("can_return_rows_from_bulk_insert")
    def test_set_pk_and_insert_single_item(self):
        """A single inserted object gets its pk set in one query."""
        with self.assertNumQueries(1):
            countries = Country.objects.bulk_create([self.data[0]])
        self.assertEqual(len(countries), 1)
        only_country = countries[0]
        self.assertEqual(Country.objects.get(pk=only_country.pk), only_country)

    @skipUnlessDBFeature("can_return_rows_from_bulk_insert")
    def test_set_pk_and_query_efficiency(self):
        """All four objects get their pk set by a single insert query."""
        with self.assertNumQueries(1):
            countries = Country.objects.bulk_create(self.data)
        self.assertEqual(len(countries), 4)
        for country in countries:
            self.assertEqual(Country.objects.get(pk=country.pk), country)

    @skipUnlessDBFeature("can_return_rows_from_bulk_insert")
    def test_set_state(self):
        """``_state`` matches between bulk_create() and save()."""
        country_nl = Country(name="Netherlands", iso_two_letter="NL")
        country_be = Country(name="Belgium", iso_two_letter="BE")
        Country.objects.bulk_create([country_nl])
        country_be.save()
        # Objects save via bulk_create() and save() should have equal state.
        for state_attr in ("adding", "db"):
            self.assertEqual(
                getattr(country_nl._state, state_attr),
                getattr(country_be._state, state_attr),
            )

    def test_set_state_with_pk_specified(self):
        """``_state`` also matches when the primary key is given explicitly."""
        state_ca = State(two_letter_code="CA")
        state_ny = State(two_letter_code="NY")
        State.objects.bulk_create([state_ca])
        state_ny.save()
        # Objects save via bulk_create() and save() should have equal state.
        for state_attr in ("adding", "db"):
            self.assertEqual(
                getattr(state_ca._state, state_attr),
                getattr(state_ny._state, state_attr),
            )

    @skipIfDBFeature("supports_ignore_conflicts")
    def test_ignore_conflicts_value_error(self):
        """ignore_conflicts=True raises NotSupportedError on this backend."""
        message = "This database backend does not support ignoring conflicts."
        with self.assertRaisesMessage(NotSupportedError, message):
            TwoFields.objects.bulk_create(self.data, ignore_conflicts=True)

    @skipUnlessDBFeature("supports_ignore_conflicts")
    def test_ignore_conflicts_ignore(self):
        """Conflicting rows are skipped, and an IntegrityError is raised otherwise."""
        TwoFields.objects.bulk_create([TwoFields(f1=n, f2=n) for n in (1, 2, 3)])
        self.assertEqual(TwoFields.objects.count(), 3)
        # With ignore_conflicts=True, conflicts are ignored.
        conflicting_objects = [TwoFields(f1=n, f2=n) for n in (2, 3)]
        TwoFields.objects.bulk_create(conflicting_objects[:1], ignore_conflicts=True)
        TwoFields.objects.bulk_create(conflicting_objects, ignore_conflicts=True)
        self.assertEqual(TwoFields.objects.count(), 3)
        for skipped in conflicting_objects:
            self.assertIsNone(skipped.pk)
        # New objects are created and conflicts are ignored.
        new_object = TwoFields(f1=4, f2=4)
        TwoFields.objects.bulk_create(
            [*conflicting_objects, new_object], ignore_conflicts=True
        )
        self.assertEqual(TwoFields.objects.count(), 4)
        self.assertIsNone(new_object.pk)
        # Without ignore_conflicts=True, there's a problem.
        with self.assertRaises(IntegrityError):
            TwoFields.objects.bulk_create(conflicting_objects)

    def test_nullable_fk_after_parent(self):
        """A child built before its parent was saved keeps the relation."""
        parent = NoFields()
        child = NullableFields(auto_field=parent, integer_field=88)
        # The parent is saved only after the child instance was constructed.
        parent.save()
        NullableFields.objects.bulk_create([child])
        reloaded = NullableFields.objects.get(integer_field=88)
        self.assertEqual(reloaded.auto_field, parent)

    @skipUnlessDBFeature("can_return_rows_from_bulk_insert")
    def test_nullable_fk_after_parent_bulk_create(self):
        """Same as above, with the parent stored through bulk_create()."""
        parent = NoFields()
        child = NullableFields(auto_field=parent, integer_field=88)
        NoFields.objects.bulk_create([parent])
        NullableFields.objects.bulk_create([child])
        reloaded = NullableFields.objects.get(integer_field=88)
        self.assertEqual(reloaded.auto_field, parent)

    def test_unsaved_parent(self):
        """A child pointing at an unsaved parent is rejected."""
        msg = (
            "bulk_create() prohibited to prevent data loss due to unsaved "
            "related object 'auto_field'."
        )
        unsaved_parent = NoFields()
        with self.assertRaisesMessage(ValueError, msg):
            NullableFields.objects.bulk_create(
                [NullableFields(auto_field=unsaved_parent)]
            )

    def test_invalid_batch_size_exception(self):
        """A negative batch_size raises ValueError."""
        msg = "Batch size must be a positive integer."
        with self.assertRaisesMessage(ValueError, msg):
            Country.objects.bulk_create([], batch_size=-1)

    @skipIfDBFeature("supports_update_conflicts")
    def test_update_conflicts_unsupported(self):
        """update_conflicts=True raises NotSupportedError on this backend."""
        msg = "This database backend does not support updating conflicts."
        with self.assertRaisesMessage(NotSupportedError, msg):
            Country.objects.bulk_create(self.data, update_conflicts=True)

    @skipUnlessDBFeature("supports_ignore_conflicts", "supports_update_conflicts")
    def test_ignore_update_conflicts_exclusive(self):
        """ignore_conflicts and update_conflicts cannot be combined."""
        msg = "ignore_conflicts and update_conflicts are mutually exclusive"
        conflict_options = {"ignore_conflicts": True, "update_conflicts": True}
        with self.assertRaisesMessage(ValueError, msg):
            Country.objects.bulk_create(self.data, **conflict_options)

    @skipUnlessDBFeature("supports_update_conflicts")
    def test_update_conflicts_no_update_fields(self):
        """update_conflicts=True without update_fields raises ValueError."""
        msg = (
            "Fields that will be updated when a row insertion fails on "
            "conflicts must be provided."
        )
        with self.assertRaisesMessage(ValueError, msg):
            Country.objects.bulk_create(self.data, update_conflicts=True)

    @skipUnlessDBFeature("supports_update_conflicts")
    @skipIfDBFeature("supports_update_conflicts_with_target")
    def test_update_conflicts_unique_field_unsupported(self):
        """unique_fields raises NotSupportedError on this backend."""
        msg = (
            "This database backend does not support updating conflicts with "
            "specifying unique fields that can trigger the upsert."
        )
        with self.assertRaisesMessage(NotSupportedError, msg):
            TwoFields.objects.bulk_create(
                [TwoFields(f1=n, f2=n) for n in (1, 2)],
                update_conflicts=True,
                update_fields=["f2"],
                unique_fields=["f1"],
            )

    @skipUnlessDBFeature("supports_update_conflicts")
    def test_update_conflicts_nonexistent_update_fields(self):
        """An unknown name in update_fields raises FieldDoesNotExist."""
        # unique_fields is only passed on backends that accept a target.
        unique_fields = (
            ["f1"]
            if connection.features.supports_update_conflicts_with_target
            else None
        )
        msg = "TwoFields has no field named 'nonexistent'"
        with self.assertRaisesMessage(FieldDoesNotExist, msg):
            TwoFields.objects.bulk_create(
                [TwoFields(f1=n, f2=n) for n in (1, 2)],
                update_conflicts=True,
                update_fields=["nonexistent"],
                unique_fields=unique_fields,
            )

    @skipUnlessDBFeature(
        "supports_update_conflicts",
        "supports_update_conflicts_with_target",
    )
    def test_update_conflicts_unique_fields_required(self):
        """Omitting unique_fields raises ValueError on this backend."""
        msg = "Unique fields that can trigger the upsert must be provided."
        with self.assertRaisesMessage(ValueError, msg):
            TwoFields.objects.bulk_create(
                [TwoFields(f1=n, f2=n) for n in (1, 2)],
                update_conflicts=True,
                update_fields=["f1"],
            )

    @skipUnlessDBFeature(
        "supports_update_conflicts",
        "supports_update_conflicts_with_target",
    )
    def test_update_conflicts_invalid_update_fields(self):
        """Non-concrete fields in update_fields raise ValueError."""
        msg = "bulk_create() can only be used with concrete fields in update_fields."
        upsert_options = {"update_conflicts": True}
        # Reverse one-to-one relationship.
        with self.assertRaisesMessage(ValueError, msg):
            Country.objects.bulk_create(
                self.data,
                update_fields=["relatedmodel"],
                unique_fields=["pk"],
                **upsert_options,
            )
        # Many-to-many relationship.
        with self.assertRaisesMessage(ValueError, msg):
            RelatedModel.objects.bulk_create(
                [RelatedModel(country=self.data[0])],
                update_fields=["big_auto_fields"],
                unique_fields=["country"],
                **upsert_options,
            )

    @skipUnlessDBFeature(
        "supports_update_conflicts",
        "supports_update_conflicts_with_target",
    )
    def test_update_conflicts_pk_in_update_fields(self):
        """A primary key in update_fields raises ValueError."""
        msg = "bulk_create() cannot be used with primary keys in update_fields."
        with self.assertRaisesMessage(ValueError, msg):
            BigAutoFieldModel.objects.bulk_create(
                [BigAutoFieldModel()],
                update_conflicts=True,
                update_fields=["id"],
                unique_fields=["id"],
            )

    @skipUnlessDBFeature(
        "supports_update_conflicts",
        "supports_update_conflicts_with_target",
    )
    def test_update_conflicts_invalid_unique_fields(self):
        """Non-concrete fields in unique_fields raise ValueError."""
        msg = "bulk_create() can only be used with concrete fields in unique_fields."
        upsert_options = {"update_conflicts": True, "update_fields": ["name"]}
        # Reverse one-to-one relationship.
        with self.assertRaisesMessage(ValueError, msg):
            Country.objects.bulk_create(
                self.data,
                unique_fields=["relatedmodel"],
                **upsert_options,
            )
        # Many-to-many relationship.
        with self.assertRaisesMessage(ValueError, msg):
            RelatedModel.objects.bulk_create(
                [RelatedModel(country=self.data[0])],
                unique_fields=["big_auto_fields"],
                **upsert_options,
            )

    def _test_update_conflicts_two_fields(self, unique_fields):
        """
        Insert two ``TwoFields`` rows, then upsert two conflicting objects that
        differ only in ``name``, and check that the names were updated in place.
        """
        TwoFields.objects.bulk_create(
            [TwoFields(f1=n, f2=n, name=label) for n, label in ((1, "a"), (2, "b"))]
        )
        self.assertEqual(TwoFields.objects.count(), 2)

        replacements = ((1, "c"), (2, "d"))
        conflicting_objects = [
            TwoFields(f1=n, f2=n, name=label) for n, label in replacements
        ]
        TwoFields.objects.bulk_create(
            conflicting_objects,
            update_conflicts=True,
            unique_fields=unique_fields,
            update_fields=["name"],
        )
        self.assertEqual(TwoFields.objects.count(), 2)
        self.assertCountEqual(
            TwoFields.objects.values("f1", "f2", "name"),
            [{"f1": n, "f2": n, "name": label} for n, label in replacements],
        )

    @skipUnlessDBFeature(
        "supports_update_conflicts", "supports_update_conflicts_with_target"
    )
    def test_update_conflicts_two_fields_unique_fields_first(self):
        """Upsert ``TwoFields`` with ``f1`` as the conflict target."""
        self._test_update_conflicts_two_fields(["f1"])

    @skipUnlessDBFeature(
        "supports_update_conflicts", "supports_update_conflicts_with_target"
    )
    def test_update_conflicts_two_fields_unique_fields_second(self):
        """Upsert ``TwoFields`` with ``f2`` as the conflict target."""
        self._test_update_conflicts_two_fields(["f2"])

    @skipUnlessDBFeature(
        "supports_update_conflicts", "supports_update_conflicts_with_target"
    )
    def test_update_conflicts_two_fields_unique_fields_both(self):
        """Targeting ``f1`` and ``f2`` together must raise a database error."""
        with self.assertRaises((OperationalError, ProgrammingError)):
            self._test_update_conflicts_two_fields(["f1", "f2"])

    @skipUnlessDBFeature("supports_update_conflicts")
    @skipIfDBFeature("supports_update_conflicts_with_target")
    def test_update_conflicts_two_fields_no_unique_fields(self):
        """Upsert ``TwoFields`` on a backend that takes no conflict target."""
        self._test_update_conflicts_two_fields([])

    def _test_update_conflicts_unique_two_fields(self, unique_fields):
        """
        Insert ``self.data``, then upsert two conflicting and two new countries
        updating only ``description``, and check the resulting six rows.
        """
        Country.objects.bulk_create(self.data)
        self.assertEqual(Country.objects.count(), 4)

        germany_description = "Germany is a country in Central Europe."
        czech_description = (
            "The Czech Republic is a landlocked country in Central Europe."
        )
        japan_description = "Japan is an island country in East Asia."
        new_data = [
            # Conflicting countries.
            Country(
                name="Germany",
                iso_two_letter="DE",
                description=germany_description,
            ),
            Country(
                name="Czech Republic",
                iso_two_letter="CZ",
                description=czech_description,
            ),
            # New countries.
            Country(name="Australia", iso_two_letter="AU"),
            Country(
                name="Japan",
                iso_two_letter="JP",
                description=japan_description,
            ),
        ]
        Country.objects.bulk_create(
            new_data,
            update_conflicts=True,
            update_fields=["description"],
            unique_fields=unique_fields,
        )
        self.assertEqual(Country.objects.count(), 6)
        expected_descriptions = (
            ("US", ""),
            ("NL", ""),
            ("DE", germany_description),
            ("CZ", czech_description),
            ("AU", ""),
            ("JP", japan_description),
        )
        self.assertCountEqual(
            Country.objects.values("iso_two_letter", "description"),
            [
                {"iso_two_letter": code, "description": text}
                for code, text in expected_descriptions
            ],
        )

    @skipUnlessDBFeature(
        "supports_update_conflicts", "supports_update_conflicts_with_target"
    )
    def test_update_conflicts_unique_two_fields_unique_fields_both(self):
        """Upsert countries targeting ``iso_two_letter`` and ``name`` together."""
        self._test_update_conflicts_unique_two_fields(["iso_two_letter", "name"])

    @skipUnlessDBFeature(
        "supports_update_conflicts", "supports_update_conflicts_with_target"
    )
    def test_update_conflicts_unique_two_fields_unique_fields_one(self):
        """Targeting ``iso_two_letter`` alone must raise a database error."""
        with self.assertRaises((OperationalError, ProgrammingError)):
            self._test_update_conflicts_unique_two_fields(["iso_two_letter"])

    @skipUnlessDBFeature("supports_update_conflicts")
    @skipIfDBFeature("supports_update_conflicts_with_target")
    def test_update_conflicts_unique_two_fields_unique_no_unique_fields(self):
        """Upsert countries on a backend that takes no conflict target."""
        self._test_update_conflicts_unique_two_fields([])

    def _test_update_conflicts(self, unique_fields):
        """
        Insert three ``UpsertConflict`` rows, upsert three conflicting objects,
        then upsert them again together with one new object, checking the
        stored ``number``/``rank``/``name`` values after each upsert.
        """
        initial_rows = ((1, 1, "John"), (2, 2, "Mary"), (3, 3, "Hannah"))
        UpsertConflict.objects.bulk_create(
            [
                UpsertConflict(number=number, rank=rank, name=name)
                for number, rank, name in initial_rows
            ]
        )
        self.assertEqual(UpsertConflict.objects.count(), 3)

        updated_rows = [
            {"number": 1, "rank": 4, "name": "Steve"},
            {"number": 2, "rank": 2, "name": "Olivia"},
            {"number": 3, "rank": 1, "name": "Hannah"},
        ]
        conflicting_objects = [UpsertConflict(**row) for row in updated_rows]
        upsert_options = {
            "update_conflicts": True,
            "update_fields": ["name", "rank"],
            "unique_fields": unique_fields,
        }
        UpsertConflict.objects.bulk_create(conflicting_objects, **upsert_options)
        self.assertEqual(UpsertConflict.objects.count(), 3)
        self.assertCountEqual(
            UpsertConflict.objects.values("number", "rank", "name"),
            updated_rows,
        )

        # Upsert the same objects again, this time with one new row added.
        added_row = {"number": 4, "rank": 4, "name": "Mark"}
        UpsertConflict.objects.bulk_create(
            conflicting_objects + [UpsertConflict(**added_row)],
            **upsert_options,
        )
        self.assertEqual(UpsertConflict.objects.count(), 4)
        self.assertCountEqual(
            UpsertConflict.objects.values("number", "rank", "name"),
            updated_rows + [added_row],
        )

    @skipUnlessDBFeature(
        "supports_update_conflicts", "supports_update_conflicts_with_target"
    )
    def test_update_conflicts_unique_fields(self):
        """Upsert ``UpsertConflict`` with ``number`` as the conflict target."""
        self._test_update_conflicts(unique_fields=["number"])

    @skipUnlessDBFeature("supports_update_conflicts")
    @skipIfDBFeature("supports_update_conflicts_with_target")
    def test_update_conflicts_no_unique_fields(self):
        """Upsert ``UpsertConflict`` on a backend that takes no conflict target."""
        self._test_update_conflicts([])