From 75d627888bf42f8de6064a0bd665c98c0df66c55 Mon Sep 17 00:00:00 2001 From: Kathryn Killebrew Date: Thu, 31 Jan 2019 19:50:16 -0500 Subject: [PATCH] Fixed #30020 -- Fixed reading nulls with LayerMapping. --- AUTHORS | 1 + django/contrib/gis/gdal/field.py | 32 ++-- django/contrib/gis/gdal/prototypes/ds.py | 8 +- .../contrib/gis/gdal/prototypes/generation.py | 13 +- django/contrib/gis/utils/layermapping.py | 6 +- .../data/has_nulls/has_nulls.geojson | 139 ++++++++++++++++++ tests/gis_tests/gdal_tests/test_ds.py | 39 ++++- tests/gis_tests/layermap/models.py | 30 ++++ tests/gis_tests/layermap/tests.py | 60 +++++++- 9 files changed, 302 insertions(+), 26 deletions(-) create mode 100644 tests/gis_tests/data/has_nulls/has_nulls.geojson diff --git a/AUTHORS b/AUTHORS index 539f1c5cc3..f305662f51 100644 --- a/AUTHORS +++ b/AUTHORS @@ -465,6 +465,7 @@ answer newbie questions, and generally made Django that much better: Karderio Karen Tracey Karol Sikora + Kathryn Killebrew Katie Miller Keith Bussell Kenneth Love diff --git a/django/contrib/gis/gdal/field.py b/django/contrib/gis/gdal/field.py index 2a6591088d..f67e7a5c03 100644 --- a/django/contrib/gis/gdal/field.py +++ b/django/contrib/gis/gdal/field.py @@ -34,11 +34,6 @@ class Field(GDALBase): # Setting the class depending upon the OGR Field Type (OFT) self.__class__ = OGRFieldTypes[self.type] - # OFTReal with no precision should be an OFTInteger. - if isinstance(self, OFTReal) and self.precision == 0: - self.__class__ = OFTInteger - self._double = True - def __str__(self): "Return the string representation of the Field." return str(self.value).strip() @@ -46,22 +41,26 @@ class Field(GDALBase): # #### Field Methods #### def as_double(self): "Retrieve the Field's value as a double (float)." - return capi.get_field_as_double(self._feat.ptr, self._index) + return capi.get_field_as_double(self._feat.ptr, self._index) if self.is_set else None def as_int(self, is_64=False): "Retrieve the Field's value as an integer." if is_64: - return capi.get_field_as_integer64(self._feat.ptr, self._index) + return capi.get_field_as_integer64(self._feat.ptr, self._index) if self.is_set else None else: - return capi.get_field_as_integer(self._feat.ptr, self._index) + return capi.get_field_as_integer(self._feat.ptr, self._index) if self.is_set else None def as_string(self): "Retrieve the Field's value as a string." + if not self.is_set: + return None string = capi.get_field_as_string(self._feat.ptr, self._index) return force_text(string, encoding=self._feat.encoding, strings_only=True) def as_datetime(self): "Retrieve the Field's value as a tuple of date & time components." + if not self.is_set: + return None yy, mm, dd, hh, mn, ss, tz = [c_int() for i in range(7)] status = capi.get_field_as_datetime( self._feat.ptr, self._index, byref(yy), byref(mm), byref(dd), @@ -72,6 +71,11 @@ class Field(GDALBase): raise GDALException('Unable to retrieve date & time information from the field.') # #### Field Properties #### + @property + def is_set(self): + "Return True if the value of this field isn't null, False otherwise." + return capi.is_field_set(self._feat.ptr, self._index) + @property def name(self): "Return the name of this Field." @@ -107,18 +111,12 @@ class Field(GDALBase): # ### The Field sub-classes for each OGR Field type. ### class OFTInteger(Field): - _double = False _bit64 = False @property def value(self): "Return an integer contained in this field." - if self._double: - # If this is really from an OFTReal field with no precision, - # read as a double and cast as Python int (to prevent overflow). - return int(self.as_double()) - else: - return self.as_int(self._bit64) + return self.as_int(self._bit64) @property def type(self): @@ -158,7 +156,7 @@ class OFTDate(Field): try: yy, mm, dd, hh, mn, ss, tz = self.as_datetime() return date(yy.value, mm.value, dd.value) - except (ValueError, GDALException): + except (TypeError, ValueError, GDALException): return None @@ -173,7 +171,7 @@ class OFTDateTime(Field): try: yy, mm, dd, hh, mn, ss, tz = self.as_datetime() return datetime(yy.value, mm.value, dd.value, hh.value, mn.value, ss.value) - except (ValueError, GDALException): + except (TypeError, ValueError, GDALException): return None diff --git a/django/contrib/gis/gdal/prototypes/ds.py b/django/contrib/gis/gdal/prototypes/ds.py index 030d142c1c..5a10de3575 100644 --- a/django/contrib/gis/gdal/prototypes/ds.py +++ b/django/contrib/gis/gdal/prototypes/ds.py @@ -8,8 +8,8 @@ from ctypes import POINTER, c_char_p, c_double, c_int, c_long, c_void_p from django.contrib.gis.gdal.envelope import OGREnvelope from django.contrib.gis.gdal.libgdal import GDAL_VERSION, lgdal from django.contrib.gis.gdal.prototypes.generation import ( - const_string_output, double_output, geom_output, int64_output, int_output, - srs_output, void_output, voidptr_output, + bool_output, const_string_output, double_output, geom_output, int64_output, + int_output, srs_output, void_output, voidptr_output, ) c_int_p = POINTER(c_int) # shortcut type @@ -70,6 +70,10 @@ get_field_as_double = double_output(lgdal.OGR_F_GetFieldAsDouble, [c_void_p, c_i get_field_as_integer = int_output(lgdal.OGR_F_GetFieldAsInteger, [c_void_p, c_int]) if GDAL_VERSION >= (2, 0): get_field_as_integer64 = int64_output(lgdal.OGR_F_GetFieldAsInteger64, [c_void_p, c_int]) +if GDAL_VERSION >= (2, 2): + is_field_set = bool_output(lgdal.OGR_F_IsFieldSetAndNotNull, [c_void_p, c_int]) +else: + is_field_set = bool_output(lgdal.OGR_F_IsFieldSet, [c_void_p, c_int]) get_field_as_string = const_string_output(lgdal.OGR_F_GetFieldAsString, [c_void_p, c_int]) get_field_index = int_output(lgdal.OGR_F_GetFieldIndex, [c_void_p, c_char_p]) diff --git a/django/contrib/gis/gdal/prototypes/generation.py b/django/contrib/gis/gdal/prototypes/generation.py index 012a5baab0..0b26585161 100644 --- a/django/contrib/gis/gdal/prototypes/generation.py +++ b/django/contrib/gis/gdal/prototypes/generation.py @@ -2,7 +2,9 @@ This module contains functions that generate ctypes prototypes for the GDAL routines. """ -from ctypes import POINTER, c_char_p, c_double, c_int, c_int64, c_void_p +from ctypes import ( + POINTER, c_bool, c_char_p, c_double, c_int, c_int64, c_void_p, +) from functools import partial from django.contrib.gis.gdal.prototypes.errcheck import ( @@ -15,6 +17,15 @@ class gdal_char_p(c_char_p): pass +def bool_output(func, argtypes, errcheck=None): + """Generate a ctypes function that returns a boolean value.""" + func.argtypes = argtypes + func.restype = c_bool + if errcheck: + func.errcheck = errcheck + return func + + def double_output(func, argtypes, errcheck=False, strarg=False, cpl=False): "Generate a ctypes function that returns a double value." func.argtypes = argtypes diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index 91537ba3ac..32434bb457 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -61,6 +61,7 @@ class LayerMapping: FIELD_TYPES = { models.AutoField: OFTInteger, models.BigAutoField: OFTInteger64, + models.BooleanField: (OFTInteger, OFTReal, OFTString), models.IntegerField: (OFTInteger, OFTReal, OFTString), models.FloatField: (OFTInteger, OFTReal), models.DateField: OFTDate, @@ -72,6 +73,7 @@ class LayerMapping: models.SlugField: OFTString, models.TextField: OFTString, models.URLField: OFTString, + models.UUIDField: OFTString, models.BigIntegerField: (OFTInteger, OFTReal, OFTString), models.SmallIntegerField: (OFTInteger, OFTReal, OFTString), models.PositiveSmallIntegerField: (OFTInteger, OFTReal, OFTString), @@ -343,13 +345,13 @@ class LayerMapping: """ if (isinstance(ogr_field, OFTString) and isinstance(model_field, (models.CharField, models.TextField))): - if self.encoding: + if self.encoding and ogr_field.value is not None: # The encoding for OGR data sources may be specified here # (e.g., 'cp437' for Census Bureau boundary files). val = force_text(ogr_field.value, self.encoding) else: val = ogr_field.value - if model_field.max_length and len(val) > model_field.max_length: + if model_field.max_length and val is not None and len(val) > model_field.max_length: raise InvalidString('%s model field maximum string length is %s, given %s characters.' % (model_field.name, model_field.max_length, len(val))) elif isinstance(ogr_field, OFTReal) and isinstance(model_field, models.DecimalField): diff --git a/tests/gis_tests/data/has_nulls/has_nulls.geojson b/tests/gis_tests/data/has_nulls/has_nulls.geojson new file mode 100644 index 0000000000..0fccf19684 --- /dev/null +++ b/tests/gis_tests/data/has_nulls/has_nulls.geojson @@ -0,0 +1,139 @@ +{ + "type": "FeatureCollection", + "name": "has_nulls", + "crs": { + "type": "name", + "properties": { + "name": "urn:ogc:def:crs:OGC:1.3:CRS84" + } + }, + "features": [ + { + "type": "Feature", + "properties": { + "uuid": "1378c26f-cbe6-44b0-929f-eb330d4991f5", + "boolean": true, + "datetime": "1994-08-14T11:32:14+0000", + "name": "Philadelphia", + "integer": 5, + "num": 1.001 + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -75.14923095703125, + 39.8928799002948 + ], + [ + -75.25772094726562, + 39.889718875996685 + ], + [ + -75.201416015625, + 40.03076909369051 + ], + [ + -75.07644653320312, + 39.99500778093748 + ], + [ + -75.14785766601562, + 39.95291166179976 + ], + [ + -75.135498046875, + 39.91605629078665 + ], + [ + -75.14923095703125, + 39.8928799002948 + ] + ] + ] + } + }, + { + "type": "Feature", + "properties": { + "uuid": "fa2ba67c-a135-4338-b924-a9622b5d869f", + "integer": null, + "num": null + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -75.11215209960938, + 39.94870062390347 + ], + [ + -75.02838134765625, + 39.990799335838034 + ], + [ + -74.959716796875, + 39.920269337633975 + ], + [ + -75.025634765625, + 39.8465036024177 + ], + [ + -75.12725830078125, + 39.871803651624425 + ], + [ + -75.11215209960938, + 39.94870062390347 + ] + ] + ] + } + }, + { + "type": "Feature", + "properties": { + "uuid": "4494c1f3-55ab-4256-b365-12115cb388d5", + "datetime": "2018-11-29T03:02:52+0000", + "name": "north", + "integer": 8, + "num": 0.0, + "boolean": false + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -75.24948120117188, + 39.99395569397331 + ], + [ + -75.27420043945312, + 40.0507451947963 + ], + [ + -75.20553588867188, + 40.119040222688795 + ], + [ + -75.08193969726562, + 40.03287211677227 + ], + [ + -75.16433715820312, + 39.97396296240704 + ], + [ + -75.24948120117188, + 39.99395569397331 + ] + ] + ] + } + } + ] +} diff --git a/tests/gis_tests/gdal_tests/test_ds.py b/tests/gis_tests/gdal_tests/test_ds.py index ea0cec9211..10ad8436b3 100644 --- a/tests/gis_tests/gdal_tests/test_ds.py +++ b/tests/gis_tests/gdal_tests/test_ds.py @@ -1,10 +1,13 @@ import os import re +from datetime import datetime from django.contrib.gis.gdal import ( GDAL_VERSION, DataSource, Envelope, GDALException, OGRGeometry, ) -from django.contrib.gis.gdal.field import OFTInteger, OFTReal, OFTString +from django.contrib.gis.gdal.field import ( + OFTDateTime, OFTInteger, OFTReal, OFTString, +) from django.test import SimpleTestCase from ..test_data import TEST_DATA, TestDS, get_ds_file @@ -17,6 +20,8 @@ wgs_84_wkt = ( # Using a regex because of small differences depending on GDAL versions. wgs_84_wkt_regex = r'^GEOGCS\["(GCS_)?WGS[ _](19)?84".*$' +datetime_format = '%Y-%m-%dT%H:%M:%S' + # List of acceptable data sources. ds_list = ( TestDS( @@ -53,7 +58,37 @@ ds_list = ( fields={'float': OFTReal, 'int': OFTInteger, 'str': OFTString}, extent=(-1.01513, -0.558245, 0.161876, 0.839637), # Got extent from QGIS srs_wkt=wgs_84_wkt, - ) + ), + TestDS( + 'has_nulls', nfeat=3, nfld=6, geom='POLYGON', gtype=3, + driver='GeoJSON', ext='geojson', + fields={ + 'uuid': OFTString, + 'name': OFTString, + 'num': OFTReal, + 'integer': OFTInteger, + 'datetime': OFTDateTime, + 'boolean': OFTInteger, + }, + extent=(-75.274200, 39.846504, -74.959717, 40.119040), # Got extent from QGIS + field_values={ + 'uuid': [ + '1378c26f-cbe6-44b0-929f-eb330d4991f5', + 'fa2ba67c-a135-4338-b924-a9622b5d869f', + '4494c1f3-55ab-4256-b365-12115cb388d5', + ], + 'name': ['Philadelphia', None, 'north'], + 'num': [1.001, None, 0.0], + 'integer': [5, None, 8], + 'boolean': [True, None, False], + 'datetime': [ + datetime.strptime('1994-08-14T11:32:14', datetime_format), + None, + datetime.strptime('2018-11-29T03:02:52', datetime_format), + ] + }, + fids=range(3), + ), ) bad_ds = (TestDS('foo'),) diff --git a/tests/gis_tests/layermap/models.py b/tests/gis_tests/layermap/models.py index 7c029a0ee8..b488eedffe 100644 --- a/tests/gis_tests/layermap/models.py +++ b/tests/gis_tests/layermap/models.py @@ -69,6 +69,26 @@ class Invalid(models.Model): point = models.PointField() +class HasNulls(models.Model): + uuid = models.UUIDField(primary_key=True, editable=False) + geom = models.PolygonField(srid=4326, blank=True, null=True) + datetime = models.DateTimeField(blank=True, null=True) + integer = models.IntegerField(blank=True, null=True) + num = models.FloatField(blank=True, null=True) + boolean = models.BooleanField(blank=True, null=True) + name = models.CharField(blank=True, null=True, max_length=20) + + +class DoesNotAllowNulls(models.Model): + uuid = models.UUIDField(primary_key=True, editable=False) + geom = models.PolygonField(srid=4326) + datetime = models.DateTimeField() + integer = models.IntegerField() + num = models.FloatField() + boolean = models.BooleanField() + name = models.CharField(max_length=20) + + # Mapping dictionaries for the models above. co_mapping = { 'name': 'Name', @@ -95,3 +115,13 @@ inter_mapping = { 'length': 'Length', 'path': 'LINESTRING', } + +has_nulls_mapping = { + 'geom': 'POLYGON', + 'uuid': 'uuid', + 'datetime': 'datetime', + 'name': 'name', + 'integer': 'integer', + 'num': 'num', + 'boolean': 'boolean', +} diff --git a/tests/gis_tests/layermap/tests.py b/tests/gis_tests/layermap/tests.py index 1efa643211..50fdb4815a 100644 --- a/tests/gis_tests/layermap/tests.py +++ b/tests/gis_tests/layermap/tests.py @@ -1,3 +1,4 @@ +import datetime import os import unittest from copy import copy @@ -13,8 +14,9 @@ from django.db import connection from django.test import TestCase, override_settings from .models import ( - City, County, CountyFeat, ICity1, ICity2, Interstate, Invalid, State, - city_mapping, co_mapping, cofeat_mapping, inter_mapping, + City, County, CountyFeat, DoesNotAllowNulls, HasNulls, ICity1, ICity2, + Interstate, Invalid, State, city_mapping, co_mapping, cofeat_mapping, + has_nulls_mapping, inter_mapping, ) shp_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir, 'data')) @@ -22,6 +24,7 @@ city_shp = os.path.join(shp_path, 'cities', 'cities.shp') co_shp = os.path.join(shp_path, 'counties', 'counties.shp') inter_shp = os.path.join(shp_path, 'interstates', 'interstates.shp') invalid_shp = os.path.join(shp_path, 'invalid', 'emptypoints.shp') +has_nulls_geojson = os.path.join(shp_path, 'has_nulls', 'has_nulls.geojson') # Dictionaries to hold what's expected in the county shapefile. NAMES = ['Bexar', 'Galveston', 'Harris', 'Honolulu', 'Pueblo'] @@ -321,6 +324,59 @@ class LayerMapTest(TestCase): self.assertIsNotNone(hu.mpoly) self.assertEqual(hu.mpoly.ogr.num_coords, 449) + def test_null_number_imported(self): + """LayerMapping import of GeoJSON with a null numeric value.""" + lm = LayerMapping(HasNulls, has_nulls_geojson, has_nulls_mapping) + lm.save() + self.assertEqual(HasNulls.objects.count(), 3) + self.assertEqual(HasNulls.objects.filter(num=0).count(), 1) + self.assertEqual(HasNulls.objects.filter(num__isnull=True).count(), 1) + + def test_null_string_imported(self): + "Test LayerMapping import of GeoJSON with a null string value." + lm = LayerMapping(HasNulls, has_nulls_geojson, has_nulls_mapping) + lm.save() + self.assertEqual(HasNulls.objects.filter(name='None').count(), 0) + num_empty = 1 if connection.features.interprets_empty_strings_as_nulls else 0 + self.assertEqual(HasNulls.objects.filter(name='').count(), num_empty) + self.assertEqual(HasNulls.objects.filter(name__isnull=True).count(), 1) + + def test_nullable_boolean_imported(self): + """LayerMapping import of GeoJSON with a nullable boolean value.""" + lm = LayerMapping(HasNulls, has_nulls_geojson, has_nulls_mapping) + lm.save() + self.assertEqual(HasNulls.objects.filter(boolean=True).count(), 1) + self.assertEqual(HasNulls.objects.filter(boolean=False).count(), 1) + self.assertEqual(HasNulls.objects.filter(boolean__isnull=True).count(), 1) + + def test_nullable_datetime_imported(self): + """LayerMapping import of GeoJSON with a nullable date/time value.""" + lm = LayerMapping(HasNulls, has_nulls_geojson, has_nulls_mapping) + lm.save() + self.assertEqual(HasNulls.objects.filter(datetime__lt=datetime.date(1994, 8, 15)).count(), 1) + self.assertEqual(HasNulls.objects.filter(datetime='2018-11-29T03:02:52').count(), 1) + self.assertEqual(HasNulls.objects.filter(datetime__isnull=True).count(), 1) + + def test_uuids_imported(self): + """LayerMapping import of GeoJSON with UUIDs.""" + lm = LayerMapping(HasNulls, has_nulls_geojson, has_nulls_mapping) + lm.save() + self.assertEqual(HasNulls.objects.filter(uuid='1378c26f-cbe6-44b0-929f-eb330d4991f5').count(), 1) + + def test_null_number_imported_not_allowed(self): + """ + LayerMapping import of GeoJSON with nulls to fields that don't permit + them. + """ + lm = LayerMapping(DoesNotAllowNulls, has_nulls_geojson, has_nulls_mapping) + lm.save(silent=True) + # When a model fails to save due to IntegrityError (null in non-null + # column), subsequent saves fail with "An error occurred in the current + # transaction. You can't execute queries until the end of the 'atomic' + # block." On Oracle and MySQL, the one object that did load appears in + # this count. On other databases, no records appear. + self.assertLessEqual(DoesNotAllowNulls.objects.count(), 1) + class OtherRouter: def db_for_read(self, model, **hints):