Fixed #30020 -- Fixed reading nulls with LayerMapping.

This commit is contained in:
Kathryn Killebrew 2019-01-31 19:50:16 -05:00 committed by Tim Graham
parent 6da28d5edf
commit 75d627888b
9 changed files with 302 additions and 26 deletions

View File

@ -465,6 +465,7 @@ answer newbie questions, and generally made Django that much better:
Karderio <karderio@gmail.com> Karderio <karderio@gmail.com>
Karen Tracey <kmtracey@gmail.com> Karen Tracey <kmtracey@gmail.com>
Karol Sikora <elektrrrus@gmail.com> Karol Sikora <elektrrrus@gmail.com>
Kathryn Killebrew <kathryn.killebrew@gmail.com>
Katie Miller <katie@sub50.com> Katie Miller <katie@sub50.com>
Keith Bussell <kbussell@gmail.com> Keith Bussell <kbussell@gmail.com>
Kenneth Love <kennethlove@gmail.com> Kenneth Love <kennethlove@gmail.com>

View File

@ -34,11 +34,6 @@ class Field(GDALBase):
# Setting the class depending upon the OGR Field Type (OFT) # Setting the class depending upon the OGR Field Type (OFT)
self.__class__ = OGRFieldTypes[self.type] self.__class__ = OGRFieldTypes[self.type]
# OFTReal with no precision should be an OFTInteger.
if isinstance(self, OFTReal) and self.precision == 0:
self.__class__ = OFTInteger
self._double = True
def __str__(self): def __str__(self):
"Return the string representation of the Field." "Return the string representation of the Field."
return str(self.value).strip() return str(self.value).strip()
@ -46,22 +41,26 @@ class Field(GDALBase):
# #### Field Methods #### # #### Field Methods ####
def as_double(self): def as_double(self):
"Retrieve the Field's value as a double (float)." "Retrieve the Field's value as a double (float)."
return capi.get_field_as_double(self._feat.ptr, self._index) return capi.get_field_as_double(self._feat.ptr, self._index) if self.is_set else None
def as_int(self, is_64=False): def as_int(self, is_64=False):
"Retrieve the Field's value as an integer." "Retrieve the Field's value as an integer."
if is_64: if is_64:
return capi.get_field_as_integer64(self._feat.ptr, self._index) return capi.get_field_as_integer64(self._feat.ptr, self._index) if self.is_set else None
else: else:
return capi.get_field_as_integer(self._feat.ptr, self._index) return capi.get_field_as_integer(self._feat.ptr, self._index) if self.is_set else None
def as_string(self): def as_string(self):
"Retrieve the Field's value as a string." "Retrieve the Field's value as a string."
if not self.is_set:
return None
string = capi.get_field_as_string(self._feat.ptr, self._index) string = capi.get_field_as_string(self._feat.ptr, self._index)
return force_text(string, encoding=self._feat.encoding, strings_only=True) return force_text(string, encoding=self._feat.encoding, strings_only=True)
def as_datetime(self): def as_datetime(self):
"Retrieve the Field's value as a tuple of date & time components." "Retrieve the Field's value as a tuple of date & time components."
if not self.is_set:
return None
yy, mm, dd, hh, mn, ss, tz = [c_int() for i in range(7)] yy, mm, dd, hh, mn, ss, tz = [c_int() for i in range(7)]
status = capi.get_field_as_datetime( status = capi.get_field_as_datetime(
self._feat.ptr, self._index, byref(yy), byref(mm), byref(dd), self._feat.ptr, self._index, byref(yy), byref(mm), byref(dd),
@ -72,6 +71,11 @@ class Field(GDALBase):
raise GDALException('Unable to retrieve date & time information from the field.') raise GDALException('Unable to retrieve date & time information from the field.')
# #### Field Properties #### # #### Field Properties ####
@property
def is_set(self):
"Return True if the value of this field isn't null, False otherwise."
return capi.is_field_set(self._feat.ptr, self._index)
@property @property
def name(self): def name(self):
"Return the name of this Field." "Return the name of this Field."
@ -107,18 +111,12 @@ class Field(GDALBase):
# ### The Field sub-classes for each OGR Field type. ### # ### The Field sub-classes for each OGR Field type. ###
class OFTInteger(Field): class OFTInteger(Field):
_double = False
_bit64 = False _bit64 = False
@property @property
def value(self): def value(self):
"Return an integer contained in this field." "Return an integer contained in this field."
if self._double: return self.as_int(self._bit64)
# If this is really from an OFTReal field with no precision,
# read as a double and cast as Python int (to prevent overflow).
return int(self.as_double())
else:
return self.as_int(self._bit64)
@property @property
def type(self): def type(self):
@ -158,7 +156,7 @@ class OFTDate(Field):
try: try:
yy, mm, dd, hh, mn, ss, tz = self.as_datetime() yy, mm, dd, hh, mn, ss, tz = self.as_datetime()
return date(yy.value, mm.value, dd.value) return date(yy.value, mm.value, dd.value)
except (ValueError, GDALException): except (TypeError, ValueError, GDALException):
return None return None
@ -173,7 +171,7 @@ class OFTDateTime(Field):
try: try:
yy, mm, dd, hh, mn, ss, tz = self.as_datetime() yy, mm, dd, hh, mn, ss, tz = self.as_datetime()
return datetime(yy.value, mm.value, dd.value, hh.value, mn.value, ss.value) return datetime(yy.value, mm.value, dd.value, hh.value, mn.value, ss.value)
except (ValueError, GDALException): except (TypeError, ValueError, GDALException):
return None return None

View File

@ -8,8 +8,8 @@ from ctypes import POINTER, c_char_p, c_double, c_int, c_long, c_void_p
from django.contrib.gis.gdal.envelope import OGREnvelope from django.contrib.gis.gdal.envelope import OGREnvelope
from django.contrib.gis.gdal.libgdal import GDAL_VERSION, lgdal from django.contrib.gis.gdal.libgdal import GDAL_VERSION, lgdal
from django.contrib.gis.gdal.prototypes.generation import ( from django.contrib.gis.gdal.prototypes.generation import (
const_string_output, double_output, geom_output, int64_output, int_output, bool_output, const_string_output, double_output, geom_output, int64_output,
srs_output, void_output, voidptr_output, int_output, srs_output, void_output, voidptr_output,
) )
c_int_p = POINTER(c_int) # shortcut type c_int_p = POINTER(c_int) # shortcut type
@ -70,6 +70,10 @@ get_field_as_double = double_output(lgdal.OGR_F_GetFieldAsDouble, [c_void_p, c_i
get_field_as_integer = int_output(lgdal.OGR_F_GetFieldAsInteger, [c_void_p, c_int]) get_field_as_integer = int_output(lgdal.OGR_F_GetFieldAsInteger, [c_void_p, c_int])
if GDAL_VERSION >= (2, 0): if GDAL_VERSION >= (2, 0):
get_field_as_integer64 = int64_output(lgdal.OGR_F_GetFieldAsInteger64, [c_void_p, c_int]) get_field_as_integer64 = int64_output(lgdal.OGR_F_GetFieldAsInteger64, [c_void_p, c_int])
if GDAL_VERSION >= (2, 2):
is_field_set = bool_output(lgdal.OGR_F_IsFieldSetAndNotNull, [c_void_p, c_int])
else:
is_field_set = bool_output(lgdal.OGR_F_IsFieldSet, [c_void_p, c_int])
get_field_as_string = const_string_output(lgdal.OGR_F_GetFieldAsString, [c_void_p, c_int]) get_field_as_string = const_string_output(lgdal.OGR_F_GetFieldAsString, [c_void_p, c_int])
get_field_index = int_output(lgdal.OGR_F_GetFieldIndex, [c_void_p, c_char_p]) get_field_index = int_output(lgdal.OGR_F_GetFieldIndex, [c_void_p, c_char_p])

View File

@ -2,7 +2,9 @@
This module contains functions that generate ctypes prototypes for the This module contains functions that generate ctypes prototypes for the
GDAL routines. GDAL routines.
""" """
from ctypes import POINTER, c_char_p, c_double, c_int, c_int64, c_void_p from ctypes import (
POINTER, c_bool, c_char_p, c_double, c_int, c_int64, c_void_p,
)
from functools import partial from functools import partial
from django.contrib.gis.gdal.prototypes.errcheck import ( from django.contrib.gis.gdal.prototypes.errcheck import (
@ -15,6 +17,15 @@ class gdal_char_p(c_char_p):
pass pass
def bool_output(func, argtypes, errcheck=None):
"""Generate a ctypes function that returns a boolean value."""
func.argtypes = argtypes
func.restype = c_bool
if errcheck:
func.errcheck = errcheck
return func
def double_output(func, argtypes, errcheck=False, strarg=False, cpl=False): def double_output(func, argtypes, errcheck=False, strarg=False, cpl=False):
"Generate a ctypes function that returns a double value." "Generate a ctypes function that returns a double value."
func.argtypes = argtypes func.argtypes = argtypes

View File

@ -61,6 +61,7 @@ class LayerMapping:
FIELD_TYPES = { FIELD_TYPES = {
models.AutoField: OFTInteger, models.AutoField: OFTInteger,
models.BigAutoField: OFTInteger64, models.BigAutoField: OFTInteger64,
models.BooleanField: (OFTInteger, OFTReal, OFTString),
models.IntegerField: (OFTInteger, OFTReal, OFTString), models.IntegerField: (OFTInteger, OFTReal, OFTString),
models.FloatField: (OFTInteger, OFTReal), models.FloatField: (OFTInteger, OFTReal),
models.DateField: OFTDate, models.DateField: OFTDate,
@ -72,6 +73,7 @@ class LayerMapping:
models.SlugField: OFTString, models.SlugField: OFTString,
models.TextField: OFTString, models.TextField: OFTString,
models.URLField: OFTString, models.URLField: OFTString,
models.UUIDField: OFTString,
models.BigIntegerField: (OFTInteger, OFTReal, OFTString), models.BigIntegerField: (OFTInteger, OFTReal, OFTString),
models.SmallIntegerField: (OFTInteger, OFTReal, OFTString), models.SmallIntegerField: (OFTInteger, OFTReal, OFTString),
models.PositiveSmallIntegerField: (OFTInteger, OFTReal, OFTString), models.PositiveSmallIntegerField: (OFTInteger, OFTReal, OFTString),
@ -343,13 +345,13 @@ class LayerMapping:
""" """
if (isinstance(ogr_field, OFTString) and if (isinstance(ogr_field, OFTString) and
isinstance(model_field, (models.CharField, models.TextField))): isinstance(model_field, (models.CharField, models.TextField))):
if self.encoding: if self.encoding and ogr_field.value is not None:
# The encoding for OGR data sources may be specified here # The encoding for OGR data sources may be specified here
# (e.g., 'cp437' for Census Bureau boundary files). # (e.g., 'cp437' for Census Bureau boundary files).
val = force_text(ogr_field.value, self.encoding) val = force_text(ogr_field.value, self.encoding)
else: else:
val = ogr_field.value val = ogr_field.value
if model_field.max_length and len(val) > model_field.max_length: if model_field.max_length and val is not None and len(val) > model_field.max_length:
raise InvalidString('%s model field maximum string length is %s, given %s characters.' % raise InvalidString('%s model field maximum string length is %s, given %s characters.' %
(model_field.name, model_field.max_length, len(val))) (model_field.name, model_field.max_length, len(val)))
elif isinstance(ogr_field, OFTReal) and isinstance(model_field, models.DecimalField): elif isinstance(ogr_field, OFTReal) and isinstance(model_field, models.DecimalField):

View File

@ -0,0 +1,139 @@
{
"type": "FeatureCollection",
"name": "has_nulls",
"crs": {
"type": "name",
"properties": {
"name": "urn:ogc:def:crs:OGC:1.3:CRS84"
}
},
"features": [
{
"type": "Feature",
"properties": {
"uuid": "1378c26f-cbe6-44b0-929f-eb330d4991f5",
"boolean": true,
"datetime": "1994-08-14T11:32:14+0000",
"name": "Philadelphia",
"integer": 5,
"num": 1.001
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-75.14923095703125,
39.8928799002948
],
[
-75.25772094726562,
39.889718875996685
],
[
-75.201416015625,
40.03076909369051
],
[
-75.07644653320312,
39.99500778093748
],
[
-75.14785766601562,
39.95291166179976
],
[
-75.135498046875,
39.91605629078665
],
[
-75.14923095703125,
39.8928799002948
]
]
]
}
},
{
"type": "Feature",
"properties": {
"uuid": "fa2ba67c-a135-4338-b924-a9622b5d869f",
"integer": null,
"num": null
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-75.11215209960938,
39.94870062390347
],
[
-75.02838134765625,
39.990799335838034
],
[
-74.959716796875,
39.920269337633975
],
[
-75.025634765625,
39.8465036024177
],
[
-75.12725830078125,
39.871803651624425
],
[
-75.11215209960938,
39.94870062390347
]
]
]
}
},
{
"type": "Feature",
"properties": {
"uuid": "4494c1f3-55ab-4256-b365-12115cb388d5",
"datetime": "2018-11-29T03:02:52+0000",
"name": "north",
"integer": 8,
"num": 0.0,
"boolean": false
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-75.24948120117188,
39.99395569397331
],
[
-75.27420043945312,
40.0507451947963
],
[
-75.20553588867188,
40.119040222688795
],
[
-75.08193969726562,
40.03287211677227
],
[
-75.16433715820312,
39.97396296240704
],
[
-75.24948120117188,
39.99395569397331
]
]
]
}
}
]
}

View File

@ -1,10 +1,13 @@
import os import os
import re import re
from datetime import datetime
from django.contrib.gis.gdal import ( from django.contrib.gis.gdal import (
GDAL_VERSION, DataSource, Envelope, GDALException, OGRGeometry, GDAL_VERSION, DataSource, Envelope, GDALException, OGRGeometry,
) )
from django.contrib.gis.gdal.field import OFTInteger, OFTReal, OFTString from django.contrib.gis.gdal.field import (
OFTDateTime, OFTInteger, OFTReal, OFTString,
)
from django.test import SimpleTestCase from django.test import SimpleTestCase
from ..test_data import TEST_DATA, TestDS, get_ds_file from ..test_data import TEST_DATA, TestDS, get_ds_file
@ -17,6 +20,8 @@ wgs_84_wkt = (
# Using a regex because of small differences depending on GDAL versions. # Using a regex because of small differences depending on GDAL versions.
wgs_84_wkt_regex = r'^GEOGCS\["(GCS_)?WGS[ _](19)?84".*$' wgs_84_wkt_regex = r'^GEOGCS\["(GCS_)?WGS[ _](19)?84".*$'
datetime_format = '%Y-%m-%dT%H:%M:%S'
# List of acceptable data sources. # List of acceptable data sources.
ds_list = ( ds_list = (
TestDS( TestDS(
@ -53,7 +58,37 @@ ds_list = (
fields={'float': OFTReal, 'int': OFTInteger, 'str': OFTString}, fields={'float': OFTReal, 'int': OFTInteger, 'str': OFTString},
extent=(-1.01513, -0.558245, 0.161876, 0.839637), # Got extent from QGIS extent=(-1.01513, -0.558245, 0.161876, 0.839637), # Got extent from QGIS
srs_wkt=wgs_84_wkt, srs_wkt=wgs_84_wkt,
) ),
TestDS(
'has_nulls', nfeat=3, nfld=6, geom='POLYGON', gtype=3,
driver='GeoJSON', ext='geojson',
fields={
'uuid': OFTString,
'name': OFTString,
'num': OFTReal,
'integer': OFTInteger,
'datetime': OFTDateTime,
'boolean': OFTInteger,
},
extent=(-75.274200, 39.846504, -74.959717, 40.119040), # Got extent from QGIS
field_values={
'uuid': [
'1378c26f-cbe6-44b0-929f-eb330d4991f5',
'fa2ba67c-a135-4338-b924-a9622b5d869f',
'4494c1f3-55ab-4256-b365-12115cb388d5',
],
'name': ['Philadelphia', None, 'north'],
'num': [1.001, None, 0.0],
'integer': [5, None, 8],
'boolean': [True, None, False],
'datetime': [
datetime.strptime('1994-08-14T11:32:14', datetime_format),
None,
datetime.strptime('2018-11-29T03:02:52', datetime_format),
]
},
fids=range(3),
),
) )
bad_ds = (TestDS('foo'),) bad_ds = (TestDS('foo'),)

View File

@ -69,6 +69,26 @@ class Invalid(models.Model):
point = models.PointField() point = models.PointField()
class HasNulls(models.Model):
uuid = models.UUIDField(primary_key=True, editable=False)
geom = models.PolygonField(srid=4326, blank=True, null=True)
datetime = models.DateTimeField(blank=True, null=True)
integer = models.IntegerField(blank=True, null=True)
num = models.FloatField(blank=True, null=True)
boolean = models.BooleanField(blank=True, null=True)
name = models.CharField(blank=True, null=True, max_length=20)
class DoesNotAllowNulls(models.Model):
uuid = models.UUIDField(primary_key=True, editable=False)
geom = models.PolygonField(srid=4326)
datetime = models.DateTimeField()
integer = models.IntegerField()
num = models.FloatField()
boolean = models.BooleanField()
name = models.CharField(max_length=20)
# Mapping dictionaries for the models above. # Mapping dictionaries for the models above.
co_mapping = { co_mapping = {
'name': 'Name', 'name': 'Name',
@ -95,3 +115,13 @@ inter_mapping = {
'length': 'Length', 'length': 'Length',
'path': 'LINESTRING', 'path': 'LINESTRING',
} }
has_nulls_mapping = {
'geom': 'POLYGON',
'uuid': 'uuid',
'datetime': 'datetime',
'name': 'name',
'integer': 'integer',
'num': 'num',
'boolean': 'boolean',
}

View File

@ -1,3 +1,4 @@
import datetime
import os import os
import unittest import unittest
from copy import copy from copy import copy
@ -13,8 +14,9 @@ from django.db import connection
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
from .models import ( from .models import (
City, County, CountyFeat, ICity1, ICity2, Interstate, Invalid, State, City, County, CountyFeat, DoesNotAllowNulls, HasNulls, ICity1, ICity2,
city_mapping, co_mapping, cofeat_mapping, inter_mapping, Interstate, Invalid, State, city_mapping, co_mapping, cofeat_mapping,
has_nulls_mapping, inter_mapping,
) )
shp_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir, 'data')) shp_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir, 'data'))
@ -22,6 +24,7 @@ city_shp = os.path.join(shp_path, 'cities', 'cities.shp')
co_shp = os.path.join(shp_path, 'counties', 'counties.shp') co_shp = os.path.join(shp_path, 'counties', 'counties.shp')
inter_shp = os.path.join(shp_path, 'interstates', 'interstates.shp') inter_shp = os.path.join(shp_path, 'interstates', 'interstates.shp')
invalid_shp = os.path.join(shp_path, 'invalid', 'emptypoints.shp') invalid_shp = os.path.join(shp_path, 'invalid', 'emptypoints.shp')
has_nulls_geojson = os.path.join(shp_path, 'has_nulls', 'has_nulls.geojson')
# Dictionaries to hold what's expected in the county shapefile. # Dictionaries to hold what's expected in the county shapefile.
NAMES = ['Bexar', 'Galveston', 'Harris', 'Honolulu', 'Pueblo'] NAMES = ['Bexar', 'Galveston', 'Harris', 'Honolulu', 'Pueblo']
@ -321,6 +324,59 @@ class LayerMapTest(TestCase):
self.assertIsNotNone(hu.mpoly) self.assertIsNotNone(hu.mpoly)
self.assertEqual(hu.mpoly.ogr.num_coords, 449) self.assertEqual(hu.mpoly.ogr.num_coords, 449)
def test_null_number_imported(self):
"""LayerMapping import of GeoJSON with a null numeric value."""
lm = LayerMapping(HasNulls, has_nulls_geojson, has_nulls_mapping)
lm.save()
self.assertEqual(HasNulls.objects.count(), 3)
self.assertEqual(HasNulls.objects.filter(num=0).count(), 1)
self.assertEqual(HasNulls.objects.filter(num__isnull=True).count(), 1)
def test_null_string_imported(self):
"Test LayerMapping import of GeoJSON with a null string value."
lm = LayerMapping(HasNulls, has_nulls_geojson, has_nulls_mapping)
lm.save()
self.assertEqual(HasNulls.objects.filter(name='None').count(), 0)
num_empty = 1 if connection.features.interprets_empty_strings_as_nulls else 0
self.assertEqual(HasNulls.objects.filter(name='').count(), num_empty)
self.assertEqual(HasNulls.objects.filter(name__isnull=True).count(), 1)
def test_nullable_boolean_imported(self):
"""LayerMapping import of GeoJSON with a nullable boolean value."""
lm = LayerMapping(HasNulls, has_nulls_geojson, has_nulls_mapping)
lm.save()
self.assertEqual(HasNulls.objects.filter(boolean=True).count(), 1)
self.assertEqual(HasNulls.objects.filter(boolean=False).count(), 1)
self.assertEqual(HasNulls.objects.filter(boolean__isnull=True).count(), 1)
def test_nullable_datetime_imported(self):
"""LayerMapping import of GeoJSON with a nullable date/time value."""
lm = LayerMapping(HasNulls, has_nulls_geojson, has_nulls_mapping)
lm.save()
self.assertEqual(HasNulls.objects.filter(datetime__lt=datetime.date(1994, 8, 15)).count(), 1)
self.assertEqual(HasNulls.objects.filter(datetime='2018-11-29T03:02:52').count(), 1)
self.assertEqual(HasNulls.objects.filter(datetime__isnull=True).count(), 1)
def test_uuids_imported(self):
"""LayerMapping import of GeoJSON with UUIDs."""
lm = LayerMapping(HasNulls, has_nulls_geojson, has_nulls_mapping)
lm.save()
self.assertEqual(HasNulls.objects.filter(uuid='1378c26f-cbe6-44b0-929f-eb330d4991f5').count(), 1)
def test_null_number_imported_not_allowed(self):
"""
LayerMapping import of GeoJSON with nulls to fields that don't permit
them.
"""
lm = LayerMapping(DoesNotAllowNulls, has_nulls_geojson, has_nulls_mapping)
lm.save(silent=True)
# When a model fails to save due to IntegrityError (null in non-null
# column), subsequent saves fail with "An error occurred in the current
# transaction. You can't execute queries until the end of the 'atomic'
# block." On Oracle and MySQL, the one object that did load appears in
# this count. On other databases, no records appear.
self.assertLessEqual(DoesNotAllowNulls.objects.count(), 1)
class OtherRouter: class OtherRouter:
def db_for_read(self, model, **hints): def db_for_read(self, model, **hints):