Use smarter string decoding in GeoDjango
The first try to solve the Python 3 GIS encoding/decoding issue was too naive. Using decode() on all read strings is bound to fail as soon as a non-ascii string is concerned. This patch is a little more clever, leaving ascii decoding when plain ascii strings are expected, and allowing to specify a custom encoding in DataSource hierarchy.
This commit is contained in:
parent
a62d53c032
commit
9a2bceed1a
|
@ -45,7 +45,7 @@ from django.contrib.gis.gdal.layer import Layer
|
|||
# Getting the ctypes prototypes for the DataSource.
|
||||
from django.contrib.gis.gdal.prototypes import ds as capi
|
||||
|
||||
from django.utils.encoding import force_bytes
|
||||
from django.utils.encoding import force_bytes, force_text
|
||||
from django.utils import six
|
||||
from django.utils.six.moves import xrange
|
||||
|
||||
|
@ -57,12 +57,14 @@ class DataSource(GDALBase):
|
|||
"Wraps an OGR Data Source object."
|
||||
|
||||
#### Python 'magic' routines ####
|
||||
def __init__(self, ds_input, ds_driver=False, write=False):
|
||||
def __init__(self, ds_input, ds_driver=False, write=False, encoding='utf-8'):
|
||||
# The write flag.
|
||||
if write:
|
||||
self._write = 1
|
||||
else:
|
||||
self._write = 0
|
||||
# See also http://trac.osgeo.org/gdal/wiki/rfc23_ogr_unicode
|
||||
self.encoding = encoding
|
||||
|
||||
# Registering all the drivers, this needs to be done
|
||||
# _before_ we try to open up a data source.
|
||||
|
@ -129,4 +131,5 @@ class DataSource(GDALBase):
|
|||
@property
|
||||
def name(self):
|
||||
"Returns the name of the data source."
|
||||
return capi.get_ds_name(self._ptr)
|
||||
name = capi.get_ds_name(self._ptr)
|
||||
return force_text(name, self.encoding, strings_only=True)
|
||||
|
|
|
@ -7,7 +7,7 @@ from django.contrib.gis.gdal.geometries import OGRGeometry, OGRGeomType
|
|||
# ctypes function prototypes
|
||||
from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api
|
||||
|
||||
from django.utils.encoding import force_bytes
|
||||
from django.utils.encoding import force_bytes, force_text
|
||||
from django.utils import six
|
||||
from django.utils.six.moves import xrange
|
||||
|
||||
|
@ -68,6 +68,10 @@ class Feature(GDALBase):
|
|||
return bool(capi.feature_equal(self.ptr, other._ptr))
|
||||
|
||||
#### Feature Properties ####
|
||||
@property
|
||||
def encoding(self):
|
||||
return self._layer._ds.encoding
|
||||
|
||||
@property
|
||||
def fid(self):
|
||||
"Returns the feature identifier."
|
||||
|
@ -76,7 +80,8 @@ class Feature(GDALBase):
|
|||
@property
|
||||
def layer_name(self):
|
||||
"Returns the name of the layer for the feature."
|
||||
return capi.get_feat_name(self._layer._ldefn)
|
||||
name = capi.get_feat_name(self._layer._ldefn)
|
||||
return force_text(name, self.encoding, strings_only=True)
|
||||
|
||||
@property
|
||||
def num_fields(self):
|
||||
|
|
|
@ -3,6 +3,8 @@ from datetime import date, datetime, time
|
|||
from django.contrib.gis.gdal.base import GDALBase
|
||||
from django.contrib.gis.gdal.error import OGRException
|
||||
from django.contrib.gis.gdal.prototypes import ds as capi
|
||||
from django.utils.encoding import force_text
|
||||
|
||||
|
||||
# For more information, see the OGR C API source code:
|
||||
# http://www.gdal.org/ogr/ogr__api_8h.html
|
||||
|
@ -53,7 +55,8 @@ class Field(GDALBase):
|
|||
|
||||
def as_string(self):
|
||||
"Retrieves the Field's value as a string."
|
||||
return capi.get_field_as_string(self._feat.ptr, self._index)
|
||||
string = capi.get_field_as_string(self._feat.ptr, self._index)
|
||||
return force_text(string, encoding=self._feat.encoding, strings_only=True)
|
||||
|
||||
def as_datetime(self):
|
||||
"Retrieves the Field's value as a tuple of date & time components."
|
||||
|
@ -70,7 +73,8 @@ class Field(GDALBase):
|
|||
@property
|
||||
def name(self):
|
||||
"Returns the name of this Field."
|
||||
return capi.get_field_name(self.ptr)
|
||||
name = capi.get_field_name(self.ptr)
|
||||
return force_text(name, encoding=self._feat.encoding, strings_only=True)
|
||||
|
||||
@property
|
||||
def precision(self):
|
||||
|
|
|
@ -14,7 +14,7 @@ from django.contrib.gis.gdal.srs import SpatialReference
|
|||
# GDAL ctypes function prototypes.
|
||||
from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api, srs as srs_api
|
||||
|
||||
from django.utils.encoding import force_bytes
|
||||
from django.utils.encoding import force_bytes, force_text
|
||||
from django.utils import six
|
||||
from django.utils.six.moves import xrange
|
||||
|
||||
|
@ -103,7 +103,8 @@ class Layer(GDALBase):
|
|||
@property
|
||||
def name(self):
|
||||
"Returns the name of this layer in the Data Source."
|
||||
return capi.get_fd_name(self._ldefn)
|
||||
name = capi.get_fd_name(self._ldefn)
|
||||
return force_text(name, self._ds.encoding, strings_only=True)
|
||||
|
||||
@property
|
||||
def num_feat(self, force=1):
|
||||
|
@ -135,8 +136,9 @@ class Layer(GDALBase):
|
|||
Returns a list of string names corresponding to each of the Fields
|
||||
available in this Layer.
|
||||
"""
|
||||
return [capi.get_field_name(capi.get_field_defn(self._ldefn, i))
|
||||
for i in xrange(self.num_fields) ]
|
||||
return [force_text(capi.get_field_name(capi.get_field_defn(self._ldefn, i)),
|
||||
self._ds.encoding, strings_only=True)
|
||||
for i in xrange(self.num_fields)]
|
||||
|
||||
@property
|
||||
def field_types(self):
|
||||
|
|
|
@ -17,7 +17,7 @@ cleanup_all = void_output(lgdal.OGRCleanupAll, [], errcheck=False)
|
|||
get_driver = voidptr_output(lgdal.OGRGetDriver, [c_int])
|
||||
get_driver_by_name = voidptr_output(lgdal.OGRGetDriverByName, [c_char_p])
|
||||
get_driver_count = int_output(lgdal.OGRGetDriverCount, [])
|
||||
get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p])
|
||||
get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p], decoding='ascii')
|
||||
|
||||
### DataSource ###
|
||||
open_ds = voidptr_output(lgdal.OGROpen, [c_char_p, c_int, POINTER(c_void_p)])
|
||||
|
|
|
@ -30,10 +30,9 @@ def check_const_string(result, func, cargs, offset=None):
|
|||
if offset:
|
||||
check_err(result)
|
||||
ptr = ptr_byref(cargs, offset)
|
||||
return ptr.value.decode()
|
||||
return ptr.value
|
||||
else:
|
||||
if result is not None:
|
||||
return result.decode()
|
||||
return result
|
||||
|
||||
def check_string(result, func, cargs, offset=-1, str_result=False):
|
||||
"""
|
||||
|
@ -48,13 +47,13 @@ def check_string(result, func, cargs, offset=-1, str_result=False):
|
|||
# For routines that return a string.
|
||||
ptr = result
|
||||
if not ptr: s = None
|
||||
else: s = string_at(result).decode()
|
||||
else: s = string_at(result)
|
||||
else:
|
||||
# Error-code return specified.
|
||||
check_err(result)
|
||||
ptr = ptr_byref(cargs, offset)
|
||||
# Getting the string value
|
||||
s = ptr.value.decode()
|
||||
s = ptr.value
|
||||
# Correctly freeing the allocated memory beind GDAL pointer
|
||||
# w/the VSIFree routine.
|
||||
if ptr: lgdal.VSIFree(ptr)
|
||||
|
|
|
@ -57,7 +57,7 @@ def srs_output(func, argtypes):
|
|||
func.errcheck = check_srs
|
||||
return func
|
||||
|
||||
def const_string_output(func, argtypes, offset=None):
|
||||
def const_string_output(func, argtypes, offset=None, decoding=None):
|
||||
func.argtypes = argtypes
|
||||
if offset:
|
||||
func.restype = c_int
|
||||
|
@ -65,12 +65,15 @@ def const_string_output(func, argtypes, offset=None):
|
|||
func.restype = c_char_p
|
||||
|
||||
def _check_const(result, func, cargs):
|
||||
return check_const_string(result, func, cargs, offset=offset)
|
||||
res = check_const_string(result, func, cargs, offset=offset)
|
||||
if res and decoding:
|
||||
res = res.decode(decoding)
|
||||
return res
|
||||
func.errcheck = _check_const
|
||||
|
||||
return func
|
||||
|
||||
def string_output(func, argtypes, offset=-1, str_result=False):
|
||||
def string_output(func, argtypes, offset=-1, str_result=False, decoding=None):
|
||||
"""
|
||||
Generates a ctypes prototype for the given function with the
|
||||
given argument types that returns a string from a GDAL pointer.
|
||||
|
@ -90,8 +93,11 @@ def string_output(func, argtypes, offset=-1, str_result=False):
|
|||
# Dynamically defining our error-checking function with the
|
||||
# given offset.
|
||||
def _check_str(result, func, cargs):
|
||||
return check_string(result, func, cargs,
|
||||
res = check_string(result, func, cargs,
|
||||
offset=offset, str_result=str_result)
|
||||
if res and decoding:
|
||||
res = res.decode(decoding)
|
||||
return res
|
||||
func.errcheck = _check_str
|
||||
return func
|
||||
|
||||
|
|
|
@ -27,8 +27,8 @@ def topology_func(f):
|
|||
|
||||
# GeoJSON routines.
|
||||
from_json = geom_output(lgdal.OGR_G_CreateGeometryFromJson, [c_char_p])
|
||||
to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True)
|
||||
to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True)
|
||||
to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True, decoding='ascii')
|
||||
to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True, decoding='ascii')
|
||||
|
||||
# GetX, GetY, GetZ all return doubles.
|
||||
getx = pnt_func(lgdal.OGR_G_GetX)
|
||||
|
@ -57,8 +57,8 @@ destroy_geom = void_output(lgdal.OGR_G_DestroyGeometry, [c_void_p], errcheck=Fal
|
|||
|
||||
# Geometry export routines.
|
||||
to_wkb = void_output(lgdal.OGR_G_ExportToWkb, None, errcheck=True) # special handling for WKB.
|
||||
to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)])
|
||||
to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True)
|
||||
to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)], decoding='ascii')
|
||||
to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True, decoding='ascii')
|
||||
get_wkbsize = int_output(lgdal.OGR_G_WkbSize, [c_void_p])
|
||||
|
||||
# Geometry spatial-reference related routines.
|
||||
|
@ -73,7 +73,7 @@ get_coord_dim = int_output(lgdal.OGR_G_GetCoordinateDimension, [c_void_p])
|
|||
set_coord_dim = void_output(lgdal.OGR_G_SetCoordinateDimension, [c_void_p, c_int], errcheck=False)
|
||||
|
||||
get_geom_count = int_output(lgdal.OGR_G_GetGeometryCount, [c_void_p])
|
||||
get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p])
|
||||
get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p], decoding='ascii')
|
||||
get_geom_type = int_output(lgdal.OGR_G_GetGeometryType, [c_void_p])
|
||||
get_point_count = int_output(lgdal.OGR_G_GetPointCount, [c_void_p])
|
||||
get_point = void_output(lgdal.OGR_G_GetPoint, [c_void_p, c_int, POINTER(c_double), POINTER(c_double), POINTER(c_double)], errcheck=False)
|
||||
|
|
|
@ -49,17 +49,17 @@ linear_units = units_func(lgdal.OSRGetLinearUnits)
|
|||
angular_units = units_func(lgdal.OSRGetAngularUnits)
|
||||
|
||||
# For exporting to WKT, PROJ.4, "Pretty" WKT, and XML.
|
||||
to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)])
|
||||
to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)])
|
||||
to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2)
|
||||
to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
|
||||
to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
|
||||
to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2, decoding='ascii')
|
||||
|
||||
# Memory leak fixed in GDAL 1.5; still exists in 1.4.
|
||||
to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2)
|
||||
to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2, decoding='ascii')
|
||||
|
||||
# String attribute retrival routines.
|
||||
get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int])
|
||||
get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p])
|
||||
get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p])
|
||||
get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int], decoding='ascii')
|
||||
get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p], decoding='ascii')
|
||||
get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p], decoding='ascii')
|
||||
|
||||
# SRS Properties
|
||||
isgeographic = int_output(lgdal.OSRIsGeographic, [c_void_p])
|
||||
|
|
|
@ -34,7 +34,7 @@ from django.contrib.gis.gdal.error import SRSException
|
|||
from django.contrib.gis.gdal.prototypes import srs as capi
|
||||
|
||||
from django.utils import six
|
||||
from django.utils.encoding import force_bytes, force_text
|
||||
from django.utils.encoding import force_bytes
|
||||
|
||||
|
||||
#### Spatial Reference class. ####
|
||||
|
@ -139,8 +139,7 @@ class SpatialReference(GDALBase):
|
|||
"""
|
||||
if not isinstance(target, six.string_types) or not isinstance(index, int):
|
||||
raise TypeError
|
||||
value = capi.get_attr_value(self.ptr, force_bytes(target), index)
|
||||
return force_text(value, 'ascii', strings_only=True)
|
||||
return capi.get_attr_value(self.ptr, force_bytes(target), index)
|
||||
|
||||
def auth_name(self, target):
|
||||
"Returns the authority name for the given string target node."
|
||||
|
|
|
@ -167,7 +167,8 @@ class DataSourceTest(unittest.TestCase):
|
|||
self.assertEqual(True, isinstance(feat[k], v))
|
||||
|
||||
# Testing Feature.__iter__
|
||||
for fld in feat: self.assertEqual(True, fld.name in source.fields.keys())
|
||||
for fld in feat:
|
||||
self.assertEqual(True, fld.name in source.fields.keys())
|
||||
|
||||
def test05_geometries(self):
|
||||
"Testing Geometries from Data Source Features."
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1 @@
|
|||
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
|
Binary file not shown.
Binary file not shown.
|
@ -1,4 +1,5 @@
|
|||
from __future__ import absolute_import
|
||||
# coding: utf-8
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import os
|
||||
from copy import copy
|
||||
|
@ -286,6 +287,13 @@ class LayerMapTest(TestCase):
|
|||
self.assertEqual(City.objects.count(), 3)
|
||||
self.assertEqual(City.objects.all().order_by('name_txt')[0].name_txt, "Houston")
|
||||
|
||||
def test_encoded_name(self):
|
||||
""" Test a layer containing utf-8-encoded name """
|
||||
city_shp = os.path.join(shp_path, 'ch-city', 'ch-city.shp')
|
||||
lm = LayerMapping(City, city_shp, city_mapping)
|
||||
lm.save(silent=True, strict=True)
|
||||
self.assertEqual(City.objects.count(), 1)
|
||||
self.assertEqual(City.objects.all()[0].name, "Zürich")
|
||||
|
||||
class OtherRouter(object):
|
||||
def db_for_read(self, model, **hints):
|
||||
|
|
|
@ -18,6 +18,8 @@ from django.contrib.gis.gdal.field import (
|
|||
from django.db import models, transaction
|
||||
from django.contrib.localflavor.us.models import USStateField
|
||||
from django.utils import six
|
||||
from django.utils.encoding import force_text
|
||||
|
||||
|
||||
# LayerMapping exceptions.
|
||||
class LayerMapError(Exception): pass
|
||||
|
@ -65,7 +67,7 @@ class LayerMapping(object):
|
|||
}
|
||||
|
||||
def __init__(self, model, data, mapping, layer=0,
|
||||
source_srs=None, encoding=None,
|
||||
source_srs=None, encoding='utf-8',
|
||||
transaction_mode='commit_on_success',
|
||||
transform=True, unique=None, using=None):
|
||||
"""
|
||||
|
@ -76,7 +78,7 @@ class LayerMapping(object):
|
|||
"""
|
||||
# Getting the DataSource and the associated Layer.
|
||||
if isinstance(data, six.string_types):
|
||||
self.ds = DataSource(data)
|
||||
self.ds = DataSource(data, encoding=encoding)
|
||||
else:
|
||||
self.ds = data
|
||||
self.layer = self.ds[layer]
|
||||
|
@ -330,7 +332,7 @@ class LayerMapping(object):
|
|||
if self.encoding:
|
||||
# The encoding for OGR data sources may be specified here
|
||||
# (e.g., 'cp437' for Census Bureau boundary files).
|
||||
val = six.text_type(ogr_field.value, self.encoding)
|
||||
val = force_text(ogr_field.value, self.encoding)
|
||||
else:
|
||||
val = ogr_field.value
|
||||
if model_field.max_length and len(val) > model_field.max_length:
|
||||
|
|
Loading…
Reference in New Issue