Use smarter string decoding in GeoDjango

The first attempt to solve the Python 3 GIS encoding/decoding issue
was too naive. Calling decode() on every string that is read is bound
to fail as soon as a non-ASCII string is encountered.
This patch is a little smarter: it keeps ASCII decoding where
plain ASCII strings are expected, and allows a custom encoding to be
specified in the DataSource hierarchy.
This commit is contained in:
Claude Paroz 2012-10-06 22:56:47 +02:00
parent a62d53c032
commit 9a2bceed1a
17 changed files with 71 additions and 41 deletions

View File

@ -45,7 +45,7 @@ from django.contrib.gis.gdal.layer import Layer
# Getting the ctypes prototypes for the DataSource. # Getting the ctypes prototypes for the DataSource.
from django.contrib.gis.gdal.prototypes import ds as capi from django.contrib.gis.gdal.prototypes import ds as capi
from django.utils.encoding import force_bytes from django.utils.encoding import force_bytes, force_text
from django.utils import six from django.utils import six
from django.utils.six.moves import xrange from django.utils.six.moves import xrange
@ -57,12 +57,14 @@ class DataSource(GDALBase):
"Wraps an OGR Data Source object." "Wraps an OGR Data Source object."
#### Python 'magic' routines #### #### Python 'magic' routines ####
def __init__(self, ds_input, ds_driver=False, write=False): def __init__(self, ds_input, ds_driver=False, write=False, encoding='utf-8'):
# The write flag. # The write flag.
if write: if write:
self._write = 1 self._write = 1
else: else:
self._write = 0 self._write = 0
# See also http://trac.osgeo.org/gdal/wiki/rfc23_ogr_unicode
self.encoding = encoding
# Registering all the drivers, this needs to be done # Registering all the drivers, this needs to be done
# _before_ we try to open up a data source. # _before_ we try to open up a data source.
@ -129,4 +131,5 @@ class DataSource(GDALBase):
@property @property
def name(self): def name(self):
"Returns the name of the data source." "Returns the name of the data source."
return capi.get_ds_name(self._ptr) name = capi.get_ds_name(self._ptr)
return force_text(name, self.encoding, strings_only=True)

View File

@ -7,7 +7,7 @@ from django.contrib.gis.gdal.geometries import OGRGeometry, OGRGeomType
# ctypes function prototypes # ctypes function prototypes
from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api
from django.utils.encoding import force_bytes from django.utils.encoding import force_bytes, force_text
from django.utils import six from django.utils import six
from django.utils.six.moves import xrange from django.utils.six.moves import xrange
@ -68,6 +68,10 @@ class Feature(GDALBase):
return bool(capi.feature_equal(self.ptr, other._ptr)) return bool(capi.feature_equal(self.ptr, other._ptr))
#### Feature Properties #### #### Feature Properties ####
@property
def encoding(self):
return self._layer._ds.encoding
@property @property
def fid(self): def fid(self):
"Returns the feature identifier." "Returns the feature identifier."
@ -76,7 +80,8 @@ class Feature(GDALBase):
@property @property
def layer_name(self): def layer_name(self):
"Returns the name of the layer for the feature." "Returns the name of the layer for the feature."
return capi.get_feat_name(self._layer._ldefn) name = capi.get_feat_name(self._layer._ldefn)
return force_text(name, self.encoding, strings_only=True)
@property @property
def num_fields(self): def num_fields(self):

View File

@ -3,6 +3,8 @@ from datetime import date, datetime, time
from django.contrib.gis.gdal.base import GDALBase from django.contrib.gis.gdal.base import GDALBase
from django.contrib.gis.gdal.error import OGRException from django.contrib.gis.gdal.error import OGRException
from django.contrib.gis.gdal.prototypes import ds as capi from django.contrib.gis.gdal.prototypes import ds as capi
from django.utils.encoding import force_text
# For more information, see the OGR C API source code: # For more information, see the OGR C API source code:
# http://www.gdal.org/ogr/ogr__api_8h.html # http://www.gdal.org/ogr/ogr__api_8h.html
@ -53,7 +55,8 @@ class Field(GDALBase):
def as_string(self): def as_string(self):
"Retrieves the Field's value as a string." "Retrieves the Field's value as a string."
return capi.get_field_as_string(self._feat.ptr, self._index) string = capi.get_field_as_string(self._feat.ptr, self._index)
return force_text(string, encoding=self._feat.encoding, strings_only=True)
def as_datetime(self): def as_datetime(self):
"Retrieves the Field's value as a tuple of date & time components." "Retrieves the Field's value as a tuple of date & time components."
@ -70,7 +73,8 @@ class Field(GDALBase):
@property @property
def name(self): def name(self):
"Returns the name of this Field." "Returns the name of this Field."
return capi.get_field_name(self.ptr) name = capi.get_field_name(self.ptr)
return force_text(name, encoding=self._feat.encoding, strings_only=True)
@property @property
def precision(self): def precision(self):

View File

@ -14,7 +14,7 @@ from django.contrib.gis.gdal.srs import SpatialReference
# GDAL ctypes function prototypes. # GDAL ctypes function prototypes.
from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api, srs as srs_api from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api, srs as srs_api
from django.utils.encoding import force_bytes from django.utils.encoding import force_bytes, force_text
from django.utils import six from django.utils import six
from django.utils.six.moves import xrange from django.utils.six.moves import xrange
@ -103,7 +103,8 @@ class Layer(GDALBase):
@property @property
def name(self): def name(self):
"Returns the name of this layer in the Data Source." "Returns the name of this layer in the Data Source."
return capi.get_fd_name(self._ldefn) name = capi.get_fd_name(self._ldefn)
return force_text(name, self._ds.encoding, strings_only=True)
@property @property
def num_feat(self, force=1): def num_feat(self, force=1):
@ -135,8 +136,9 @@ class Layer(GDALBase):
Returns a list of string names corresponding to each of the Fields Returns a list of string names corresponding to each of the Fields
available in this Layer. available in this Layer.
""" """
return [capi.get_field_name(capi.get_field_defn(self._ldefn, i)) return [force_text(capi.get_field_name(capi.get_field_defn(self._ldefn, i)),
for i in xrange(self.num_fields) ] self._ds.encoding, strings_only=True)
for i in xrange(self.num_fields)]
@property @property
def field_types(self): def field_types(self):

View File

@ -17,7 +17,7 @@ cleanup_all = void_output(lgdal.OGRCleanupAll, [], errcheck=False)
get_driver = voidptr_output(lgdal.OGRGetDriver, [c_int]) get_driver = voidptr_output(lgdal.OGRGetDriver, [c_int])
get_driver_by_name = voidptr_output(lgdal.OGRGetDriverByName, [c_char_p]) get_driver_by_name = voidptr_output(lgdal.OGRGetDriverByName, [c_char_p])
get_driver_count = int_output(lgdal.OGRGetDriverCount, []) get_driver_count = int_output(lgdal.OGRGetDriverCount, [])
get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p]) get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p], decoding='ascii')
### DataSource ### ### DataSource ###
open_ds = voidptr_output(lgdal.OGROpen, [c_char_p, c_int, POINTER(c_void_p)]) open_ds = voidptr_output(lgdal.OGROpen, [c_char_p, c_int, POINTER(c_void_p)])

View File

@ -30,10 +30,9 @@ def check_const_string(result, func, cargs, offset=None):
if offset: if offset:
check_err(result) check_err(result)
ptr = ptr_byref(cargs, offset) ptr = ptr_byref(cargs, offset)
return ptr.value.decode() return ptr.value
else: else:
if result is not None: return result
return result.decode()
def check_string(result, func, cargs, offset=-1, str_result=False): def check_string(result, func, cargs, offset=-1, str_result=False):
""" """
@ -48,13 +47,13 @@ def check_string(result, func, cargs, offset=-1, str_result=False):
# For routines that return a string. # For routines that return a string.
ptr = result ptr = result
if not ptr: s = None if not ptr: s = None
else: s = string_at(result).decode() else: s = string_at(result)
else: else:
# Error-code return specified. # Error-code return specified.
check_err(result) check_err(result)
ptr = ptr_byref(cargs, offset) ptr = ptr_byref(cargs, offset)
# Getting the string value # Getting the string value
s = ptr.value.decode() s = ptr.value
# Correctly freeing the allocated memory beind GDAL pointer # Correctly freeing the allocated memory beind GDAL pointer
# w/the VSIFree routine. # w/the VSIFree routine.
if ptr: lgdal.VSIFree(ptr) if ptr: lgdal.VSIFree(ptr)

View File

@ -57,7 +57,7 @@ def srs_output(func, argtypes):
func.errcheck = check_srs func.errcheck = check_srs
return func return func
def const_string_output(func, argtypes, offset=None): def const_string_output(func, argtypes, offset=None, decoding=None):
func.argtypes = argtypes func.argtypes = argtypes
if offset: if offset:
func.restype = c_int func.restype = c_int
@ -65,12 +65,15 @@ def const_string_output(func, argtypes, offset=None):
func.restype = c_char_p func.restype = c_char_p
def _check_const(result, func, cargs): def _check_const(result, func, cargs):
return check_const_string(result, func, cargs, offset=offset) res = check_const_string(result, func, cargs, offset=offset)
if res and decoding:
res = res.decode(decoding)
return res
func.errcheck = _check_const func.errcheck = _check_const
return func return func
def string_output(func, argtypes, offset=-1, str_result=False): def string_output(func, argtypes, offset=-1, str_result=False, decoding=None):
""" """
Generates a ctypes prototype for the given function with the Generates a ctypes prototype for the given function with the
given argument types that returns a string from a GDAL pointer. given argument types that returns a string from a GDAL pointer.
@ -90,8 +93,11 @@ def string_output(func, argtypes, offset=-1, str_result=False):
# Dynamically defining our error-checking function with the # Dynamically defining our error-checking function with the
# given offset. # given offset.
def _check_str(result, func, cargs): def _check_str(result, func, cargs):
return check_string(result, func, cargs, res = check_string(result, func, cargs,
offset=offset, str_result=str_result) offset=offset, str_result=str_result)
if res and decoding:
res = res.decode(decoding)
return res
func.errcheck = _check_str func.errcheck = _check_str
return func return func

View File

@ -27,8 +27,8 @@ def topology_func(f):
# GeoJSON routines. # GeoJSON routines.
from_json = geom_output(lgdal.OGR_G_CreateGeometryFromJson, [c_char_p]) from_json = geom_output(lgdal.OGR_G_CreateGeometryFromJson, [c_char_p])
to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True) to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True, decoding='ascii')
to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True) to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True, decoding='ascii')
# GetX, GetY, GetZ all return doubles. # GetX, GetY, GetZ all return doubles.
getx = pnt_func(lgdal.OGR_G_GetX) getx = pnt_func(lgdal.OGR_G_GetX)
@ -57,8 +57,8 @@ destroy_geom = void_output(lgdal.OGR_G_DestroyGeometry, [c_void_p], errcheck=Fal
# Geometry export routines. # Geometry export routines.
to_wkb = void_output(lgdal.OGR_G_ExportToWkb, None, errcheck=True) # special handling for WKB. to_wkb = void_output(lgdal.OGR_G_ExportToWkb, None, errcheck=True) # special handling for WKB.
to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)]) to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)], decoding='ascii')
to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True) to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True, decoding='ascii')
get_wkbsize = int_output(lgdal.OGR_G_WkbSize, [c_void_p]) get_wkbsize = int_output(lgdal.OGR_G_WkbSize, [c_void_p])
# Geometry spatial-reference related routines. # Geometry spatial-reference related routines.
@ -73,7 +73,7 @@ get_coord_dim = int_output(lgdal.OGR_G_GetCoordinateDimension, [c_void_p])
set_coord_dim = void_output(lgdal.OGR_G_SetCoordinateDimension, [c_void_p, c_int], errcheck=False) set_coord_dim = void_output(lgdal.OGR_G_SetCoordinateDimension, [c_void_p, c_int], errcheck=False)
get_geom_count = int_output(lgdal.OGR_G_GetGeometryCount, [c_void_p]) get_geom_count = int_output(lgdal.OGR_G_GetGeometryCount, [c_void_p])
get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p]) get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p], decoding='ascii')
get_geom_type = int_output(lgdal.OGR_G_GetGeometryType, [c_void_p]) get_geom_type = int_output(lgdal.OGR_G_GetGeometryType, [c_void_p])
get_point_count = int_output(lgdal.OGR_G_GetPointCount, [c_void_p]) get_point_count = int_output(lgdal.OGR_G_GetPointCount, [c_void_p])
get_point = void_output(lgdal.OGR_G_GetPoint, [c_void_p, c_int, POINTER(c_double), POINTER(c_double), POINTER(c_double)], errcheck=False) get_point = void_output(lgdal.OGR_G_GetPoint, [c_void_p, c_int, POINTER(c_double), POINTER(c_double), POINTER(c_double)], errcheck=False)

View File

@ -49,17 +49,17 @@ linear_units = units_func(lgdal.OSRGetLinearUnits)
angular_units = units_func(lgdal.OSRGetAngularUnits) angular_units = units_func(lgdal.OSRGetAngularUnits)
# For exporting to WKT, PROJ.4, "Pretty" WKT, and XML. # For exporting to WKT, PROJ.4, "Pretty" WKT, and XML.
to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)]) to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)]) to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2) to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2, decoding='ascii')
# Memory leak fixed in GDAL 1.5; still exists in 1.4. # Memory leak fixed in GDAL 1.5; still exists in 1.4.
to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2) to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2, decoding='ascii')
# String attribute retrival routines. # String attribute retrival routines.
get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int]) get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int], decoding='ascii')
get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p]) get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p], decoding='ascii')
get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p]) get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p], decoding='ascii')
# SRS Properties # SRS Properties
isgeographic = int_output(lgdal.OSRIsGeographic, [c_void_p]) isgeographic = int_output(lgdal.OSRIsGeographic, [c_void_p])

View File

@ -34,7 +34,7 @@ from django.contrib.gis.gdal.error import SRSException
from django.contrib.gis.gdal.prototypes import srs as capi from django.contrib.gis.gdal.prototypes import srs as capi
from django.utils import six from django.utils import six
from django.utils.encoding import force_bytes, force_text from django.utils.encoding import force_bytes
#### Spatial Reference class. #### #### Spatial Reference class. ####
@ -139,8 +139,7 @@ class SpatialReference(GDALBase):
""" """
if not isinstance(target, six.string_types) or not isinstance(index, int): if not isinstance(target, six.string_types) or not isinstance(index, int):
raise TypeError raise TypeError
value = capi.get_attr_value(self.ptr, force_bytes(target), index) return capi.get_attr_value(self.ptr, force_bytes(target), index)
return force_text(value, 'ascii', strings_only=True)
def auth_name(self, target): def auth_name(self, target):
"Returns the authority name for the given string target node." "Returns the authority name for the given string target node."

View File

@ -167,7 +167,8 @@ class DataSourceTest(unittest.TestCase):
self.assertEqual(True, isinstance(feat[k], v)) self.assertEqual(True, isinstance(feat[k], v))
# Testing Feature.__iter__ # Testing Feature.__iter__
for fld in feat: self.assertEqual(True, fld.name in source.fields.keys()) for fld in feat:
self.assertEqual(True, fld.name in source.fields.keys())
def test05_geometries(self): def test05_geometries(self):
"Testing Geometries from Data Source Features." "Testing Geometries from Data Source Features."

Binary file not shown.

View File

@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]

Binary file not shown.

Binary file not shown.

View File

@ -1,4 +1,5 @@
from __future__ import absolute_import # coding: utf-8
from __future__ import absolute_import, unicode_literals
import os import os
from copy import copy from copy import copy
@ -286,6 +287,13 @@ class LayerMapTest(TestCase):
self.assertEqual(City.objects.count(), 3) self.assertEqual(City.objects.count(), 3)
self.assertEqual(City.objects.all().order_by('name_txt')[0].name_txt, "Houston") self.assertEqual(City.objects.all().order_by('name_txt')[0].name_txt, "Houston")
def test_encoded_name(self):
""" Test a layer containing utf-8-encoded name """
city_shp = os.path.join(shp_path, 'ch-city', 'ch-city.shp')
lm = LayerMapping(City, city_shp, city_mapping)
lm.save(silent=True, strict=True)
self.assertEqual(City.objects.count(), 1)
self.assertEqual(City.objects.all()[0].name, "Zürich")
class OtherRouter(object): class OtherRouter(object):
def db_for_read(self, model, **hints): def db_for_read(self, model, **hints):

View File

@ -18,6 +18,8 @@ from django.contrib.gis.gdal.field import (
from django.db import models, transaction from django.db import models, transaction
from django.contrib.localflavor.us.models import USStateField from django.contrib.localflavor.us.models import USStateField
from django.utils import six from django.utils import six
from django.utils.encoding import force_text
# LayerMapping exceptions. # LayerMapping exceptions.
class LayerMapError(Exception): pass class LayerMapError(Exception): pass
@ -65,7 +67,7 @@ class LayerMapping(object):
} }
def __init__(self, model, data, mapping, layer=0, def __init__(self, model, data, mapping, layer=0,
source_srs=None, encoding=None, source_srs=None, encoding='utf-8',
transaction_mode='commit_on_success', transaction_mode='commit_on_success',
transform=True, unique=None, using=None): transform=True, unique=None, using=None):
""" """
@ -76,7 +78,7 @@ class LayerMapping(object):
""" """
# Getting the DataSource and the associated Layer. # Getting the DataSource and the associated Layer.
if isinstance(data, six.string_types): if isinstance(data, six.string_types):
self.ds = DataSource(data) self.ds = DataSource(data, encoding=encoding)
else: else:
self.ds = data self.ds = data
self.layer = self.ds[layer] self.layer = self.ds[layer]
@ -330,7 +332,7 @@ class LayerMapping(object):
if self.encoding: if self.encoding:
# The encoding for OGR data sources may be specified here # The encoding for OGR data sources may be specified here
# (e.g., 'cp437' for Census Bureau boundary files). # (e.g., 'cp437' for Census Bureau boundary files).
val = six.text_type(ogr_field.value, self.encoding) val = force_text(ogr_field.value, self.encoding)
else: else:
val = ogr_field.value val = ogr_field.value
if model_field.max_length and len(val) > model_field.max_length: if model_field.max_length and len(val) > model_field.max_length: