Use smarter string decoding in GeoDjango

The first try to solve the Python 3 GIS encoding/decoding issue
was too naive. Using decode() on all read strings is bound to fail
as soon as a non-ascii string is concerned.
This patch is a little more clever, leaving ascii decoding when
plain ascii strings are expected, and allowing to specify a custom
encoding in DataSource hierarchy.
This commit is contained in:
Claude Paroz 2012-10-06 22:56:47 +02:00
parent a62d53c032
commit 9a2bceed1a
17 changed files with 71 additions and 41 deletions

View File

@ -45,7 +45,7 @@ from django.contrib.gis.gdal.layer import Layer
# Getting the ctypes prototypes for the DataSource.
from django.contrib.gis.gdal.prototypes import ds as capi
from django.utils.encoding import force_bytes
from django.utils.encoding import force_bytes, force_text
from django.utils import six
from django.utils.six.moves import xrange
@ -57,12 +57,14 @@ class DataSource(GDALBase):
"Wraps an OGR Data Source object."
#### Python 'magic' routines ####
def __init__(self, ds_input, ds_driver=False, write=False):
def __init__(self, ds_input, ds_driver=False, write=False, encoding='utf-8'):
# The write flag.
if write:
self._write = 1
else:
self._write = 0
# See also http://trac.osgeo.org/gdal/wiki/rfc23_ogr_unicode
self.encoding = encoding
# Registering all the drivers, this needs to be done
# _before_ we try to open up a data source.
@ -129,4 +131,5 @@ class DataSource(GDALBase):
@property
def name(self):
"Returns the name of the data source."
return capi.get_ds_name(self._ptr)
name = capi.get_ds_name(self._ptr)
return force_text(name, self.encoding, strings_only=True)

View File

@ -7,7 +7,7 @@ from django.contrib.gis.gdal.geometries import OGRGeometry, OGRGeomType
# ctypes function prototypes
from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api
from django.utils.encoding import force_bytes
from django.utils.encoding import force_bytes, force_text
from django.utils import six
from django.utils.six.moves import xrange
@ -68,6 +68,10 @@ class Feature(GDALBase):
return bool(capi.feature_equal(self.ptr, other._ptr))
#### Feature Properties ####
@property
def encoding(self):
return self._layer._ds.encoding
@property
def fid(self):
"Returns the feature identifier."
@ -76,7 +80,8 @@ class Feature(GDALBase):
@property
def layer_name(self):
"Returns the name of the layer for the feature."
return capi.get_feat_name(self._layer._ldefn)
name = capi.get_feat_name(self._layer._ldefn)
return force_text(name, self.encoding, strings_only=True)
@property
def num_fields(self):

View File

@ -3,6 +3,8 @@ from datetime import date, datetime, time
from django.contrib.gis.gdal.base import GDALBase
from django.contrib.gis.gdal.error import OGRException
from django.contrib.gis.gdal.prototypes import ds as capi
from django.utils.encoding import force_text
# For more information, see the OGR C API source code:
# http://www.gdal.org/ogr/ogr__api_8h.html
@ -53,7 +55,8 @@ class Field(GDALBase):
def as_string(self):
"Retrieves the Field's value as a string."
return capi.get_field_as_string(self._feat.ptr, self._index)
string = capi.get_field_as_string(self._feat.ptr, self._index)
return force_text(string, encoding=self._feat.encoding, strings_only=True)
def as_datetime(self):
"Retrieves the Field's value as a tuple of date & time components."
@ -70,7 +73,8 @@ class Field(GDALBase):
@property
def name(self):
"Returns the name of this Field."
return capi.get_field_name(self.ptr)
name = capi.get_field_name(self.ptr)
return force_text(name, encoding=self._feat.encoding, strings_only=True)
@property
def precision(self):

View File

@ -14,7 +14,7 @@ from django.contrib.gis.gdal.srs import SpatialReference
# GDAL ctypes function prototypes.
from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api, srs as srs_api
from django.utils.encoding import force_bytes
from django.utils.encoding import force_bytes, force_text
from django.utils import six
from django.utils.six.moves import xrange
@ -103,7 +103,8 @@ class Layer(GDALBase):
@property
def name(self):
"Returns the name of this layer in the Data Source."
return capi.get_fd_name(self._ldefn)
name = capi.get_fd_name(self._ldefn)
return force_text(name, self._ds.encoding, strings_only=True)
@property
def num_feat(self, force=1):
@ -135,8 +136,9 @@ class Layer(GDALBase):
Returns a list of string names corresponding to each of the Fields
available in this Layer.
"""
return [capi.get_field_name(capi.get_field_defn(self._ldefn, i))
for i in xrange(self.num_fields) ]
return [force_text(capi.get_field_name(capi.get_field_defn(self._ldefn, i)),
self._ds.encoding, strings_only=True)
for i in xrange(self.num_fields)]
@property
def field_types(self):

View File

@ -17,7 +17,7 @@ cleanup_all = void_output(lgdal.OGRCleanupAll, [], errcheck=False)
get_driver = voidptr_output(lgdal.OGRGetDriver, [c_int])
get_driver_by_name = voidptr_output(lgdal.OGRGetDriverByName, [c_char_p])
get_driver_count = int_output(lgdal.OGRGetDriverCount, [])
get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p])
get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p], decoding='ascii')
### DataSource ###
open_ds = voidptr_output(lgdal.OGROpen, [c_char_p, c_int, POINTER(c_void_p)])

View File

@ -30,10 +30,9 @@ def check_const_string(result, func, cargs, offset=None):
if offset:
check_err(result)
ptr = ptr_byref(cargs, offset)
return ptr.value.decode()
return ptr.value
else:
if result is not None:
return result.decode()
return result
def check_string(result, func, cargs, offset=-1, str_result=False):
"""
@ -48,13 +47,13 @@ def check_string(result, func, cargs, offset=-1, str_result=False):
# For routines that return a string.
ptr = result
if not ptr: s = None
else: s = string_at(result).decode()
else: s = string_at(result)
else:
# Error-code return specified.
check_err(result)
ptr = ptr_byref(cargs, offset)
# Getting the string value
s = ptr.value.decode()
s = ptr.value
# Correctly freeing the allocated memory beind GDAL pointer
# w/the VSIFree routine.
if ptr: lgdal.VSIFree(ptr)

View File

@ -57,7 +57,7 @@ def srs_output(func, argtypes):
func.errcheck = check_srs
return func
def const_string_output(func, argtypes, offset=None):
def const_string_output(func, argtypes, offset=None, decoding=None):
func.argtypes = argtypes
if offset:
func.restype = c_int
@ -65,12 +65,15 @@ def const_string_output(func, argtypes, offset=None):
func.restype = c_char_p
def _check_const(result, func, cargs):
return check_const_string(result, func, cargs, offset=offset)
res = check_const_string(result, func, cargs, offset=offset)
if res and decoding:
res = res.decode(decoding)
return res
func.errcheck = _check_const
return func
def string_output(func, argtypes, offset=-1, str_result=False):
def string_output(func, argtypes, offset=-1, str_result=False, decoding=None):
"""
Generates a ctypes prototype for the given function with the
given argument types that returns a string from a GDAL pointer.
@ -90,8 +93,11 @@ def string_output(func, argtypes, offset=-1, str_result=False):
# Dynamically defining our error-checking function with the
# given offset.
def _check_str(result, func, cargs):
return check_string(result, func, cargs,
res = check_string(result, func, cargs,
offset=offset, str_result=str_result)
if res and decoding:
res = res.decode(decoding)
return res
func.errcheck = _check_str
return func

View File

@ -27,8 +27,8 @@ def topology_func(f):
# GeoJSON routines.
from_json = geom_output(lgdal.OGR_G_CreateGeometryFromJson, [c_char_p])
to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True)
to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True)
to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True, decoding='ascii')
to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True, decoding='ascii')
# GetX, GetY, GetZ all return doubles.
getx = pnt_func(lgdal.OGR_G_GetX)
@ -57,8 +57,8 @@ destroy_geom = void_output(lgdal.OGR_G_DestroyGeometry, [c_void_p], errcheck=Fal
# Geometry export routines.
to_wkb = void_output(lgdal.OGR_G_ExportToWkb, None, errcheck=True) # special handling for WKB.
to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)])
to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True)
to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)], decoding='ascii')
to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True, decoding='ascii')
get_wkbsize = int_output(lgdal.OGR_G_WkbSize, [c_void_p])
# Geometry spatial-reference related routines.
@ -73,7 +73,7 @@ get_coord_dim = int_output(lgdal.OGR_G_GetCoordinateDimension, [c_void_p])
set_coord_dim = void_output(lgdal.OGR_G_SetCoordinateDimension, [c_void_p, c_int], errcheck=False)
get_geom_count = int_output(lgdal.OGR_G_GetGeometryCount, [c_void_p])
get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p])
get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p], decoding='ascii')
get_geom_type = int_output(lgdal.OGR_G_GetGeometryType, [c_void_p])
get_point_count = int_output(lgdal.OGR_G_GetPointCount, [c_void_p])
get_point = void_output(lgdal.OGR_G_GetPoint, [c_void_p, c_int, POINTER(c_double), POINTER(c_double), POINTER(c_double)], errcheck=False)

View File

@ -49,17 +49,17 @@ linear_units = units_func(lgdal.OSRGetLinearUnits)
angular_units = units_func(lgdal.OSRGetAngularUnits)
# For exporting to WKT, PROJ.4, "Pretty" WKT, and XML.
to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)])
to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)])
to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2)
to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2, decoding='ascii')
# Memory leak fixed in GDAL 1.5; still exists in 1.4.
to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2)
to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2, decoding='ascii')
# String attribute retrival routines.
get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int])
get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p])
get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p])
get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int], decoding='ascii')
get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p], decoding='ascii')
get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p], decoding='ascii')
# SRS Properties
isgeographic = int_output(lgdal.OSRIsGeographic, [c_void_p])

View File

@ -34,7 +34,7 @@ from django.contrib.gis.gdal.error import SRSException
from django.contrib.gis.gdal.prototypes import srs as capi
from django.utils import six
from django.utils.encoding import force_bytes, force_text
from django.utils.encoding import force_bytes
#### Spatial Reference class. ####
@ -139,8 +139,7 @@ class SpatialReference(GDALBase):
"""
if not isinstance(target, six.string_types) or not isinstance(index, int):
raise TypeError
value = capi.get_attr_value(self.ptr, force_bytes(target), index)
return force_text(value, 'ascii', strings_only=True)
return capi.get_attr_value(self.ptr, force_bytes(target), index)
def auth_name(self, target):
"Returns the authority name for the given string target node."

View File

@ -167,7 +167,8 @@ class DataSourceTest(unittest.TestCase):
self.assertEqual(True, isinstance(feat[k], v))
# Testing Feature.__iter__
for fld in feat: self.assertEqual(True, fld.name in source.fields.keys())
for fld in feat:
self.assertEqual(True, fld.name in source.fields.keys())
def test05_geometries(self):
"Testing Geometries from Data Source Features."

Binary file not shown.

View File

@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]

Binary file not shown.

Binary file not shown.

View File

@ -1,4 +1,5 @@
from __future__ import absolute_import
# coding: utf-8
from __future__ import absolute_import, unicode_literals
import os
from copy import copy
@ -286,6 +287,13 @@ class LayerMapTest(TestCase):
self.assertEqual(City.objects.count(), 3)
self.assertEqual(City.objects.all().order_by('name_txt')[0].name_txt, "Houston")
def test_encoded_name(self):
""" Test a layer containing utf-8-encoded name """
city_shp = os.path.join(shp_path, 'ch-city', 'ch-city.shp')
lm = LayerMapping(City, city_shp, city_mapping)
lm.save(silent=True, strict=True)
self.assertEqual(City.objects.count(), 1)
self.assertEqual(City.objects.all()[0].name, "Zürich")
class OtherRouter(object):
def db_for_read(self, model, **hints):

View File

@ -18,6 +18,8 @@ from django.contrib.gis.gdal.field import (
from django.db import models, transaction
from django.contrib.localflavor.us.models import USStateField
from django.utils import six
from django.utils.encoding import force_text
# LayerMapping exceptions.
class LayerMapError(Exception): pass
@ -65,7 +67,7 @@ class LayerMapping(object):
}
def __init__(self, model, data, mapping, layer=0,
source_srs=None, encoding=None,
source_srs=None, encoding='utf-8',
transaction_mode='commit_on_success',
transform=True, unique=None, using=None):
"""
@ -76,7 +78,7 @@ class LayerMapping(object):
"""
# Getting the DataSource and the associated Layer.
if isinstance(data, six.string_types):
self.ds = DataSource(data)
self.ds = DataSource(data, encoding=encoding)
else:
self.ds = data
self.layer = self.ds[layer]
@ -330,7 +332,7 @@ class LayerMapping(object):
if self.encoding:
# The encoding for OGR data sources may be specified here
# (e.g., 'cp437' for Census Bureau boundary files).
val = six.text_type(ogr_field.value, self.encoding)
val = force_text(ogr_field.value, self.encoding)
else:
val = ogr_field.value
if model_field.max_length and len(val) > model_field.max_length: