Fixed #25184 -- Added support for MaxMind GeoLite2 database format

This commit is contained in:
Flavio Curella 2015-07-29 16:21:03 -05:00 committed by Tim Graham
parent 6bb4f07372
commit 7f0953ce1f
9 changed files with 577 additions and 0 deletions

View File

@ -0,0 +1,21 @@
"""
This module houses the GeoIP2 object, a wrapper for the MaxMind GeoIP2(R)
Python API (http://geoip2.readthedocs.org/). It builds on the Python geoip2
package provided by MaxMind rather than replacing it.

GeoIP(R) is a registered trademark of MaxMind, Inc.

For IP-based geolocation, this module requires the GeoLite2 Country and City
datasets, in binary format (CSV will not work!). The datasets may be
downloaded from MaxMind at http://dev.maxmind.com/geoip/geoip2/geolite2/.
Grab GeoLite2-Country.mmdb.gz and GeoLite2-City.mmdb.gz, and unzip them in the
directory corresponding to settings.GEOIP_PATH.
"""
# HAS_GEOIP2 is always exported so callers can feature-test before importing
# the wrapper classes.
__all__ = ['HAS_GEOIP2']

# The wrapper classes are only exported when importing ``.base`` succeeds;
# an ImportError there (presumably a missing ``geoip2`` package) downgrades
# to HAS_GEOIP2 = False instead of breaking the import of this package.
try:
    from .base import GeoIP2, GeoIP2Exception
    HAS_GEOIP2 = True
    __all__ += ['GeoIP2', 'GeoIP2Exception']
except ImportError:
    HAS_GEOIP2 = False

View File

@ -0,0 +1,219 @@
import os
import socket
import geoip2.database
from django.conf import settings
from django.core.validators import ipv4_re
from django.utils import six
from django.utils.ipv6 import is_valid_ipv6_address
from .resources import City, Country
# Module-level GeoIP configuration: every entry is looked up on the Django
# settings object, falling back to the default paired with it below.
GEOIP_SETTINGS = {
    name: getattr(settings, name, fallback)
    for name, fallback in (
        ('GEOIP_PATH', None),
        ('GEOIP_CITY', 'GeoLite2-City.mmdb'),
        ('GEOIP_COUNTRY', 'GeoLite2-Country.mmdb'),
    )
}
class GeoIP2Exception(Exception):
    """
    Error raised by the GeoIP2 wrapper: an invalid caching option, a missing
    or unusable database path, an unrecognized database edition, or a query
    that needs a country/city database which was not loaded.
    """
    pass
class GeoIP2(object):
    """
    Wrapper around ``geoip2.database.Reader`` for country and city lookups.

    An instance holds up to two open readers, one for a country database and
    one for a city database, and exposes dictionary-returning query methods
    that accept either an IP address or a host name.
    """
    # The flags for GeoIP memory caching.
    # Try MODE_MMAP_EXT, MODE_MMAP, MODE_FILE in that order.
    MODE_AUTO = 0
    # Use the C extension with memory map.
    MODE_MMAP_EXT = 1
    # Read from memory map. Pure Python.
    MODE_MMAP = 2
    # Read database as standard file. Pure Python.
    MODE_FILE = 4
    # Load database into memory. Pure Python.
    MODE_MEMORY = 8
    # Only the keys matter here; they are the accepted ``cache`` values.
    cache_options = {opt: None for opt in (0, 1, 2, 4, 8)}

    # Paths to the city & country binary databases.
    _city_file = ''
    _country_file = ''

    # Initially, no database readers are open.
    _city = None
    _country = None

    def __init__(self, path=None, cache=0, country=None, city=None):
        """
        Initialize the GeoIP object. No parameters are required to use default
        settings. Keyword arguments may be passed in to customize the locations
        of the GeoIP datasets.

        * path: Base directory to where GeoIP data is located or the full path
            to where the city or country data files (*.mmdb) are located.
            Assumes that both the city and country data sets are located in
            this directory; overrides the GEOIP_PATH setting.

        * cache: The cache settings when opening up the GeoIP datasets. May be
            an integer in (0, 1, 2, 4, 8) corresponding to the MODE_AUTO,
            MODE_MMAP_EXT, MODE_MMAP, MODE_FILE, and MODE_MEMORY constants
            defined on this class, respectively. Defaults to 0, meaning
            MODE_AUTO.

        * country: The name of the GeoIP country data file. Defaults to
            'GeoLite2-Country.mmdb'; overrides the GEOIP_COUNTRY setting.

        * city: The name of the GeoIP city data file. Defaults to
            'GeoLite2-City.mmdb'; overrides the GEOIP_CITY setting.

        Raises GeoIP2Exception for an invalid cache option, a missing path, a
        path that is neither a file nor a directory, or a database file whose
        edition is neither City nor Country. Raises TypeError if the path is
        not a string.
        """
        # Checking the given cache option.
        if cache in self.cache_options:
            self._cache = cache
        else:
            raise GeoIP2Exception('Invalid GeoIP caching option: %s' % cache)

        # Getting the GeoIP data path.
        if not path:
            path = GEOIP_SETTINGS['GEOIP_PATH']
            if not path:
                raise GeoIP2Exception('GeoIP path must be provided via parameter or the GEOIP_PATH setting.')
        if not isinstance(path, six.string_types):
            raise TypeError('Invalid path type: %s' % type(path).__name__)

        if os.path.isdir(path):
            # Constructing the GeoIP database filenames using the settings
            # dictionary. If the database files for the GeoLite country
            # and/or city datasets exist, then try to open them. A missing
            # file is not an error here; the matching reader just stays None.
            country_db = os.path.join(path, country or GEOIP_SETTINGS['GEOIP_COUNTRY'])
            if os.path.isfile(country_db):
                self._country = geoip2.database.Reader(country_db, mode=cache)
                self._country_file = country_db

            city_db = os.path.join(path, city or GEOIP_SETTINGS['GEOIP_CITY'])
            if os.path.isfile(city_db):
                self._city = geoip2.database.Reader(city_db, mode=cache)
                self._city_file = city_db
        elif os.path.isfile(path):
            # Otherwise, some detective work will be needed to figure out
            # whether the given database path is for the GeoIP country or city
            # databases.
            reader = geoip2.database.Reader(path, mode=cache)
            db_type = reader.metadata().database_type

            if db_type.endswith('City'):
                # GeoLite City database detected.
                self._city = reader
                self._city_file = path
            elif db_type.endswith('Country'):
                # GeoIP Country database detected.
                self._country = reader
                self._country_file = path
            else:
                # The reader is not stored anywhere, so close it here rather
                # than leaking the open database handle.
                reader.close()
                raise GeoIP2Exception('Unable to recognize database edition: %s' % db_type)
        else:
            raise GeoIP2Exception('GeoIP path must be a valid file or directory.')

    @property
    def _reader(self):
        "Return the country reader if one is open, otherwise the city reader."
        if self._country:
            return self._country
        else:
            return self._city

    @property
    def _country_or_city(self):
        "Return the lookup method of whichever reader can answer country queries."
        if self._country:
            return self._country.country
        else:
            return self._city.city

    def __del__(self):
        # Cleanup any GeoIP file handles lying around. Both readers can be
        # open at the same time (directory-based initialization), so each
        # one has to be closed, not just the one returned by _reader.
        for attr in ('_country', '_city'):
            reader = getattr(self, attr, None)
            if reader:
                reader.close()
                # Drop the reference so a second call doesn't close it again.
                setattr(self, attr, None)

    def _check_query(self, query, country=False, city=False, city_or_country=False):
        """
        Helper routine for checking the query and database availability.

        Return the query as an IP address string. Raises TypeError if the
        query is not a string and GeoIP2Exception if the database required by
        the given flags is not loaded.
        """
        # Making sure a string was passed in for the query.
        if not isinstance(query, six.string_types):
            raise TypeError('GeoIP query must be a string, not type %s' % type(query).__name__)

        # Extra checks for the existence of country and city databases.
        if city_or_country and not (self._country or self._city):
            raise GeoIP2Exception('Invalid GeoIP country and city data files.')
        elif country and not self._country:
            raise GeoIP2Exception('Invalid GeoIP country data file: %s' % self._country_file)
        elif city and not self._city:
            raise GeoIP2Exception('Invalid GeoIP city data file: %s' % self._city_file)

        # Return the query string back to the caller. GeoIP2 only takes IP
        # addresses, so anything that isn't one is treated as a host name and
        # resolved first.
        if not (ipv4_re.match(query) or is_valid_ipv6_address(query)):
            query = socket.gethostbyname(query)

        return query

    def city(self, query):
        """
        Return a dictionary of city information for the given IP address or
        Fully Qualified Domain Name (FQDN). Some information in the dictionary
        may be undefined (None).
        """
        enc_query = self._check_query(query, city=True)
        return City(self._city.city(enc_query))

    def country_code(self, query):
        "Return the country code for the given IP Address or FQDN."
        # country() validates and resolves the query itself, so running
        # _check_query() here as well would only repeat the work.
        return self.country(query)['country_code']

    def country_name(self, query):
        "Return the country name for the given IP Address or FQDN."
        # See country_code(): validation happens once, inside country().
        return self.country(query)['country_name']

    def country(self, query):
        """
        Return a dictionary with the country code and name when given an
        IP address or a Fully Qualified Domain Name (FQDN). For example, both
        '24.124.1.80' and 'djangoproject.com' are valid parameters.
        """
        # Returning the country code and name
        enc_query = self._check_query(query, city_or_country=True)
        return Country(self._country_or_city(enc_query))

    # #### Coordinate retrieval routines ####
    def coords(self, query, ordering=('longitude', 'latitude')):
        """
        Return a tuple of values from the city dictionary for the given query,
        one per key named in `ordering` (longitude, latitude by default).
        """
        cdict = self.city(query)
        if cdict is None:
            return None
        else:
            return tuple(cdict[o] for o in ordering)

    def lon_lat(self, query):
        "Return a tuple of the (longitude, latitude) for the given query."
        return self.coords(query)

    def lat_lon(self, query):
        "Return a tuple of the (latitude, longitude) for the given query."
        return self.coords(query, ('latitude', 'longitude'))

    def geos(self, query):
        "Return a GEOS Point object for the given query."
        ll = self.lon_lat(query)
        if ll:
            # Imported lazily so that GEOS is only needed by this method.
            from django.contrib.gis.geos import Point
            return Point(ll, srid=4326)
        else:
            return None

    # #### GeoIP Database Information Routines ####
    @property
    def info(self):
        "Return a string with the binary format version of the open database."
        meta = self._reader.metadata()
        return 'GeoIP Library:\n\t%s.%s\n' % (meta.binary_format_major_version, meta.binary_format_minor_version)

    @classmethod
    def open(cls, full_path, cache):
        "Alternate constructor: instantiate from a database path and cache mode."
        # Use cls rather than GeoIP2 so subclasses get instances of themselves.
        return cls(full_path, cache)

View File

@ -0,0 +1,18 @@
def City(response):
    """
    Flatten a city lookup response into a plain dictionary.

    `response` is expected to expose `city`, `country`, `location`, `postal`
    and `subdivisions` attributes. The 'region' entry is the ISO code of the
    first subdivision, or None when the response has no subdivisions.
    """
    country = response.country
    location = response.location
    subdivisions = response.subdivisions
    return {
        'city': response.city.name,
        'country_code': country.iso_code,
        'country_name': country.name,
        'dma_code': location.metro_code,
        'latitude': location.latitude,
        'longitude': location.longitude,
        'postal_code': response.postal.code,
        # An empty subdivisions sequence is falsy; no need to call len().
        'region': subdivisions[0].iso_code if subdivisions else None,
    }
def Country(response):
    """
    Reduce a lookup response to a dictionary holding only the ISO code and
    the name of its `country` attribute.
    """
    country = response.country
    return dict(
        country_code=country.iso_code,
        country_name=country.name,
    )

View File

@ -138,6 +138,7 @@ dependencies:
* bcrypt_
* docutils_
* geoip2_
* jinja2_ 2.7+
* numpy_
* Pillow_
@ -170,6 +171,7 @@ associated tests will be skipped.
.. _bcrypt: https://pypi.python.org/pypi/bcrypt
.. _docutils: https://pypi.python.org/pypi/docutils
.. _geoip2: https://pypi.python.org/pypi/geoip2
.. _jinja2: https://pypi.python.org/pypi/jinja2
.. _numpy: https://pypi.python.org/pypi/numpy
.. _Pillow: https://pypi.python.org/pypi/Pillow/

View File

@ -0,0 +1,173 @@
=======================
Geolocation with GeoIP2
=======================
.. module:: django.contrib.gis.geoip2
:synopsis: Python interface for MaxMind's GeoIP2 databases.
.. versionadded:: 1.9
The :class:`GeoIP2` object is a wrapper for the `MaxMind geoip2 Python
library`__. [#]_
In order to perform IP-based geolocation, the :class:`GeoIP2` object requires
the `geoip2 Python library`__ and the GeoIP `Country` and/or `City` `datasets
in binary format`__ (the CSV files will not work!). Grab the
``GeoLite2-Country.mmdb.gz`` and ``GeoLite2-City.mmdb.gz`` files and unzip them
in a directory corresponding to the :setting:`GEOIP_PATH` setting.
Additionally, it is recommended to install the `libmaxminddb C library`__, so
that ``geoip2`` can leverage the C library's faster speed.
__ http://geoip2.readthedocs.org/
__ https://pypi.python.org/pypi/geoip2
__ http://dev.maxmind.com/geoip/geoip2/geolite2/
__ https://github.com/maxmind/libmaxminddb
Example
=======
Here is an example of its usage::
>>> from django.contrib.gis.geoip2 import GeoIP2
>>> g = GeoIP2()
>>> g.country('google.com')
{'country_code': 'US', 'country_name': 'United States'}
>>> g.city('72.14.207.99')
{'city': 'Mountain View',
'country_code': 'US',
'country_name': 'United States',
'dma_code': 807,
'latitude': 37.419200897216797,
'longitude': -122.05740356445312,
'postal_code': '94043',
'region': 'CA'}
>>> g.lat_lon('salon.com')
(39.0437, -77.4875)
>>> g.lon_lat('uh.edu')
(-95.4342, 29.834)
>>> g.geos('24.124.1.80').wkt
'POINT (-97.0000000000000000 38.0000000000000000)'
``GeoIP`` Settings
==================
.. setting:: GEOIP_PATH
GEOIP_PATH
----------
A string specifying the directory where the GeoIP data files are
located. This setting is *required* unless the location is manually specified
with the ``path`` keyword when initializing the :class:`GeoIP2` object.
.. setting:: GEOIP_COUNTRY
GEOIP_COUNTRY
-------------
The basename to use for the GeoIP country data file. Defaults to
``'GeoLite2-Country.mmdb'``.
.. setting:: GEOIP_CITY
GEOIP_CITY
----------
The basename to use for the GeoIP city data file. Defaults to
``'GeoLite2-City.mmdb'``.
``GeoIP`` API
=============
.. class:: GeoIP2(path=None, cache=0, country=None, city=None)
The ``GeoIP2`` object does not require any parameters to use the default
settings. However, at the very least the :setting:`GEOIP_PATH` setting
should be set with the path of the location of your GeoIP datasets. The
following initialization keywords may be used to customize any of the
defaults.
=================== =======================================================
Keyword Arguments Description
=================== =======================================================
``path`` Base directory to where GeoIP data is located or the
full path to where the city or country data files
(``.mmdb``) are located. Assumes that both the city and
country datasets are located in this directory;
overrides the :setting:`GEOIP_PATH` setting.
``cache`` The cache settings when opening up the GeoIP datasets. May
be an integer in (0, 1, 2, 4, 8) corresponding to the
``MODE_AUTO``, ``MODE_MMAP_EXT``, ``MODE_MMAP``, ``MODE_FILE``, and
``MODE_MEMORY`` settings,
respectively. Defaults to 0 (``MODE_AUTO``).
``country`` The name of the GeoIP country data file. Defaults
to ``GeoLite2-Country.mmdb``. Setting this keyword
overrides the :setting:`GEOIP_COUNTRY` setting.
``city`` The name of the GeoIP city data file. Defaults to
``GeoLite2-City.mmdb``. Setting this keyword overrides
the :setting:`GEOIP_CITY` setting.
=================== =======================================================
``GeoIP`` Methods
=================
Instantiating
-------------
.. classmethod:: GeoIP2.open(path, cache)
This classmethod instantiates a ``GeoIP2`` object from the given database path
and given cache setting.
Querying
--------
All the following querying routines may take either a string IP address
or a fully qualified domain name (FQDN). For example, both
``'205.186.163.125'`` and ``'djangoproject.com'`` would be valid query
parameters.
.. method:: GeoIP2.city(query)
Returns a dictionary of city information for the given query. Some
of the values in the dictionary may be undefined (``None``).
.. method:: GeoIP2.country(query)
Returns a dictionary with the country code and country name for the given
query.
.. method:: GeoIP2.country_code(query)
Returns the country code corresponding to the query.
.. method:: GeoIP2.country_name(query)
Returns the country name corresponding to the query.
Coordinate Retrieval
--------------------
.. method:: GeoIP2.coords(query)
Returns a coordinate tuple of (longitude, latitude).
.. method:: GeoIP2.lon_lat(query)
Returns a coordinate tuple of (longitude, latitude).
.. method:: GeoIP2.lat_lon(query)
Returns a coordinate tuple of (latitude, longitude).
.. method:: GeoIP2.geos(query)
Returns a :class:`~django.contrib.gis.geos.Point` object corresponding to the
query.
.. rubric:: Footnotes
.. [#] GeoIP(R) is a registered trademark of MaxMind, Inc.

View File

@ -23,6 +23,7 @@ of spatially enabled data.
geos
gdal
geoip
geoip2
utils
commands
admin

View File

@ -214,6 +214,9 @@ Minor features
raster into a different spatial reference system by specifying a target
``srid``.
* The new :class:`~django.contrib.gis.geoip2.GeoIP2` class allows using
MaxMind's GeoLite2 databases, which include support for IPv6 addresses.
:mod:`django.contrib.messages`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -0,0 +1,139 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import unittest
from unittest import skipUnless
from django.conf import settings
from django.contrib.gis.geoip2 import HAS_GEOIP2
from django.contrib.gis.geos import HAS_GEOS, GEOSGeometry
from django.utils import six
if HAS_GEOIP2:
from django.contrib.gis.geoip2 import GeoIP2, GeoIP2Exception
# Note: Requires both the GeoIP country and city datasets.
# The GEOIP_PATH setting should be the only setting set (the directory
# should contain links or the actual database files 'GeoLite2-City.mmdb' and
# 'GeoLite2-Country.mmdb').
@skipUnless(HAS_GEOIP2 and getattr(settings, "GEOIP_PATH", None),
    "GeoIP is required along with the GEOIP_PATH setting.")
class GeoIPTest(unittest.TestCase):
    """
    Tests for the GeoIP2 wrapper. The whole case is skipped unless GeoIP2 is
    importable and settings.GEOIP_PATH is set; the assertions below expect
    that directory to hold both the city and the country database.
    """
    # The same lookups are run against an IP address and a host name.
    addr = '128.249.1.1'
    fqdn = 'tmc.edu'

    def test01_init(self):
        "GeoIP initialization."
        g1 = GeoIP2()  # Everything inferred from GeoIP path
        path = settings.GEOIP_PATH
        g2 = GeoIP2(path, 0)  # Passing in data path explicitly.
        g3 = GeoIP2.open(path, 0)  # MaxMind Python API syntax.

        # All three construction styles should have loaded both databases.
        for g in (g1, g2, g3):
            self.assertTrue(g._country)
            self.assertTrue(g._city)

        # Only passing in the location of one database.
        city = os.path.join(path, 'GeoLite2-City.mmdb')
        cntry = os.path.join(path, 'GeoLite2-Country.mmdb')
        g4 = GeoIP2(city, country='')
        self.assertIsNone(g4._country)
        g5 = GeoIP2(cntry, city='')
        self.assertIsNone(g5._city)

        # Improper parameters.
        bad_params = (23, 'foo', 15.23)
        for bad in bad_params:
            # None of these is a valid cache option.
            self.assertRaises(GeoIP2Exception, GeoIP2, cache=bad)
            # As a path, a string that isn't a file or directory is rejected
            # with GeoIP2Exception, while a non-string is a TypeError.
            if isinstance(bad, six.string_types):
                e = GeoIP2Exception
            else:
                e = TypeError
            self.assertRaises(e, GeoIP2, bad, 0)

    def test02_bad_query(self):
        "GeoIP query parameter checking."
        # '<foo>' is used as a city file name that isn't expected to exist,
        # leaving only the country database loaded.
        cntry_g = GeoIP2(city='<foo>')
        # No city database available, these calls should fail.
        self.assertRaises(GeoIP2Exception, cntry_g.city, 'tmc.edu')
        self.assertRaises(GeoIP2Exception, cntry_g.coords, 'tmc.edu')

        # Non-string query should raise TypeError
        self.assertRaises(TypeError, cntry_g.country_code, 17)
        self.assertRaises(TypeError, cntry_g.country_name, GeoIP2)

    def test03_country(self):
        "GeoIP country querying methods."
        # Country-only instance (the city file name is a placeholder).
        g = GeoIP2(city='<foo>')

        for query in (self.fqdn, self.addr):
            self.assertEqual(
                'US',
                g.country_code(query),
                'Failed for func country_code and query %s' % query
            )
            self.assertEqual(
                'United States',
                g.country_name(query),
                'Failed for func country_name and query %s' % query
            )
            self.assertEqual(
                {'country_code': 'US', 'country_name': 'United States'},
                g.country(query)
            )

    @skipUnless(HAS_GEOS, "Geos is required")
    def test04_city(self):
        "GeoIP city querying methods."
        # City-only instance (the country file name is a placeholder).
        g = GeoIP2(country='<foo>')

        for query in (self.fqdn, self.addr):
            # Country queries should still work.
            self.assertEqual(
                'US',
                g.country_code(query),
                'Failed for func country_code and query %s' % query
            )
            self.assertEqual(
                'United States',
                g.country_name(query),
                'Failed for func country_name and query %s' % query
            )
            self.assertEqual(
                {'country_code': 'US', 'country_name': 'United States'},
                g.country(query)
            )

            # City information dictionary.
            d = g.city(query)
            self.assertEqual('US', d['country_code'])
            self.assertEqual('Houston', d['city'])
            self.assertEqual('TX', d['region'])

            geom = g.geos(query)
            self.assertIsInstance(geom, GEOSGeometry)
            lon, lat = (-95.4010, 29.7079)
            # lat_lon() is swapped back to (lon, lat) so that every tuple in
            # the loop below can be checked in the same order.
            lat_lon = g.lat_lon(query)
            lat_lon = (lat_lon[1], lat_lon[0])
            for tup in (geom.tuple, g.coords(query), g.lon_lat(query), lat_lon):
                self.assertAlmostEqual(lon, tup[0], 4)
                self.assertAlmostEqual(lat, tup[1], 4)

    def test05_unicode_response(self):
        "GeoIP strings should be properly encoded (#16553)."
        g = GeoIP2()
        d = g.city("duesseldorf.de")
        self.assertEqual('Düsseldorf', d['city'])
        d = g.country('200.26.205.1')
        # Some databases have only unaccented countries
        self.assertIn(d['country_name'], ('Curaçao', 'Curacao'))

    def test06_ipv6_query(self):
        "GeoIP can lookup IPv6 addresses."
        g = GeoIP2()
        d = g.city('2002:81ed:c9a5::81ed:c9a5')  # IPv6 address for www.nhm.ku.edu
        self.assertEqual('US', d['country_code'])
        self.assertEqual('Lawrence', d['city'])
        self.assertEqual('KS', d['region'])

View File

@ -1,5 +1,6 @@
bcrypt
docutils
geoip2
jinja2 >= 2.7
numpy
Pillow