From 7f0953ce1f1e263a2a74db52d70cdf278840a1d2 Mon Sep 17 00:00:00 2001 From: Flavio Curella Date: Wed, 29 Jul 2015 16:21:03 -0500 Subject: [PATCH] Fixed #25184 -- Added support for MaxMind GeoLite2 database format --- django/contrib/gis/geoip2/__init__.py | 21 ++ django/contrib/gis/geoip2/base.py | 219 ++++++++++++++++++ django/contrib/gis/geoip2/resources.py | 18 ++ .../contributing/writing-code/unit-tests.txt | 2 + docs/ref/contrib/gis/geoip2.txt | 173 ++++++++++++++ docs/ref/contrib/gis/index.txt | 1 + docs/releases/1.9.txt | 3 + tests/gis_tests/test_geoip2.py | 139 +++++++++++ tests/requirements/base.txt | 1 + 9 files changed, 577 insertions(+) create mode 100644 django/contrib/gis/geoip2/__init__.py create mode 100644 django/contrib/gis/geoip2/base.py create mode 100644 django/contrib/gis/geoip2/resources.py create mode 100644 docs/ref/contrib/gis/geoip2.txt create mode 100644 tests/gis_tests/test_geoip2.py diff --git a/django/contrib/gis/geoip2/__init__.py b/django/contrib/gis/geoip2/__init__.py new file mode 100644 index 00000000000..b6142e08445 --- /dev/null +++ b/django/contrib/gis/geoip2/__init__.py @@ -0,0 +1,21 @@ +""" +This module houses the GeoIP2 object, a wrapper for the MaxMind GeoIP2(R) +Python API (http://geoip2.readthedocs.org/). This is an alternative to the +Python GeoIP2 interface provided by MaxMind. + +GeoIP(R) is a registered trademark of MaxMind, Inc. + +For IP-based geolocation, this module requires the GeoLite2 Country and City +datasets, in binary format (CSV will not work!). The datasets may be +downloaded from MaxMind at http://dev.maxmind.com/geoip/geoip2/geolite2/. +Grab GeoLite2-Country.mmdb.gz and GeoLite2-City.mmdb.gz, and unzip them in the +directory corresponding to settings.GEOIP_PATH. 
+""" +__all__ = ['HAS_GEOIP2'] + +try: + from .base import GeoIP2, GeoIP2Exception + HAS_GEOIP2 = True + __all__ += ['GeoIP2', 'GeoIP2Exception'] +except ImportError: + HAS_GEOIP2 = False diff --git a/django/contrib/gis/geoip2/base.py b/django/contrib/gis/geoip2/base.py new file mode 100644 index 00000000000..582358969c2 --- /dev/null +++ b/django/contrib/gis/geoip2/base.py @@ -0,0 +1,219 @@ +import os +import socket + +import geoip2.database + +from django.conf import settings +from django.core.validators import ipv4_re +from django.utils import six +from django.utils.ipv6 import is_valid_ipv6_address + +from .resources import City, Country + +# Creating the settings dictionary with any settings, if needed. +GEOIP_SETTINGS = { + 'GEOIP_PATH': getattr(settings, 'GEOIP_PATH', None), + 'GEOIP_CITY': getattr(settings, 'GEOIP_CITY', 'GeoLite2-City.mmdb'), + 'GEOIP_COUNTRY': getattr(settings, 'GEOIP_COUNTRY', 'GeoLite2-Country.mmdb'), +} + + +class GeoIP2Exception(Exception): + pass + + +class GeoIP2(object): + # The flags for GeoIP memory caching. + # Try MODE_MMAP_EXT, MODE_MMAP, MODE_FILE in that order. + MODE_AUTO = 0 + # Use the C extension with memory map. + MODE_MMAP_EXT = 1 + # Read from memory map. Pure Python. + MODE_MMAP = 2 + # Read database as standard file. Pure Python. + MODE_FILE = 4 + # Load database into memory. Pure Python. + MODE_MEMORY = 8 + cache_options = {opt: None for opt in (0, 1, 2, 4, 8)} + + # Paths to the city & country binary databases. + _city_file = '' + _country_file = '' + + # Initially, pointers to GeoIP file references are NULL. + _city = None + _country = None + + def __init__(self, path=None, cache=0, country=None, city=None): + """ + Initialize the GeoIP object. No parameters are required to use default + settings. Keyword arguments may be passed in to customize the locations + of the GeoIP datasets. 
+ + * path: Base directory to where GeoIP data is located or the full path + to where the city or country data files (*.mmdb) are located. + Assumes that both the city and country data sets are located in + this directory; overrides the GEOIP_PATH setting. + + * cache: The cache settings when opening up the GeoIP datasets. May be + an integer in (0, 1, 2, 4, 8) corresponding to the MODE_AUTO, + MODE_MMAP_EXT, MODE_MMAP, MODE_FILE, and MODE_MEMORY, + `GeoIPOptions` C API settings, respectively. Defaults to 0, + meaning MODE_AUTO. + + * country: The name of the GeoIP country data file. Defaults to + 'GeoLite2-Country.mmdb'; overrides the GEOIP_COUNTRY setting. + + * city: The name of the GeoIP city data file. Defaults to + 'GeoLite2-City.mmdb'; overrides the GEOIP_CITY setting. + """ + # Checking the given cache option. + if cache in self.cache_options: + self._cache = cache + else: + raise GeoIP2Exception('Invalid GeoIP caching option: %s' % cache) + + # Getting the GeoIP data path. + if not path: + path = GEOIP_SETTINGS['GEOIP_PATH'] + if not path: + raise GeoIP2Exception('GeoIP path must be provided via parameter or the GEOIP_PATH setting.') + if not isinstance(path, six.string_types): + raise TypeError('Invalid path type: %s' % type(path).__name__) + + if os.path.isdir(path): + # Constructing the GeoIP database filenames using the settings + # dictionary. If the database files for the GeoLite country + # and/or city datasets exist, then try to open them. 
+ country_db = os.path.join(path, country or GEOIP_SETTINGS['GEOIP_COUNTRY']) + if os.path.isfile(country_db): + self._country = geoip2.database.Reader(country_db, mode=cache) + self._country_file = country_db + + city_db = os.path.join(path, city or GEOIP_SETTINGS['GEOIP_CITY']) + if os.path.isfile(city_db): + self._city = geoip2.database.Reader(city_db, mode=cache) + self._city_file = city_db + elif os.path.isfile(path): + # Otherwise, some detective work will be needed to figure out + # whether the given database path is for the GeoIP country or city + # databases. + reader = geoip2.database.Reader(path, mode=cache) + db_type = reader.metadata().database_type + + if db_type.endswith('City'): + # GeoLite City database detected. + self._city = reader + self._city_file = path + elif db_type.endswith('Country'): + # GeoIP Country database detected. + self._country = reader + self._country_file = path + else: + raise GeoIP2Exception('Unable to recognize database edition: %s' % db_type) + else: + raise GeoIP2Exception('GeoIP path must be a valid file or directory.') + + @property + def _reader(self): + if self._country: + return self._country + else: + return self._city + + @property + def _country_or_city(self): + if self._country: + return self._country.country + else: + return self._city.city + + def __del__(self): + # Cleanup any GeoIP file handles lying around. + if self._reader: + self._reader.close() + + def _check_query(self, query, country=False, city=False, city_or_country=False): + "Helper routine for checking the query and database availability." + # Making sure a string was passed in for the query. + if not isinstance(query, six.string_types): + raise TypeError('GeoIP query must be a string, not type %s' % type(query).__name__) + + # Extra checks for the existence of country and city databases. 
+ if city_or_country and not (self._country or self._city): + raise GeoIP2Exception('Invalid GeoIP country and city data files.') + elif country and not self._country: + raise GeoIP2Exception('Invalid GeoIP country data file: %s' % self._country_file) + elif city and not self._city: + raise GeoIP2Exception('Invalid GeoIP city data file: %s' % self._city_file) + + # Return the query string back to the caller. GeoIP2 only takes IP addresses. + if not (ipv4_re.match(query) or is_valid_ipv6_address(query)): + query = socket.gethostbyname(query) + + return query + + def city(self, query): + """ + Return a dictionary of city information for the given IP address or + Fully Qualified Domain Name (FQDN). Some information in the dictionary + may be undefined (None). + """ + enc_query = self._check_query(query, city=True) + return City(self._city.city(enc_query)) + + def country_code(self, query): + "Return the country code for the given IP Address or FQDN." + enc_query = self._check_query(query, city_or_country=True) + return self.country(enc_query)['country_code'] + + def country_name(self, query): + "Return the country name for the given IP Address or FQDN." + enc_query = self._check_query(query, city_or_country=True) + return self.country(enc_query)['country_name'] + + def country(self, query): + """ + Return a dictionary with the country code and name when given an + IP address or a Fully Qualified Domain Name (FQDN). For example, both + '24.124.1.80' and 'djangoproject.com' are valid parameters. + """ + # Returning the country code and name + enc_query = self._check_query(query, city_or_country=True) + return Country(self._country_or_city(enc_query)) + + # #### Coordinate retrieval routines #### + def coords(self, query, ordering=('longitude', 'latitude')): + cdict = self.city(query) + if cdict is None: + return None + else: + return tuple(cdict[o] for o in ordering) + + def lon_lat(self, query): + "Return a tuple of the (longitude, latitude) for the given query." 
+ return self.coords(query) + + def lat_lon(self, query): + "Return a tuple of the (latitude, longitude) for the given query." + return self.coords(query, ('latitude', 'longitude')) + + def geos(self, query): + "Return a GEOS Point object for the given query." + ll = self.lon_lat(query) + if ll: + from django.contrib.gis.geos import Point + return Point(ll, srid=4326) + else: + return None + + # #### GeoIP Database Information Routines #### + @property + def info(self): + "Return information about the GeoIP library and databases in use." + meta = self._reader.metadata() + return 'GeoIP Library:\n\t%s.%s\n' % (meta.binary_format_major_version, meta.binary_format_minor_version) + + @classmethod + def open(cls, full_path, cache): + return GeoIP2(full_path, cache) diff --git a/django/contrib/gis/geoip2/resources.py b/django/contrib/gis/geoip2/resources.py new file mode 100644 index 00000000000..327b60049ae --- /dev/null +++ b/django/contrib/gis/geoip2/resources.py @@ -0,0 +1,18 @@ +def City(response): + return { + 'city': response.city.name, + 'country_code': response.country.iso_code, + 'country_name': response.country.name, + 'dma_code': response.location.metro_code, + 'latitude': response.location.latitude, + 'longitude': response.location.longitude, + 'postal_code': response.postal.code, + 'region': response.subdivisions[0].iso_code if len(response.subdivisions) else None, + } + + +def Country(response): + return { + 'country_code': response.country.iso_code, + 'country_name': response.country.name, + } diff --git a/docs/internals/contributing/writing-code/unit-tests.txt b/docs/internals/contributing/writing-code/unit-tests.txt index e1d0aa41f7c..06eaf90b74b 100644 --- a/docs/internals/contributing/writing-code/unit-tests.txt +++ b/docs/internals/contributing/writing-code/unit-tests.txt @@ -138,6 +138,7 @@ dependencies: * bcrypt_ * docutils_ +* geoip2_ * jinja2_ 2.7+ * numpy_ * Pillow_ @@ -170,6 +171,7 @@ associated tests will be skipped. .. 
_bcrypt: https://pypi.python.org/pypi/bcrypt .. _docutils: https://pypi.python.org/pypi/docutils +.. _geoip2: https://pypi.python.org/pypi/geoip2 .. _jinja2: https://pypi.python.org/pypi/jinja2 .. _numpy: https://pypi.python.org/pypi/numpy .. _Pillow: https://pypi.python.org/pypi/Pillow/ diff --git a/docs/ref/contrib/gis/geoip2.txt b/docs/ref/contrib/gis/geoip2.txt new file mode 100644 index 00000000000..4d014a70e1a --- /dev/null +++ b/docs/ref/contrib/gis/geoip2.txt @@ -0,0 +1,173 @@ +======================= +Geolocation with GeoIP2 +======================= + +.. module:: django.contrib.gis.geoip2 + :synopsis: Python interface for MaxMind's GeoIP2 databases. + +.. versionadded:: 1.9 + +The :class:`GeoIP2` object is a wrapper for the `MaxMind geoip2 Python +library`__. [#]_ + +In order to perform IP-based geolocation, the :class:`GeoIP2` object requires +the `geoip2 Python library`__ and the GeoIP `Country` and/or `City` `datasets +in binary format`__ (the CSV files will not work!). Grab the +``GeoLite2-Country.mmdb.gz`` and ``GeoLite2-City.mmdb.gz`` files and unzip them +in a directory corresponding to the :setting:`GEOIP_PATH` setting. + +Additionally, it is recommended to install the `libmaxminddb C library`__, so +that ``geoip2`` can leverage the C library's faster speed. 
+ +__ http://geoip2.readthedocs.org/ +__ https://pypi.python.org/pypi/geoip2 +__ http://dev.maxmind.com/geoip/geoip2/geolite2/ +__ https://github.com/maxmind/libmaxminddb + +Example +======= + +Here is an example of its usage:: + + >>> from django.contrib.gis.geoip2 import GeoIP2 + >>> g = GeoIP2() + >>> g.country('google.com') + {'country_code': 'US', 'country_name': 'United States'} + >>> g.city('72.14.207.99') + {'city': 'Mountain View', + 'country_code': 'US', + 'country_name': 'United States', + 'dma_code': 807, + 'latitude': 37.419200897216797, + 'longitude': -122.05740356445312, + 'postal_code': '94043', + 'region': 'CA'} + >>> g.lat_lon('salon.com') + (39.0437, -77.4875) + >>> g.lon_lat('uh.edu') + (-95.4342, 29.834) + >>> g.geos('24.124.1.80').wkt + 'POINT (-97.0000000000000000 38.0000000000000000)' + +``GeoIP`` Settings +================== + +.. setting:: GEOIP_PATH + +GEOIP_PATH +---------- + +A string specifying the directory where the GeoIP data files are +located. This setting is *required* unless manually specified +with ``path`` keyword when initializing the :class:`GeoIP2` object. + +.. setting:: GEOIP_COUNTRY + +GEOIP_COUNTRY +------------- + +The basename to use for the GeoIP country data file. Defaults to +``'GeoLite2-Country.mmdb'``. + +.. setting:: GEOIP_CITY + +GEOIP_CITY +---------- + +The basename to use for the GeoIP city data file. Defaults to +``'GeoLite2-City.mmdb'``. + +``GeoIP`` API +============= + +.. class:: GeoIP2(path=None, cache=0, country=None, city=None) + +The ``GeoIP`` object does not require any parameters to use the default +settings. However, at the very least the :setting:`GEOIP_PATH` setting +should be set with the path of the location of your GeoIP datasets. The +following initialization keywords may be used to customize any of the +defaults. 
+ +=================== ======================================================= +Keyword Arguments Description +=================== ======================================================= +``path`` Base directory to where GeoIP data is located or the + full path to where the city or country data files + (``.mmdb``) are located. Assumes that both the city and + country datasets are located in this directory; + overrides the :setting:`GEOIP_PATH` setting. + +``cache`` The cache settings when opening up the GeoIP datasets. May + be an integer in (0, 1, 2, 4, 8) corresponding to the + ``MODE_AUTO``, ``MODE_MMAP_EXT``, ``MODE_MMAP``, + ``MODE_FILE``, and ``MODE_MEMORY`` ``geoip2`` reader modes, + respectively. Defaults to 0 (``MODE_AUTO``). + +``country`` The name of the GeoIP country data file. Defaults + to ``GeoLite2-Country.mmdb``. Setting this keyword + overrides the :setting:`GEOIP_COUNTRY` setting. + +``city`` The name of the GeoIP city data file. Defaults to + ``GeoLite2-City.mmdb``. Setting this keyword overrides + the :setting:`GEOIP_CITY` setting. +=================== ======================================================= + +``GeoIP`` Methods +================= + +Instantiating +------------- + +.. classmethod:: GeoIP2.open(full_path, cache) + +This classmethod instantiates the GeoIP object from the given database path +and given cache setting. + +Querying +-------- + +All the following querying routines may take either a string IP address +or a fully qualified domain name (FQDN). For example, both +``'205.186.163.125'`` and ``'djangoproject.com'`` would be valid query +parameters. + +.. method:: GeoIP2.city(query) + +Returns a dictionary of city information for the given query. Some +of the values in the dictionary may be undefined (``None``). + +.. method:: GeoIP2.country(query) + +Returns a dictionary with the country code and country name for the given +query. + +.. method:: GeoIP2.country_code(query) + +Returns the country code corresponding to the query. + +..
method:: GeoIP2.country_name(query) + +Returns the country name corresponding to the query. + +Coordinate Retrieval +-------------------- + +.. method:: GeoIP2.coords(query) + +Returns a coordinate tuple of (longitude, latitude). + +.. method:: GeoIP2.lon_lat(query) + +Returns a coordinate tuple of (longitude, latitude). + +.. method:: GeoIP2.lat_lon(query) + +Returns a coordinate tuple of (latitude, longitude). + +.. method:: GeoIP2.geos(query) + +Returns a :class:`~django.contrib.gis.geos.Point` object corresponding to the +query. + +.. rubric:: Footnotes +.. [#] GeoIP(R) is a registered trademark of MaxMind, Inc. diff --git a/docs/ref/contrib/gis/index.txt b/docs/ref/contrib/gis/index.txt index 3641fd81912..83b8b31b509 100644 --- a/docs/ref/contrib/gis/index.txt +++ b/docs/ref/contrib/gis/index.txt @@ -23,6 +23,7 @@ of spatially enabled data. geos gdal geoip + geoip2 utils commands admin diff --git a/docs/releases/1.9.txt b/docs/releases/1.9.txt index 96a757dfe29..6d482038af8 100644 --- a/docs/releases/1.9.txt +++ b/docs/releases/1.9.txt @@ -214,6 +214,9 @@ Minor features raster into a different spatial reference system by specifying a target ``srid``. +* The new :class:`~django.contrib.gis.geoip2.GeoIP2` class allows using + MaxMind's GeoLite2 databases, which include support for IPv6 addresses.
+ +:mod:`django.contrib.messages` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/tests/gis_tests/test_geoip2.py b/tests/gis_tests/test_geoip2.py new file mode 100644 index 00000000000..7b81a1a7ded --- /dev/null +++ b/tests/gis_tests/test_geoip2.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import os +import unittest +from unittest import skipUnless + +from django.conf import settings +from django.contrib.gis.geoip2 import HAS_GEOIP2 +from django.contrib.gis.geos import HAS_GEOS, GEOSGeometry +from django.utils import six + +if HAS_GEOIP2: + from django.contrib.gis.geoip2 import GeoIP2, GeoIP2Exception + + +# Note: Requires both the GeoIP country and city datasets. +# The GEOIP_PATH setting should be the only setting set (the directory +# should contain links or the actual database files 'GeoLite2-City.mmdb' and +# 'GeoLite2-Country.mmdb'). +@skipUnless(HAS_GEOIP2 and getattr(settings, "GEOIP_PATH", None), + "GeoIP is required along with the GEOIP_PATH setting.") +class GeoIPTest(unittest.TestCase): + addr = '128.249.1.1' + fqdn = 'tmc.edu' + + def test01_init(self): + "GeoIP initialization." + g1 = GeoIP2() # Everything inferred from GeoIP path + path = settings.GEOIP_PATH + g2 = GeoIP2(path, 0) # Passing in data path explicitly. + g3 = GeoIP2.open(path, 0) # MaxMind Python API syntax. + + for g in (g1, g2, g3): + self.assertTrue(g._country) + self.assertTrue(g._city) + + # Only passing in the location of one database. + city = os.path.join(path, 'GeoLite2-City.mmdb') + cntry = os.path.join(path, 'GeoLite2-Country.mmdb') + g4 = GeoIP2(city, country='') + self.assertIsNone(g4._country) + g5 = GeoIP2(cntry, city='') + self.assertIsNone(g5._city) + + # Improper parameters.
+ bad_params = (23, 'foo', 15.23) + for bad in bad_params: + self.assertRaises(GeoIP2Exception, GeoIP2, cache=bad) + if isinstance(bad, six.string_types): + e = GeoIP2Exception + else: + e = TypeError + self.assertRaises(e, GeoIP2, bad, 0) + + def test02_bad_query(self): + "GeoIP query parameter checking." + cntry_g = GeoIP2(city='') + # No city database available, these calls should fail. + self.assertRaises(GeoIP2Exception, cntry_g.city, 'tmc.edu') + self.assertRaises(GeoIP2Exception, cntry_g.coords, 'tmc.edu') + + # Non-string query should raise TypeError + self.assertRaises(TypeError, cntry_g.country_code, 17) + self.assertRaises(TypeError, cntry_g.country_name, GeoIP2) + + def test03_country(self): + "GeoIP country querying methods." + g = GeoIP2(city='') + + for query in (self.fqdn, self.addr): + self.assertEqual( + 'US', + g.country_code(query), + 'Failed for func country_code and query %s' % query + ) + self.assertEqual( + 'United States', + g.country_name(query), + 'Failed for func country_name and query %s' % query + ) + self.assertEqual( + {'country_code': 'US', 'country_name': 'United States'}, + g.country(query) + ) + + @skipUnless(HAS_GEOS, "Geos is required") + def test04_city(self): + "GeoIP city querying methods." + g = GeoIP2(country='') + + for query in (self.fqdn, self.addr): + # Country queries should still work. + self.assertEqual( + 'US', + g.country_code(query), + 'Failed for func country_code and query %s' % query + ) + self.assertEqual( + 'United States', + g.country_name(query), + 'Failed for func country_name and query %s' % query + ) + self.assertEqual( + {'country_code': 'US', 'country_name': 'United States'}, + g.country(query) + ) + + # City information dictionary. 
+ d = g.city(query) + self.assertEqual('US', d['country_code']) + self.assertEqual('Houston', d['city']) + self.assertEqual('TX', d['region']) + + geom = g.geos(query) + self.assertIsInstance(geom, GEOSGeometry) + lon, lat = (-95.4010, 29.7079) + lat_lon = g.lat_lon(query) + lat_lon = (lat_lon[1], lat_lon[0]) + for tup in (geom.tuple, g.coords(query), g.lon_lat(query), lat_lon): + self.assertAlmostEqual(lon, tup[0], 4) + self.assertAlmostEqual(lat, tup[1], 4) + + def test05_unicode_response(self): + "GeoIP strings should be properly encoded (#16553)." + g = GeoIP2() + d = g.city("duesseldorf.de") + self.assertEqual('Düsseldorf', d['city']) + d = g.country('200.26.205.1') + # Some databases have only unaccented countries + self.assertIn(d['country_name'], ('Curaçao', 'Curacao')) + + def test06_ipv6_query(self): + "GeoIP can lookup IPv6 addresses." + g = GeoIP2() + d = g.city('2002:81ed:c9a5::81ed:c9a5') # IPv6 address for www.nhm.ku.edu + self.assertEqual('US', d['country_code']) + self.assertEqual('Lawrence', d['city']) + self.assertEqual('KS', d['region']) diff --git a/tests/requirements/base.txt b/tests/requirements/base.txt index c2c1906e3d5..af828306c37 100644 --- a/tests/requirements/base.txt +++ b/tests/requirements/base.txt @@ -1,5 +1,6 @@ bcrypt docutils +geoip2 jinja2 >= 2.7 numpy Pillow