diff --git a/django/utils/simplejson/__init__.py b/django/utils/simplejson/__init__.py index 130940f358..2be6c3909c 100644 --- a/django/utils/simplejson/__init__.py +++ b/django/utils/simplejson/__init__.py @@ -1,376 +1,343 @@ -r""" -A simple, fast, extensible JSON encoder and decoder - -JSON (JavaScript Object Notation) is a subset of +r"""JSON (JavaScript Object Notation) is a subset of JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data interchange format. -simplejson exposes an API familiar to uses of the standard library -marshal and pickle modules. +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. Encoding basic Python object hierarchies:: - - >>> import simplejson - >>> simplejson.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print simplejson.dumps("\"foo\bar") + >>> print json.dumps("\"foo\bar") "\"foo\bar" - >>> print simplejson.dumps(u'\u1234') + >>> print json.dumps(u'\u1234') "\u1234" - >>> print simplejson.dumps('\\') + >>> print json.dumps('\\') "\\" - >>> print simplejson.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) {"a": 0, "b": 0, "c": 0} >>> from StringIO import StringIO >>> io = StringIO() - >>> simplejson.dump(['streaming API'], io) + >>> json.dump(['streaming API'], io) >>> io.getvalue() '["streaming API"]' Compact encoding:: - >>> import simplejson - >>> simplejson.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + >>> import simplejson as json + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: - >>> import simplejson - >>> print simplejson.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> import simplejson as json + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) { - "4": 5, + "4": 5, "6": 7 } Decoding JSON:: - - >>> import simplejson - >>> simplejson.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') - [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] - >>> simplejson.loads('"\\"foo\\bar"') - u'"foo\x08ar' + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True >>> from StringIO import StringIO >>> io = StringIO('["streaming API"]') - >>> simplejson.load(io) - [u'streaming API'] + >>> json.load(io)[0] == 'streaming API' + True Specializing JSON object decoding:: - >>> import simplejson + >>> import simplejson as json >>> def as_complex(dct): ... if '__complex__' in dct: ... return complex(dct['real'], dct['imag']) ... return dct - ... - >>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', ... object_hook=as_complex) (1+2j) >>> import decimal - >>> simplejson.loads('1.1', parse_float=decimal.Decimal) - Decimal("1.1") + >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + True -Extending JSONEncoder:: - - >>> import simplejson - >>> class ComplexEncoder(simplejson.JSONEncoder): - ... def default(self, obj): - ... if isinstance(obj, complex): - ... return [obj.real, obj.imag] - ... return simplejson.JSONEncoder.default(self, obj) - ... - >>> dumps(2 + 1j, cls=ComplexEncoder) - '[2.0, 1.0]' - >>> ComplexEncoder().encode(2 + 1j) - '[2.0, 1.0]' - >>> list(ComplexEncoder().iterencode(2 + 1j)) - ['[', '2.0', ', ', '1.0', ']'] - +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError("%r is not JSON serializable" % (o,)) + ... + >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: -Using simplejson from the shell to validate and -pretty-print:: - $ echo '{"json":"obj"}' | python -msimplejson.tool { "json": "obj" } $ echo '{ 1.2:3.4}' | python -msimplejson.tool Expecting property name: line 1 column 2 (char 2) - -Note that the JSON produced by this module's default settings -is a subset of YAML, so it may be used as a serializer for that as well. """ -__version__ = '1.9.2' -__all__ = [ - 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONEncoder', -] -if __name__ == '__main__': - import warnings - warnings.warn('python -msimplejson is deprecated, use python -msiplejson.tool', DeprecationWarning) +# Django modification: try to use the system version first, providing it's +# either of a later version of has the C speedups in place. Otherwise, fall +# back to our local copy. + +__version__ = '2.0.7' + +use_system_version = False +try: + # The system-installed version has priority providing it is either not an + # earlier version or it contains the C speedups. + import simplejson + if (simplejson.__version__.split('.') >= __version__.split('.') or + hasattr(simplejson, '_speedups')): + from simplejson import * + use_system_version = True +except ImportError: + pass + +if not use_system_version: + try: + from json import * # Python 2.6 preferred over local copy. + use_system_version = True + except ImportError: + pass + +# If all else fails, we have a bundled version that can be used. +if not use_system_version: + __all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', + ] + from django.utils.simplejson.decoder import JSONDecoder from django.utils.simplejson.encoder import JSONEncoder -else: - from decoder import JSONDecoder - from encoder import JSONEncoder -_default_encoder = JSONEncoder( - skipkeys=False, - ensure_ascii=True, - check_circular=True, - allow_nan=True, - indent=None, - separators=None, - encoding='utf-8', - default=None, -) + _default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, + ) -def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): - """ - Serialize ``obj`` as a JSON formatted stream to ``fp`` (a - ``.write()``-supporting file-like object). + def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). - If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. + If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp`` - may be ``unicode`` instances, subject to normal Python ``str`` to - ``unicode`` coercion rules. Unless ``fp.write()`` explicitly - understands ``unicode`` (as in ``codecs.getwriter()``) this is likely - to cause an error. + If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. - If ``check_circular`` is ``False``, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). + If ``check_circular`` is ``False``, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). - If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - If ``indent`` is a non-negative integer, then JSON array elements and object - members will be pretty-printed with that indent level. An indent level - of 0 will only insert newlines. ``None`` is the most compact representation. + If ``indent`` is a non-negative integer, then JSON array elements and object + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. - ``encoding`` is the character encoding for str instances, default is UTF-8. + ``encoding`` is the character encoding for str instances, default is UTF-8. - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. - """ - # cached encoder - if (skipkeys is False and ensure_ascii is True and - check_circular is True and allow_nan is True and - cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): - iterable = _default_encoder.iterencode(obj) - else: + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (skipkeys is False and ensure_ascii is True and + check_circular is True and allow_nan is True and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + + def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is ``False``, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is ``False``, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (skipkeys is False and ensure_ascii is True and + check_circular is True and allow_nan is True and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder - iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, encoding=encoding, - default=default, **kw).iterencode(obj) - # could accelerate with writelines in some versions of Python, at - # a debuggability cost - for chunk in iterable: - fp.write(chunk) + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) -def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): - """ - Serialize ``obj`` to a JSON formatted ``str``. - - If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - If ``ensure_ascii`` is ``False``, then the return value will be a - ``unicode`` instance subject to normal Python ``str`` to ``unicode`` - coercion rules instead of being escaped to an ASCII ``str``. - - If ``check_circular`` is ``False``, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in - strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. - - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. - - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. - """ - # cached encoder - if (skipkeys is False and ensure_ascii is True and - check_circular is True and allow_nan is True and - cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): - return _default_encoder.encode(obj) - if cls is None: - cls = JSONEncoder - return cls( - skipkeys=skipkeys, ensure_ascii=ensure_ascii, - check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, encoding=encoding, default=default, - **kw).encode(obj) + _default_decoder = JSONDecoder(encoding=None, object_hook=None) -_default_decoder = JSONDecoder(encoding=None, object_hook=None) + def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, **kw) -def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): - """ - Deserialize ``fp`` (a ``.read()``-supporting file-like object containing - a JSON document) to a Python object. + def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. - If the contents of ``fp`` is encoded with an ASCII based encoding other - than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. Encodings that are not ASCII based (such as UCS-2) are - not allowed, and should be wrapped with - ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` - object and passed to ``loads()`` + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. - """ - return loads(fp.read(), - encoding=encoding, cls=cls, object_hook=object_hook, - parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, **kw) + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). -def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): - """ - Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON - document) to a Python object. + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). - If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding - other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) - are not allowed and should be decoded to ``unicode`` first. + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN, null, true, false. - This can be used to raise an exception if invalid JSON numbers - are encountered. - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. - """ - if (cls is None and encoding is None and object_hook is None and - parse_int is None and parse_float is None and - parse_constant is None and not kw): - return _default_decoder.decode(s) - if cls is None: - cls = JSONDecoder - if object_hook is not None: - kw['object_hook'] = object_hook - if parse_float is not None: - kw['parse_float'] = parse_float - if parse_int is not None: - kw['parse_int'] = parse_int - if parse_constant is not None: - kw['parse_constant'] = parse_constant - return cls(encoding=encoding, **kw).decode(s) - - -# -# Compatibility cruft from other libraries -# - - -def decode(s): - """ - demjson, python-cjson API compatibility hook. Use loads(s) instead. - """ - import warnings - warnings.warn("simplejson.loads(s) should be used instead of decode(s)", - DeprecationWarning) - return loads(s) - - -def encode(obj): - """ - demjson, python-cjson compatibility hook. Use dumps(s) instead. - """ - import warnings - warnings.warn("simplejson.dumps(s) should be used instead of encode(s)", - DeprecationWarning) - return dumps(obj) - - -def read(s): - """ - jsonlib, JsonUtils, python-json, json-py API compatibility hook. - Use loads(s) instead. - """ - import warnings - warnings.warn("simplejson.loads(s) should be used instead of read(s)", - DeprecationWarning) - return loads(s) - - -def write(obj): - """ - jsonlib, JsonUtils, python-json, json-py API compatibility hook. - Use dumps(s) instead. - """ - import warnings - warnings.warn("simplejson.dumps(s) should be used instead of write(s)", - DeprecationWarning) - return dumps(obj) - - -if __name__ == '__main__': - import simplejson.tool - simplejson.tool.main() + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/django/utils/simplejson/decoder.py b/django/utils/simplejson/decoder.py index cef9fc002b..5a845ccfdc 100644 --- a/django/utils/simplejson/decoder.py +++ b/django/utils/simplejson/decoder.py @@ -1,20 +1,17 @@ -""" -Implementation of JSONDecoder +"""Implementation of JSONDecoder """ import re import sys +import struct -from django.utils.simplejson.scanner import Scanner, pattern -try: - from django.utils.simplejson._speedups import scanstring as c_scanstring -except ImportError: - pass +from django.utils.simplejson.scanner import make_scanner +c_scanstring = None + +__all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL def _floatconstants(): - import struct - import sys _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') if sys.byteorder != 'big': _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] @@ -34,6 +31,7 @@ def linecol(doc, pos): def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups lineno, colno = linecol(doc, pos) if end is None: return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) @@ -46,35 +44,8 @@ _CONSTANTS = { '-Infinity': NegInf, 'Infinity': PosInf, 'NaN': NaN, - 'true': True, - 'false': False, - 'null': None, } -def JSONConstant(match, context, c=_CONSTANTS): - s = match.group(0) - fn = getattr(context, 'parse_constant', None) - if fn is None: - rval = c[s] - else: - rval = fn(s) - return rval, None -pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) - - -def JSONNumber(match, context): - match = JSONNumber.regex.match(match.string, *match.span()) - integer, frac, exp = match.groups() - if frac or exp: - fn = getattr(context, 'parse_float', None) or float - res = fn(integer + (frac or '') + (exp or '')) - else: - fn = getattr(context, 'parse_int', None) or int - res = fn(integer) - return res, None -pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) - - STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { '"': u'"', '\\': u'\\', '/': u'/', @@ -84,6 +55,14 @@ BACKSLASH = { DEFAULT_ENCODING = "utf-8" def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. + + Returns a tuple of the decoded string and the index of the character in s + after the end quote.""" if encoding is None: encoding = DEFAULT_ENCODING chunks = [] @@ -96,15 +75,19 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU errmsg("Unterminated string starting at", s, begin)) end = chunk.end() content, terminator = chunk.groups() + # Content is contains zero or more unescaped string characters if content: if not isinstance(content, unicode): content = unicode(content, encoding) _append(content) + # Terminator is the end of string, a literal control character, + # or a backslash denoting that an escape sequence follows if terminator == '"': break elif terminator != '\\': if strict: - raise ValueError(errmsg("Invalid control character %r at", s, end)) + msg = "Invalid control character %r at" % (terminator,) + raise ValueError(msg, s, end) else: _append(terminator) continue @@ -113,142 +96,162 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU except IndexError: raise ValueError( errmsg("Unterminated string starting at", s, begin)) + # If not a unicode escape sequence, must be in the lookup table if esc != 'u': try: - m = _b[esc] + char = _b[esc] except KeyError: raise ValueError( errmsg("Invalid \\escape: %r" % (esc,), s, end)) end += 1 else: + # Unicode escape sequence esc = s[end + 1:end + 5] next_end = end + 5 - msg = "Invalid \\uXXXX escape" - try: - if len(esc) != 4: - raise ValueError - uni = int(esc, 16) - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': - raise ValueError - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: - raise ValueError - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 - m = unichr(uni) - except ValueError: + if len(esc) != 4: + msg = "Invalid \\uXXXX escape" raise ValueError(errmsg(msg, s, end)) + uni = int(esc, 16) + # Check for surrogate pair on UCS-4 systems + if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise ValueError(errmsg(msg, s, end)) + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise ValueError(errmsg(msg, s, end)) + uni2 = int(esc2, 16) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + char = unichr(uni) end = next_end - _append(m) + # Append the unescaped character + _append(char) return u''.join(chunks), end -# Use speedup -try: - scanstring = c_scanstring -except NameError: - scanstring = py_scanstring +# Use speedup if available +scanstring = c_scanstring or py_scanstring -def JSONString(match, context): - encoding = getattr(context, 'encoding', None) - strict = getattr(context, 'strict', True) - return scanstring(match.string, match.end(), encoding, strict) -pattern(r'"')(JSONString) +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' - -WHITESPACE = re.compile(r'\s*', FLAGS) - -def JSONObject(match, context, _w=WHITESPACE.match): +def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): pairs = {} - s = match.string - end = _w(s, match.end()).end() + # Use a slice to prevent IndexError from being raised, the following + # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] - # Trivial empty object - if nextchar == '}': - return pairs, end + 1 + # Normally we expect nextchar == '"' if nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end)) + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + return pairs, end + 1 + elif nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) end += 1 - encoding = getattr(context, 'encoding', None) - strict = getattr(context, 'strict', True) - iterscan = JSONScanner.iterscan while True: key, end = scanstring(s, end, encoding, strict) - end = _w(s, end).end() + + # To skip some function call overhead we optimize the fast paths where + # the JSON key separator is ": " or just ":". if s[end:end + 1] != ':': - raise ValueError(errmsg("Expecting : delimiter", s, end)) - end = _w(s, end + 1).end() + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + + end += 1 + try: - value, end = iterscan(s, idx=end, context=context).next() + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end) except StopIteration: raise ValueError(errmsg("Expecting object", s, end)) pairs[key] = value - end = _w(s, end).end() - nextchar = s[end:end + 1] + + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' end += 1 + if nextchar == '}': break - if nextchar != ',': + elif nextchar != ',': raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) - end = _w(s, end).end() - nextchar = s[end:end + 1] + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 if nextchar != '"': raise ValueError(errmsg("Expecting property name", s, end - 1)) - object_hook = getattr(context, 'object_hook', None) + if object_hook is not None: pairs = object_hook(pairs) return pairs, end -pattern(r'{')(JSONObject) - -def JSONArray(match, context, _w=WHITESPACE.match): +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): values = [] - s = match.string - end = _w(s, match.end()).end() - # Look-ahead for trivial empty array nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array if nextchar == ']': return values, end + 1 - iterscan = JSONScanner.iterscan + _append = values.append while True: try: - value, end = iterscan(s, idx=end, context=context).next() + value, end = scan_once(s, end) except StopIteration: raise ValueError(errmsg("Expecting object", s, end)) - values.append(value) - end = _w(s, end).end() + _append(value) nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] end += 1 if nextchar == ']': break - if nextchar != ',': + elif nextchar != ',': raise ValueError(errmsg("Expecting , delimiter", s, end)) - end = _w(s, end).end() + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + return values, end -pattern(r'\[')(JSONArray) - - -ANYTHING = [ - JSONObject, - JSONArray, - JSONString, - JSONConstant, - JSONNumber, -] - -JSONScanner = Scanner(ANYTHING) - class JSONDecoder(object): - """ - Simple JSON decoder + """Simple JSON decoder Performs the following translations in decoding by default: - + +---------------+-------------------+ | JSON | Python | +===============+===================+ @@ -271,18 +274,15 @@ class JSONDecoder(object): It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as their corresponding ``float`` values, which is outside the JSON spec. - """ - _scanner = Scanner(ANYTHING) - __all__ = ['__init__', 'decode', 'raw_decode'] + """ def __init__(self, encoding=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True): - """ - ``encoding`` determines the encoding used to interpret any ``str`` + """``encoding`` determines the encoding used to interpret any ``str`` objects decoded by this instance (utf-8 by default). It has no effect when decoding ``unicode`` objects. - + Note that currently only encodings that are a superset of ASCII work, strings of other encodings should be passed in as ``unicode``. @@ -302,21 +302,26 @@ class JSONDecoder(object): for JSON integers (e.g. float). ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN, null, true, false. + following strings: -Infinity, Infinity, NaN. This can be used to raise an exception if invalid JSON numbers are encountered. + """ self.encoding = encoding self.object_hook = object_hook - self.parse_float = parse_float - self.parse_int = parse_int - self.parse_constant = parse_constant + self.parse_float = parse_float or float + self.parse_int = parse_int or int + self.parse_constant = parse_constant or _CONSTANTS.__getitem__ self.strict = strict + self.parse_object = JSONObject + self.parse_array = JSONArray + self.parse_string = scanstring + self.scan_once = make_scanner(self) def decode(self, s, _w=WHITESPACE.match): - """ - Return the Python representation of ``s`` (a ``str`` or ``unicode`` + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) + """ obj, end = self.raw_decode(s, idx=_w(s, 0).end()) end = _w(s, end).end() @@ -324,20 +329,17 @@ class JSONDecoder(object): raise ValueError(errmsg("Extra data", s, end, len(s))) return obj - def raw_decode(self, s, **kw): - """ - Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + def raw_decode(self, s, idx=0): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. This can be used to decode a JSON document from a string that may have extraneous data at the end. + """ - kw.setdefault('context', self) try: - obj, end = self._scanner.iterscan(s, **kw).next() + obj, end = self.scan_once(s, idx) except StopIteration: raise ValueError("No JSON object could be decoded") return obj, end - -__all__ = ['JSONDecoder'] diff --git a/django/utils/simplejson/encoder.py b/django/utils/simplejson/encoder.py index e6c01f6138..06ebe62a3c 100644 --- a/django/utils/simplejson/encoder.py +++ b/django/utils/simplejson/encoder.py @@ -1,12 +1,9 @@ -""" -Implementation of JSONEncoder +"""Implementation of JSONEncoder """ import re -try: - from django.utils.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii -except ImportError: - pass +c_encode_basestring_ascii = None +c_make_encoder = None ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') @@ -27,29 +24,9 @@ for i in range(0x20): INFINITY = float('1e66666') FLOAT_REPR = repr -def floatstr(o, allow_nan=True): - # Check for specials. Note that this type of test is processor- and/or - # platform-specific, so do tests which don't depend on the internals. - - if o != o: - text = 'NaN' - elif o == INFINITY: - text = 'Infinity' - elif o == -INFINITY: - text = '-Infinity' - else: - return FLOAT_REPR(o) - - if not allow_nan: - raise ValueError("Out of range float values are not JSON compliant: %r" - % (o,)) - - return text - - def encode_basestring(s): - """ - Return a JSON representation of a Python string + """Return a JSON representation of a Python string + """ def replace(match): return ESCAPE_DCT[match.group(0)] @@ -57,6 +34,9 @@ def encode_basestring(s): def py_encode_basestring_ascii(s): + """Return an ASCII-only JSON representation of a Python string + + """ if isinstance(s, str) and HAS_UTF8.search(s) is not None: s = s.decode('utf-8') def replace(match): @@ -76,18 +56,13 @@ def py_encode_basestring_ascii(s): return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' -try: - encode_basestring_ascii = c_encode_basestring_ascii -except NameError: - encode_basestring_ascii = py_encode_basestring_ascii - +encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii class JSONEncoder(object): - """ - Extensible JSON encoder for Python data structures. + """Extensible JSON encoder for Python data structures. Supports the following objects and types by default: - + +-------------------+---------------+ | Python | JSON | +===================+===============+ @@ -110,15 +85,14 @@ class JSONEncoder(object): ``.default()`` method with another method that returns a serializable object for ``o`` if possible, otherwise it should call the superclass implementation (to raise ``TypeError``). + """ - __all__ = ['__init__', 'default', 'encode', 'iterencode'] item_separator = ', ' key_separator = ': ' def __init__(self, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, encoding='utf-8', default=None): - """ - Constructor for JSONEncoder, with sensible defaults. + """Constructor for JSONEncoder, with sensible defaults. If skipkeys is False, then it is a TypeError to attempt encoding of keys that are not str, int, long, float or None. If @@ -158,6 +132,7 @@ class JSONEncoder(object): If encoding is not None, then all input strings will be transformed into unicode using that encoding prior to JSON-encoding. The default is UTF-8. + """ self.skipkeys = skipkeys @@ -166,171 +141,20 @@ class JSONEncoder(object): self.allow_nan = allow_nan self.sort_keys = sort_keys self.indent = indent - self.current_indent_level = 0 if separators is not None: self.item_separator, self.key_separator = separators if default is not None: self.default = default self.encoding = encoding - def _newline_indent(self): - return '\n' + (' ' * (self.indent * self.current_indent_level)) - - def _iterencode_list(self, lst, markers=None): - if not lst: - yield '[]' - return - if markers is not None: - markerid = id(lst) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = lst - yield '[' - if self.indent is not None: - self.current_indent_level += 1 - newline_indent = self._newline_indent() - separator = self.item_separator + newline_indent - yield newline_indent - else: - newline_indent = None - separator = self.item_separator - first = True - for value in lst: - if first: - first = False - else: - yield separator - for chunk in self._iterencode(value, markers): - yield chunk - if newline_indent is not None: - self.current_indent_level -= 1 - yield self._newline_indent() - yield ']' - if markers is not None: - del markers[markerid] - - def _iterencode_dict(self, dct, markers=None): - if not dct: - yield '{}' - return - if markers is not None: - markerid = id(dct) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = dct - yield '{' - key_separator = self.key_separator - if self.indent is not None: - self.current_indent_level += 1 - newline_indent = self._newline_indent() - item_separator = self.item_separator + newline_indent - yield newline_indent - else: - newline_indent = None - item_separator = self.item_separator - first = True - if self.ensure_ascii: - encoder = encode_basestring_ascii - else: - encoder = encode_basestring - allow_nan = self.allow_nan - if self.sort_keys: - keys = dct.keys() - keys.sort() - items = [(k, dct[k]) for k in keys] - else: - items = dct.iteritems() - _encoding = self.encoding - _do_decode = (_encoding is not None - and not (_encoding == 'utf-8')) - for key, value in items: - if isinstance(key, str): - if _do_decode: - key = key.decode(_encoding) - elif isinstance(key, basestring): - pass - # JavaScript is weakly typed for these, so it makes sense to - # also allow them. Many encoders seem to do something like this. - elif isinstance(key, float): - key = floatstr(key, allow_nan) - elif isinstance(key, (int, long)): - key = str(key) - elif key is True: - key = 'true' - elif key is False: - key = 'false' - elif key is None: - key = 'null' - elif self.skipkeys: - continue - else: - raise TypeError("key %r is not a string" % (key,)) - if first: - first = False - else: - yield item_separator - yield encoder(key) - yield key_separator - for chunk in self._iterencode(value, markers): - yield chunk - if newline_indent is not None: - self.current_indent_level -= 1 - yield self._newline_indent() - yield '}' - if markers is not None: - del markers[markerid] - - def _iterencode(self, o, markers=None): - if isinstance(o, basestring): - if self.ensure_ascii: - encoder = encode_basestring_ascii - else: - encoder = encode_basestring - _encoding = self.encoding - if (_encoding is not None and isinstance(o, str) - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) - yield encoder(o) - elif o is None: - yield 'null' - elif o is True: - yield 'true' - elif o is False: - yield 'false' - elif isinstance(o, (int, long)): - yield str(o) - elif isinstance(o, float): - yield floatstr(o, self.allow_nan) - elif isinstance(o, (list, tuple)): - for chunk in self._iterencode_list(o, markers): - yield chunk - elif isinstance(o, dict): - for chunk in self._iterencode_dict(o, markers): - yield chunk - else: - if markers is not None: - markerid = id(o) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = o - for chunk in self._iterencode_default(o, markers): - yield chunk - if markers is not None: - del markers[markerid] - - def _iterencode_default(self, o, markers=None): - newobj = self.default(o) - return self._iterencode(newobj, markers) - def default(self, o): - """ - Implement this method in a subclass such that it returns + """Implement this method in a subclass such that it returns a serializable object for ``o``, or calls the base implementation (to raise a ``TypeError``). For example, to support arbitrary iterators, you could implement default like this:: - + def default(self, o): try: iterable = iter(o) @@ -339,21 +163,22 @@ class JSONEncoder(object): else: return list(iterable) return JSONEncoder.default(self, o) + """ raise TypeError("%r is not JSON serializable" % (o,)) def encode(self, o): - """ - Return a JSON string representation of a Python data structure. + """Return a JSON string representation of a Python data structure. >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) '{"foo": ["bar", "baz"]}' + """ # This is for extremely simple cases and benchmarks. if isinstance(o, basestring): if isinstance(o, str): _encoding = self.encoding - if (_encoding is not None + if (_encoding is not None and not (_encoding == 'utf-8')): o = o.decode(_encoding) if self.ensure_ascii: @@ -363,23 +188,243 @@ class JSONEncoder(object): # This doesn't pass the iterator directly to ''.join() because the # exceptions aren't as detailed. The list call should be roughly # equivalent to the PySequence_Fast that ''.join() would do. - chunks = list(self.iterencode(o)) + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) return ''.join(chunks) - def iterencode(self, o): - """ - Encode the given object and yield each string + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string representation as available. - + For example:: - + for chunk in JSONEncoder().iterencode(bigobject): mysocket.write(chunk) + """ if self.check_circular: markers = {} else: markers = None - return self._iterencode(o, markers) + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) -__all__ = ['JSONEncoder'] + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. + + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError("Out of range float values are not JSON compliant: %r" + % (o,)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + key = _floatstr(key) + elif isinstance(key, (int, long)): + key = str(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif _skipkeys: + continue + else: + raise TypeError("key %r is not a string" % (key,)) + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if isinstance(value, basestring): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, (int, long)): + yield str(value) + elif isinstance(value, float): + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if isinstance(o, basestring): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield _floatstr(o) + elif isinstance(o, (list, tuple)): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + elif isinstance(o, dict): + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + for chunk in _iterencode(o, _current_indent_level): + yield chunk + if markers is not None: + del markers[markerid] + + return _iterencode diff --git a/django/utils/simplejson/scanner.py b/django/utils/simplejson/scanner.py index 2a18390d0d..adbc6ec979 100644 --- a/django/utils/simplejson/scanner.py +++ b/django/utils/simplejson/scanner.py @@ -1,67 +1,65 @@ -""" -Iterator based sre token scanner +"""JSON token scanner """ import re -from re import VERBOSE, MULTILINE, DOTALL -import sre_parse -import sre_compile -import sre_constants -from sre_constants import BRANCH, SUBPATTERN +try: + from simplejson._speedups import make_scanner as c_make_scanner +except ImportError: + c_make_scanner = None -__all__ = ['Scanner', 'pattern'] +__all__ = ['make_scanner'] -FLAGS = (VERBOSE | MULTILINE | DOTALL) +NUMBER_RE = re.compile( + r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + (re.VERBOSE | re.MULTILINE | re.DOTALL)) -class Scanner(object): - def __init__(self, lexicon, flags=FLAGS): - self.actions = [None] - # Combine phrases into a compound pattern - s = sre_parse.Pattern() - s.flags = flags - p = [] - for idx, token in enumerate(lexicon): - phrase = token.pattern - try: - subpattern = sre_parse.SubPattern(s, - [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) - except sre_constants.error: - raise - p.append(subpattern) - self.actions.append(token) +def py_make_scanner(context): + parse_object = context.parse_object + parse_array = context.parse_array + parse_string = context.parse_string + match_number = NUMBER_RE.match + encoding = context.encoding + strict = context.strict + parse_float = context.parse_float + parse_int = context.parse_int + parse_constant = context.parse_constant + object_hook = context.object_hook - s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work - p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) - self.scanner = sre_compile.compile(p) + def _scan_once(string, idx): + try: + nextchar = string[idx] + except IndexError: + raise StopIteration - def iterscan(self, string, idx=0, context=None): - """ - Yield match, end_idx for each match - """ - match = self.scanner.scanner(string, idx).match - actions = self.actions - lastend = idx - end = len(string) - while True: - m = match() - if m is None: - break - matchbegin, matchend = m.span() - if lastend == matchend: - break - action = actions[m.lastindex] - if action is not None: - rval, next_pos = action(m, context) - if next_pos is not None and next_pos != matchend: - # "fast forward" the scanner - matchend = next_pos - match = self.scanner.scanner(string, matchend).match - yield rval, matchend - lastend = matchend + if nextchar == '"': + return parse_string(string, idx + 1, encoding, strict) + elif nextchar == '{': + return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook) + elif nextchar == '[': + return parse_array((string, idx + 1), _scan_once) + elif nextchar == 'n' and string[idx:idx + 4] == 'null': + return None, idx + 4 + elif nextchar == 't' and string[idx:idx + 4] == 'true': + return True, idx + 4 + elif nextchar == 'f' and string[idx:idx + 5] == 'false': + return False, idx + 5 + m = match_number(string, idx) + if m is not None: + integer, frac, exp = m.groups() + if frac or exp: + res = parse_float(integer + (frac or '') + (exp or '')) + else: + res = parse_int(integer) + return res, m.end() + elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + return parse_constant('NaN'), idx + 3 + elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + return parse_constant('Infinity'), idx + 8 + elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + return parse_constant('-Infinity'), idx + 9 + else: + raise StopIteration -def pattern(pattern, flags=FLAGS): - def decorator(fn): - fn.pattern = pattern - fn.regex = re.compile(pattern, flags) - return fn - return decorator \ No newline at end of file + return _scan_once + +make_scanner = c_make_scanner or py_make_scanner diff --git a/django/utils/simplejson/tool.py b/django/utils/simplejson/tool.py index d0eb8ad9ee..74401c279a 100644 --- a/django/utils/simplejson/tool.py +++ b/django/utils/simplejson/tool.py @@ -1,23 +1,14 @@ -r""" -Using simplejson from the shell to validate and +r"""Using simplejson from the shell to validate and pretty-print:: - - $ echo '{"json":"obj"}' | python -msimplejson + + $ echo '{"json":"obj"}' | python -msimplejson.tool { "json": "obj" } - $ echo '{ 1.2:3.4}' | python -msimplejson + $ echo '{ 1.2:3.4}' | python -msimplejson.tool Expecting property name: line 1 column 2 (char 2) - -Note that the JSON produced by this module's default settings -is a subset of YAML, so it may be used as a serializer for that as well. """ -import django.utils.simplejson - -# -# Pretty printer: -# curl http://mochikit.com/examples/ajax_tables/domains.json | python -msimplejson.tool -# +from django.utils import simplejson def main(): import sys diff --git a/docs/topics/serialization.txt b/docs/topics/serialization.txt index 78bf6f5f03..a9366d6967 100644 --- a/docs/topics/serialization.txt +++ b/docs/topics/serialization.txt @@ -162,11 +162,17 @@ For example:: json_serializer = serializers.get_serializer("json")() json_serializer.serialize(queryset, ensure_ascii=False, stream=response) -The Django source code includes the simplejson_ module. Be aware that if you're -serializing using that module directly, not all Django output can be passed -unmodified to simplejson. In particular, :ref:`lazy translation objects -` need a `special encoder`_ written for them. Something like -this will work:: +The Django source code includes the simplejson_ module. However, if you're +using Python 2.6 (which includes a builtin version of the module), Django will +use the builtin ``json`` module automatically. If you have a system installed +version that includes the C-based speedup extension, or your system version is +more recent than the version shipped with Django (currently, 2.0.7), the +system version will be used instead of the version included with Django. + +Be aware that if you're serializing using that module directly, not all Django +output can be passed unmodified to simplejson. In particular, :ref:`lazy +translation objects ` need a `special encoder`_ written for +them. Something like this will work:: from django.utils.functional import Promise from django.utils.encoding import force_unicode @@ -178,3 +184,4 @@ this will work:: return obj .. _special encoder: http://svn.red-bean.com/bob/simplejson/tags/simplejson-1.7/docs/index.html +