Fixed #7131 -- Updated included simplejson code to match the simplejson-1.9.2
release. This should be fully backwards-compatible for people using the public
interfaces.


git-svn-id: http://code.djangoproject.com/svn/django/trunk@8124 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Malcolm Tredinnick 2008-07-27 23:38:28 +00:00
parent de9e2ae5bb
commit d57ce3d6a9
7 changed files with 367 additions and 112 deletions

View File

@ -1,4 +1,3 @@
simplejson 1.5
Copyright (c) 2006 Bob Ippolito Copyright (c) 2006 Bob Ippolito
Permission is hereby granted, free of charge, to any person obtaining a copy of Permission is hereby granted, free of charge, to any person obtaining a copy of

View File

@ -65,6 +65,9 @@ Specializing JSON object decoding::
>>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}', >>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}',
... object_hook=as_complex) ... object_hook=as_complex)
(1+2j) (1+2j)
>>> import decimal
>>> simplejson.loads('1.1', parse_float=decimal.Decimal)
Decimal("1.1")
Extending JSONEncoder:: Extending JSONEncoder::
@ -83,20 +86,48 @@ Extending JSONEncoder::
['[', '2.0', ', ', '1.0', ']'] ['[', '2.0', ', ', '1.0', ']']
Using simplejson from the shell to validate and
pretty-print::
$ echo '{"json":"obj"}' | python -msimplejson.tool
{
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -msimplejson.tool
Expecting property name: line 1 column 2 (char 2)
Note that the JSON produced by this module's default settings Note that the JSON produced by this module's default settings
is a subset of YAML, so it may be used as a serializer for that as well. is a subset of YAML, so it may be used as a serializer for that as well.
""" """
__version__ = '1.5' __version__ = '1.9.2'
__all__ = [ __all__ = [
'dump', 'dumps', 'load', 'loads', 'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONEncoder', 'JSONDecoder', 'JSONEncoder',
] ]
from django.utils.simplejson.decoder import JSONDecoder if __name__ == '__main__':
from django.utils.simplejson.encoder import JSONEncoder import warnings
warnings.warn('python -msimplejson is deprecated, use python -msiplejson.tool', DeprecationWarning)
from django.utils.simplejson.decoder import JSONDecoder
from django.utils.simplejson.encoder import JSONEncoder
else:
from decoder import JSONDecoder
from encoder import JSONEncoder
_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
)
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, **kw): allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, **kw):
""" """
Serialize ``obj`` as a JSON formatted stream to ``fp`` (a Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object). ``.write()``-supporting file-like object).
@ -107,7 +138,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp`` If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp``
may be ``unicode`` instances, subject to normal Python ``str`` to may be ``unicode`` instances, subject to normal Python ``str`` to
``unicode`` coercion rules. Unless ``fp.write()`` explicitly ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
understands ``unicode`` (as in ``codecs.getwriter()``) this is likely understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
to cause an error. to cause an error.
@ -121,25 +152,44 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If ``indent`` is a non-negative integer, then JSON array elements and object If ``indent`` is a non-negative integer, then JSON array elements and object
members will be pretty-printed with that indent level. An indent level members will be pretty-printed with that indent level. An indent level
of 0 will only insert newlines. ``None`` is the most compact representation. of 0 will only insert newlines. ``None`` is the most compact representation.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple
then it will be used instead of the default ``(', ', ': ')`` separators.
``(',', ':')`` is the most compact JSON representation.
``encoding`` is the character encoding for str instances, default is UTF-8.
``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with ``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg. the ``cls`` kwarg.
""" """
if cls is None: # cached encoder
cls = JSONEncoder if (skipkeys is False and ensure_ascii is True and
iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular is True and allow_nan is True and
check_circular=check_circular, allow_nan=allow_nan, indent=indent, cls is None and indent is None and separators is None and
**kw).iterencode(obj) encoding == 'utf-8' and default is None and not kw):
iterable = _default_encoder.iterencode(obj)
else:
if cls is None:
cls = JSONEncoder
iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators, encoding=encoding,
default=default, **kw).iterencode(obj)
# could accelerate with writelines in some versions of Python, at # could accelerate with writelines in some versions of Python, at
# a debuggability cost # a debuggability cost
for chunk in iterable: for chunk in iterable:
fp.write(chunk) fp.write(chunk)
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None, **kw): allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, **kw):
""" """
Serialize ``obj`` to a JSON formatted ``str``. Serialize ``obj`` to a JSON formatted ``str``.
@ -161,88 +211,159 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If ``indent`` is a non-negative integer, then JSON array elements and If ``indent`` is a non-negative integer, then JSON array elements and
object members will be pretty-printed with that indent level. An indent object members will be pretty-printed with that indent level. An indent
level of 0 will only insert newlines. ``None`` is the most compact level of 0 will only insert newlines. ``None`` is the most compact
representation. representation.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple If ``separators`` is an ``(item_separator, dict_separator)`` tuple
then it will be used instead of the default ``(', ', ': ')`` separators. then it will be used instead of the default ``(', ', ': ')`` separators.
``(',', ':')`` is the most compact JSON representation. ``(',', ':')`` is the most compact JSON representation.
``encoding`` is the character encoding for str instances, default is UTF-8.
``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with ``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg. the ``cls`` kwarg.
""" """
# cached encoder
if (skipkeys is False and ensure_ascii is True and
check_circular is True and allow_nan is True and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and not kw):
return _default_encoder.encode(obj)
if cls is None: if cls is None:
cls = JSONEncoder cls = JSONEncoder
return cls( return cls(
skipkeys=skipkeys, ensure_ascii=ensure_ascii, skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent, check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators, separators=separators, encoding=encoding, default=default,
**kw).encode(obj) **kw).encode(obj)
def load(fp, encoding=None, cls=None, object_hook=None, **kw):
_default_decoder = JSONDecoder(encoding=None, object_hook=None)
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, **kw):
""" """
Deserialize ``fp`` (a ``.read()``-supporting file-like object containing Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document) to a Python object. a JSON document) to a Python object.
If the contents of ``fp`` is encoded with an ASCII based encoding other If the contents of ``fp`` is encoded with an ASCII based encoding other
than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
be specified. Encodings that are not ASCII based (such as UCS-2) are be specified. Encodings that are not ASCII based (such as UCS-2) are
not allowed, and should be wrapped with not allowed, and should be wrapped with
``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode``
object and passed to ``loads()`` object and passed to ``loads()``
``object_hook`` is an optional function that will be called with the ``object_hook`` is an optional function that will be called with the
result of any object literal decode (a ``dict``). The return value of result of any object literal decode (a ``dict``). The return value of
``object_hook`` will be used instead of the ``dict``. This feature ``object_hook`` will be used instead of the ``dict``. This feature
can be used to implement custom decoders (e.g. JSON-RPC class hinting). can be used to implement custom decoders (e.g. JSON-RPC class hinting).
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. kwarg.
""" """
if cls is None: return loads(fp.read(),
cls = JSONDecoder encoding=encoding, cls=cls, object_hook=object_hook,
if object_hook is not None: parse_float=parse_float, parse_int=parse_int,
kw['object_hook'] = object_hook parse_constant=parse_constant, **kw)
return cls(encoding=encoding, **kw).decode(fp.read())
def loads(s, encoding=None, cls=None, object_hook=None, **kw):
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, **kw):
""" """
Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
document) to a Python object. document) to a Python object.
If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
must be specified. Encodings that are not ASCII based (such as UCS-2) must be specified. Encodings that are not ASCII based (such as UCS-2)
are not allowed and should be decoded to ``unicode`` first. are not allowed and should be decoded to ``unicode`` first.
``object_hook`` is an optional function that will be called with the ``object_hook`` is an optional function that will be called with the
result of any object literal decode (a ``dict``). The return value of result of any object literal decode (a ``dict``). The return value of
``object_hook`` will be used instead of the ``dict``. This feature ``object_hook`` will be used instead of the ``dict``. This feature
can be used to implement custom decoders (e.g. JSON-RPC class hinting). can be used to implement custom decoders (e.g. JSON-RPC class hinting).
``parse_float``, if specified, will be called with the string
of every JSON float to be decoded. By default this is equivalent to
float(num_str). This can be used to use another datatype or parser
for JSON floats (e.g. decimal.Decimal).
``parse_int``, if specified, will be called with the string
of every JSON int to be decoded. By default this is equivalent to
int(num_str). This can be used to use another datatype or parser
for JSON integers (e.g. float).
``parse_constant``, if specified, will be called with one of the
following strings: -Infinity, Infinity, NaN, null, true, false.
This can be used to raise an exception if invalid JSON numbers
are encountered.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. kwarg.
""" """
if (cls is None and encoding is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and not kw):
return _default_decoder.decode(s)
if cls is None: if cls is None:
cls = JSONDecoder cls = JSONDecoder
if object_hook is not None: if object_hook is not None:
kw['object_hook'] = object_hook kw['object_hook'] = object_hook
if parse_float is not None:
kw['parse_float'] = parse_float
if parse_int is not None:
kw['parse_int'] = parse_int
if parse_constant is not None:
kw['parse_constant'] = parse_constant
return cls(encoding=encoding, **kw).decode(s) return cls(encoding=encoding, **kw).decode(s)
#
# Compatibility cruft from other libraries
#
def decode(s):
"""
demjson, python-cjson API compatibility hook. Use loads(s) instead.
"""
import warnings
warnings.warn("simplejson.loads(s) should be used instead of decode(s)",
DeprecationWarning)
return loads(s)
def encode(obj):
"""
demjson, python-cjson compatibility hook. Use dumps(s) instead.
"""
import warnings
warnings.warn("simplejson.dumps(s) should be used instead of encode(s)",
DeprecationWarning)
return dumps(obj)
def read(s): def read(s):
""" """
json-py API compatibility hook. Use loads(s) instead. jsonlib, JsonUtils, python-json, json-py API compatibility hook.
Use loads(s) instead.
""" """
import warnings import warnings
warnings.warn("simplejson.loads(s) should be used instead of read(s)", warnings.warn("simplejson.loads(s) should be used instead of read(s)",
DeprecationWarning) DeprecationWarning)
return loads(s) return loads(s)
def write(obj): def write(obj):
""" """
json-py API compatibility hook. Use dumps(s) instead. jsonlib, JsonUtils, python-json, json-py API compatibility hook.
Use dumps(s) instead.
""" """
import warnings import warnings
warnings.warn("simplejson.dumps(s) should be used instead of write(s)", warnings.warn("simplejson.dumps(s) should be used instead of write(s)",
@ -250,3 +371,6 @@ def write(obj):
return dumps(obj) return dumps(obj)
if __name__ == '__main__':
import simplejson.tool
simplejson.tool.main()

View File

@ -2,8 +2,13 @@
Implementation of JSONDecoder Implementation of JSONDecoder
""" """
import re import re
import sys
from django.utils.simplejson.scanner import Scanner, pattern from django.utils.simplejson.scanner import Scanner, pattern
try:
from django.utils.simplejson._speedups import scanstring as c_scanstring
except ImportError:
pass
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
@ -18,6 +23,7 @@ def _floatconstants():
NaN, PosInf, NegInf = _floatconstants() NaN, PosInf, NegInf = _floatconstants()
def linecol(doc, pos): def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1 lineno = doc.count('\n', 0, pos) + 1
if lineno == 1: if lineno == 1:
@ -26,6 +32,7 @@ def linecol(doc, pos):
colno = pos - doc.rindex('\n', 0, pos) colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno return lineno, colno
def errmsg(msg, doc, pos, end=None): def errmsg(msg, doc, pos, end=None):
lineno, colno = linecol(doc, pos) lineno, colno = linecol(doc, pos)
if end is None: if end is None:
@ -34,6 +41,7 @@ def errmsg(msg, doc, pos, end=None):
return '%s: line %d column %d - line %d column %d (char %d - %d)' % ( return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
msg, lineno, colno, endlineno, endcolno, pos, end) msg, lineno, colno, endlineno, endcolno, pos, end)
_CONSTANTS = { _CONSTANTS = {
'-Infinity': NegInf, '-Infinity': NegInf,
'Infinity': PosInf, 'Infinity': PosInf,
@ -44,20 +52,30 @@ _CONSTANTS = {
} }
def JSONConstant(match, context, c=_CONSTANTS): def JSONConstant(match, context, c=_CONSTANTS):
return c[match.group(0)], None s = match.group(0)
fn = getattr(context, 'parse_constant', None)
if fn is None:
rval = c[s]
else:
rval = fn(s)
return rval, None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
def JSONNumber(match, context): def JSONNumber(match, context):
match = JSONNumber.regex.match(match.string, *match.span()) match = JSONNumber.regex.match(match.string, *match.span())
integer, frac, exp = match.groups() integer, frac, exp = match.groups()
if frac or exp: if frac or exp:
res = float(integer + (frac or '') + (exp or '')) fn = getattr(context, 'parse_float', None) or float
res = fn(integer + (frac or '') + (exp or ''))
else: else:
res = int(integer) fn = getattr(context, 'parse_int', None) or int
res = fn(integer)
return res, None return res, None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS)
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = { BACKSLASH = {
'"': u'"', '\\': u'\\', '/': u'/', '"': u'"', '\\': u'\\', '/': u'/',
'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
@ -65,7 +83,7 @@ BACKSLASH = {
DEFAULT_ENCODING = "utf-8" DEFAULT_ENCODING = "utf-8"
def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match): def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
if encoding is None: if encoding is None:
encoding = DEFAULT_ENCODING encoding = DEFAULT_ENCODING
chunks = [] chunks = []
@ -84,6 +102,12 @@ def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match):
_append(content) _append(content)
if terminator == '"': if terminator == '"':
break break
elif terminator != '\\':
if strict:
raise ValueError(errmsg("Invalid control character %r at", s, end))
else:
_append(terminator)
continue
try: try:
esc = s[end] esc = s[end]
except IndexError: except IndexError:
@ -98,21 +122,43 @@ def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match):
end += 1 end += 1
else: else:
esc = s[end + 1:end + 5] esc = s[end + 1:end + 5]
next_end = end + 5
msg = "Invalid \\uXXXX escape"
try: try:
m = unichr(int(esc, 16)) if len(esc) != 4:
if len(esc) != 4 or not esc.isalnum():
raise ValueError raise ValueError
uni = int(esc, 16)
if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
if not s[end + 5:end + 7] == '\\u':
raise ValueError
esc2 = s[end + 7:end + 11]
if len(esc2) != 4:
raise ValueError
uni2 = int(esc2, 16)
uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
next_end += 6
m = unichr(uni)
except ValueError: except ValueError:
raise ValueError(errmsg("Invalid \\uXXXX escape", s, end)) raise ValueError(errmsg(msg, s, end))
end += 5 end = next_end
_append(m) _append(m)
return u''.join(chunks), end return u''.join(chunks), end
# Use speedup
try:
scanstring = c_scanstring
except NameError:
scanstring = py_scanstring
def JSONString(match, context): def JSONString(match, context):
encoding = getattr(context, 'encoding', None) encoding = getattr(context, 'encoding', None)
return scanstring(match.string, match.end(), encoding) strict = getattr(context, 'strict', True)
return scanstring(match.string, match.end(), encoding, strict)
pattern(r'"')(JSONString) pattern(r'"')(JSONString)
WHITESPACE = re.compile(r'\s*', FLAGS) WHITESPACE = re.compile(r'\s*', FLAGS)
def JSONObject(match, context, _w=WHITESPACE.match): def JSONObject(match, context, _w=WHITESPACE.match):
@ -120,16 +166,17 @@ def JSONObject(match, context, _w=WHITESPACE.match):
s = match.string s = match.string
end = _w(s, match.end()).end() end = _w(s, match.end()).end()
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
# trivial empty object # Trivial empty object
if nextchar == '}': if nextchar == '}':
return pairs, end + 1 return pairs, end + 1
if nextchar != '"': if nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end)) raise ValueError(errmsg("Expecting property name", s, end))
end += 1 end += 1
encoding = getattr(context, 'encoding', None) encoding = getattr(context, 'encoding', None)
strict = getattr(context, 'strict', True)
iterscan = JSONScanner.iterscan iterscan = JSONScanner.iterscan
while True: while True:
key, end = scanstring(s, end, encoding) key, end = scanstring(s, end, encoding, strict)
end = _w(s, end).end() end = _w(s, end).end()
if s[end:end + 1] != ':': if s[end:end + 1] != ':':
raise ValueError(errmsg("Expecting : delimiter", s, end)) raise ValueError(errmsg("Expecting : delimiter", s, end))
@ -156,12 +203,13 @@ def JSONObject(match, context, _w=WHITESPACE.match):
pairs = object_hook(pairs) pairs = object_hook(pairs)
return pairs, end return pairs, end
pattern(r'{')(JSONObject) pattern(r'{')(JSONObject)
def JSONArray(match, context, _w=WHITESPACE.match): def JSONArray(match, context, _w=WHITESPACE.match):
values = [] values = []
s = match.string s = match.string
end = _w(s, match.end()).end() end = _w(s, match.end()).end()
# look-ahead for trivial empty array # Look-ahead for trivial empty array
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
if nextchar == ']': if nextchar == ']':
return values, end + 1 return values, end + 1
@ -182,7 +230,8 @@ def JSONArray(match, context, _w=WHITESPACE.match):
end = _w(s, end).end() end = _w(s, end).end()
return values, end return values, end
pattern(r'\[')(JSONArray) pattern(r'\[')(JSONArray)
ANYTHING = [ ANYTHING = [
JSONObject, JSONObject,
JSONArray, JSONArray,
@ -193,11 +242,12 @@ ANYTHING = [
JSONScanner = Scanner(ANYTHING) JSONScanner = Scanner(ANYTHING)
class JSONDecoder(object): class JSONDecoder(object):
""" """
Simple JSON <http://json.org> decoder Simple JSON <http://json.org> decoder
Performs the following translations in decoding: Performs the following translations in decoding by default:
+---------------+-------------------+ +---------------+-------------------+
| JSON | Python | | JSON | Python |
@ -226,7 +276,8 @@ class JSONDecoder(object):
_scanner = Scanner(ANYTHING) _scanner = Scanner(ANYTHING)
__all__ = ['__init__', 'decode', 'raw_decode'] __all__ = ['__init__', 'decode', 'raw_decode']
def __init__(self, encoding=None, object_hook=None): def __init__(self, encoding=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True):
""" """
``encoding`` determines the encoding used to interpret any ``str`` ``encoding`` determines the encoding used to interpret any ``str``
objects decoded by this instance (utf-8 by default). It has no objects decoded by this instance (utf-8 by default). It has no
@ -239,9 +290,28 @@ class JSONDecoder(object):
of every JSON object decoded and its return value will be used in of every JSON object decoded and its return value will be used in
place of the given ``dict``. This can be used to provide custom place of the given ``dict``. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting). deserializations (e.g. to support JSON-RPC class hinting).
``parse_float``, if specified, will be called with the string
of every JSON float to be decoded. By default this is equivalent to
float(num_str). This can be used to use another datatype or parser
for JSON floats (e.g. decimal.Decimal).
``parse_int``, if specified, will be called with the string
of every JSON int to be decoded. By default this is equivalent to
int(num_str). This can be used to use another datatype or parser
for JSON integers (e.g. float).
``parse_constant``, if specified, will be called with one of the
following strings: -Infinity, Infinity, NaN, null, true, false.
This can be used to raise an exception if invalid JSON numbers
are encountered.
""" """
self.encoding = encoding self.encoding = encoding
self.object_hook = object_hook self.object_hook = object_hook
self.parse_float = parse_float
self.parse_int = parse_int
self.parse_constant = parse_constant
self.strict = strict
def decode(self, s, _w=WHITESPACE.match): def decode(self, s, _w=WHITESPACE.match):
""" """

View File

@ -3,11 +3,15 @@ Implementation of JSONEncoder
""" """
import re import re
ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]') try:
ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])') from django.utils.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
pass
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = { ESCAPE_DCT = {
# escape all forward slashes to prevent </script> attack
'/': '\\/',
'\\': '\\\\', '\\': '\\\\',
'"': '\\"', '"': '\\"',
'\b': '\\b', '\b': '\\b',
@ -19,8 +23,9 @@ ESCAPE_DCT = {
for i in range(0x20): for i in range(0x20):
ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
# assume this produces an infinity on all machines (probably not guaranteed) # Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666') INFINITY = float('1e66666')
FLOAT_REPR = repr
def floatstr(o, allow_nan=True): def floatstr(o, allow_nan=True):
# Check for specials. Note that this type of test is processor- and/or # Check for specials. Note that this type of test is processor- and/or
@ -33,7 +38,7 @@ def floatstr(o, allow_nan=True):
elif o == -INFINITY: elif o == -INFINITY:
text = '-Infinity' text = '-Infinity'
else: else:
return str(o) return FLOAT_REPR(o)
if not allow_nan: if not allow_nan:
raise ValueError("Out of range float values are not JSON compliant: %r" raise ValueError("Out of range float values are not JSON compliant: %r"
@ -50,15 +55,32 @@ def encode_basestring(s):
return ESCAPE_DCT[match.group(0)] return ESCAPE_DCT[match.group(0)]
return '"' + ESCAPE.sub(replace, s) + '"' return '"' + ESCAPE.sub(replace, s) + '"'
def encode_basestring_ascii(s):
def py_encode_basestring_ascii(s):
if isinstance(s, str) and HAS_UTF8.search(s) is not None:
s = s.decode('utf-8')
def replace(match): def replace(match):
s = match.group(0) s = match.group(0)
try: try:
return ESCAPE_DCT[s] return ESCAPE_DCT[s]
except KeyError: except KeyError:
return '\\u%04x' % (ord(s),) n = ord(s)
if n < 0x10000:
return '\\u%04x' % (n,)
else:
# surrogate pair
n -= 0x10000
s1 = 0xd800 | ((n >> 10) & 0x3ff)
s2 = 0xdc00 | (n & 0x3ff)
return '\\u%04x\\u%04x' % (s1, s2)
return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
try:
encode_basestring_ascii = c_encode_basestring_ascii
except NameError:
encode_basestring_ascii = py_encode_basestring_ascii
class JSONEncoder(object): class JSONEncoder(object):
""" """
@ -94,7 +116,7 @@ class JSONEncoder(object):
key_separator = ': ' key_separator = ': '
def __init__(self, skipkeys=False, ensure_ascii=True, def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False, check_circular=True, allow_nan=True, sort_keys=False,
indent=None, separators=None): indent=None, separators=None, encoding='utf-8', default=None):
""" """
Constructor for JSONEncoder, with sensible defaults. Constructor for JSONEncoder, with sensible defaults.
@ -126,8 +148,16 @@ class JSONEncoder(object):
None is the most compact representation. None is the most compact representation.
If specified, separators should be a (item_separator, key_separator) If specified, separators should be a (item_separator, key_separator)
tuple. The default is (', ', ': '). To get the most compact JSON tuple. The default is (', ', ': '). To get the most compact JSON
representation you should specify (',', ':') to eliminate whitespace. representation you should specify (',', ':') to eliminate whitespace.
If specified, default is a function that gets called for objects
that can't otherwise be serialized. It should return a JSON encodable
version of the object or raise a ``TypeError``.
If encoding is not None, then all input strings will be
transformed into unicode using that encoding prior to JSON-encoding.
The default is UTF-8.
""" """
self.skipkeys = skipkeys self.skipkeys = skipkeys
@ -139,6 +169,9 @@ class JSONEncoder(object):
self.current_indent_level = 0 self.current_indent_level = 0
if separators is not None: if separators is not None:
self.item_separator, self.key_separator = separators self.item_separator, self.key_separator = separators
if default is not None:
self.default = default
self.encoding = encoding
def _newline_indent(self): def _newline_indent(self):
return '\n' + (' ' * (self.indent * self.current_indent_level)) return '\n' + (' ' * (self.indent * self.current_indent_level))
@ -207,8 +240,14 @@ class JSONEncoder(object):
items = [(k, dct[k]) for k in keys] items = [(k, dct[k]) for k in keys]
else: else:
items = dct.iteritems() items = dct.iteritems()
_encoding = self.encoding
_do_decode = (_encoding is not None
and not (_encoding == 'utf-8'))
for key, value in items: for key, value in items:
if isinstance(key, basestring): if isinstance(key, str):
if _do_decode:
key = key.decode(_encoding)
elif isinstance(key, basestring):
pass pass
# JavaScript is weakly typed for these, so it makes sense to # JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this. # also allow them. Many encoders seem to do something like this.
@ -247,6 +286,10 @@ class JSONEncoder(object):
encoder = encode_basestring_ascii encoder = encode_basestring_ascii
else: else:
encoder = encode_basestring encoder = encode_basestring
_encoding = self.encoding
if (_encoding is not None and isinstance(o, str)
and not (_encoding == 'utf-8')):
o = o.decode(_encoding)
yield encoder(o) yield encoder(o)
elif o is None: elif o is None:
yield 'null' yield 'null'
@ -304,11 +347,22 @@ class JSONEncoder(object):
Return a JSON string representation of a Python data structure. Return a JSON string representation of a Python data structure.
>>> JSONEncoder().encode({"foo": ["bar", "baz"]}) >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo":["bar", "baz"]}' '{"foo": ["bar", "baz"]}'
""" """
# This doesn't pass the iterator directly to ''.join() because it # This is for extremely simple cases and benchmarks.
# sucks at reporting exceptions. It's going to do this internally if isinstance(o, basestring):
# anyway because it uses PySequence_Fast or similar. if isinstance(o, str):
_encoding = self.encoding
if (_encoding is not None
and not (_encoding == 'utf-8')):
o = o.decode(_encoding)
if self.ensure_ascii:
return encode_basestring_ascii(o)
else:
return encode_basestring(o)
# This doesn't pass the iterator directly to ''.join() because the
# exceptions aren't as detailed. The list call should be roughly
# equivalent to the PySequence_Fast that ''.join() would do.
chunks = list(self.iterencode(o)) chunks = list(self.iterencode(o))
return ''.join(chunks) return ''.join(chunks)

View File

@ -1,40 +0,0 @@
from django.utils import simplejson
import cgi
class JSONFilter(object):
    """
    WSGI middleware that speaks JSON on behalf of the wrapped application.

    On the way in, a POST body whose CONTENT_TYPE equals ``mime_type`` is
    decoded and stored in ``environ['jsonfilter.json']``.  On the way out,
    the wrapped application's return value is serialized with
    ``simplejson.dumps`` and sent as the response body, wrapped in a JSONP
    callback when a ``jsonp`` query parameter is present.
    """

    def __init__(self, app, mime_type='text/x-json'):
        """
        ``app`` is the wrapped callable; ``mime_type`` is both the request
        content type that triggers decoding and the default response type.
        """
        self.app = app
        self.mime_type = mime_type

    def __call__(self, environ, start_response):
        """
        Run the wrapped application and return its result as a JSON body.

        The wrapped application is given a private ``start_response`` that
        only records the status and headers; the real ``start_response`` is
        called once the encoded body (and therefore its length) is known.
        """
        response = {'status': '200 OK', 'headers': []}

        def json_start_response(status, headers):
            response['status'] = status
            response['headers'].extend(headers)

        environ['jsonfilter.mime_type'] = self.mime_type

        # Read JSON POST input to jsonfilter.json if matching mime type
        if environ.get('REQUEST_METHOD', '') == 'POST':
            if environ.get('CONTENT_TYPE', '') == self.mime_type:
                length = environ.get('CONTENT_LENGTH')
                if length:
                    data = environ['wsgi.input'].read(int(length))
                else:
                    # No usable length supplied: read whatever is there.
                    data = environ['wsgi.input'].read()
                environ['jsonfilter.json'] = simplejson.loads(data)

        res = simplejson.dumps(self.app(environ, json_start_response))

        jsonp = cgi.parse_qs(environ.get('QUERY_STRING', '')).get('jsonp')
        if jsonp:
            # parse_qs yields a list of values; they are joined to form the
            # callback expression that wraps the payload.
            content_type = 'text/javascript'
            res = ''.join(jsonp + ['(', res, ')'])
        elif 'Opera' in environ.get('HTTP_USER_AGENT', ''):
            # Opera has bunk XMLHttpRequest support for most mime types
            content_type = 'text/plain'
        else:
            content_type = self.mime_type

        headers = [
            ('Content-type', content_type),
            # WSGI requires header values to be strings, not integers.
            ('Content-length', str(len(res))),
        ]
        headers.extend(response['headers'])
        start_response(response['status'], headers)
        return [res]
def factory(app, global_conf, **kw):
    """
    Build a JSONFilter around ``app``.

    ``global_conf`` is accepted but not used; any remaining keyword
    arguments are forwarded to the JSONFilter constructor.
    (Presumably a Paste Deploy-style filter factory -- confirm with callers.)
    """
    wrapped = JSONFilter(app, **kw)
    return wrapped

View File

@ -1,18 +1,21 @@
""" """
Iterator based sre token scanner Iterator based sre token scanner
""" """
import sre_parse, sre_compile, sre_constants
from sre_constants import BRANCH, SUBPATTERN
from re import VERBOSE, MULTILINE, DOTALL
import re import re
from re import VERBOSE, MULTILINE, DOTALL
import sre_parse
import sre_compile
import sre_constants
from sre_constants import BRANCH, SUBPATTERN
__all__ = ['Scanner', 'pattern'] __all__ = ['Scanner', 'pattern']
FLAGS = (VERBOSE | MULTILINE | DOTALL) FLAGS = (VERBOSE | MULTILINE | DOTALL)
class Scanner(object): class Scanner(object):
def __init__(self, lexicon, flags=FLAGS): def __init__(self, lexicon, flags=FLAGS):
self.actions = [None] self.actions = [None]
# combine phrases into a compound pattern # Combine phrases into a compound pattern
s = sre_parse.Pattern() s = sre_parse.Pattern()
s.flags = flags s.flags = flags
p = [] p = []
@ -26,10 +29,10 @@ class Scanner(object):
p.append(subpattern) p.append(subpattern)
self.actions.append(token) self.actions.append(token)
s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
self.scanner = sre_compile.compile(p) self.scanner = sre_compile.compile(p)
def iterscan(self, string, idx=0, context=None): def iterscan(self, string, idx=0, context=None):
""" """
Yield match, end_idx for each match Yield match, end_idx for each match
@ -54,10 +57,11 @@ class Scanner(object):
match = self.scanner.scanner(string, matchend).match match = self.scanner.scanner(string, matchend).match
yield rval, matchend yield rval, matchend
lastend = matchend lastend = matchend
def pattern(pattern, flags=FLAGS):
    """
    Return a decorator that tags a function with a regular expression.

    The decorated function is returned unchanged apart from two new
    attributes: ``pattern`` (the source string given here) and ``regex``
    (that string compiled with ``flags``).  Compilation happens when the
    decorator is applied, not when ``pattern()`` itself is called.
    """
    def decorator(fn):
        setattr(fn, 'pattern', pattern)
        compiled = re.compile(pattern, flags)
        setattr(fn, 'regex', compiled)
        return fn
    return decorator

View File

@ -0,0 +1,44 @@
r"""
Using simplejson from the shell to validate and
pretty-print::
$ echo '{"json":"obj"}' | python -msimplejson.tool
{
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -msimplejson.tool
Expecting property name: line 1 column 2 (char 2)
Note that the JSON produced by this module's default settings
is a subset of YAML, so it may be used as a serializer for that as well.
"""
import django.utils.simplejson
#
# Pretty printer:
# curl http://mochikit.com/examples/ajax_tables/domains.json | python -msimplejson.tool
#
def main():
    """
    Validate and pretty-print a JSON document from the command line.

    Usage: ``<prog> [infile [outfile]]``.  With no arguments the document is
    read from stdin and written to stdout; ``infile`` and ``outfile``
    replace those streams when given.  Any other argument count exits with
    a usage message, and a document that fails to parse exits with the
    parser's error message.
    """
    import sys

    # The module-level ``import django.utils.simplejson`` binds only the
    # name ``django``; alias the package so the calls below resolve.
    simplejson = django.utils.simplejson

    argc = len(sys.argv)
    if argc not in (1, 2, 3):
        raise SystemExit("%s [infile [outfile]]" % (sys.argv[0],))

    infile = sys.stdin
    outfile = sys.stdout
    opened = []  # only the files opened here are closed here
    try:
        if argc >= 2:
            infile = open(sys.argv[1], 'rb')
            opened.append(infile)
        if argc == 3:
            outfile = open(sys.argv[2], 'wb')
            opened.append(outfile)
        try:
            obj = simplejson.load(infile)
        except ValueError:
            # sys.exc_info() instead of "except ValueError, e" so the same
            # line parses on every Python version.
            raise SystemExit(sys.exc_info()[1])
        simplejson.dump(obj, outfile, sort_keys=True, indent=4)
        outfile.write('\n')
    finally:
        for stream in opened:
            stream.close()
if __name__ == '__main__':
main()