From f44e7acb17738cb222a0d8390eac480488241d15 Mon Sep 17 00:00:00 2001 From: Jacob Kaplan-Moss Date: Wed, 28 Jun 2006 20:59:49 +0000 Subject: [PATCH] Added Bob Ippolito's simplejson (http://undefined.org/python/#simplejson) as {{{django.auth.simplejson}}}. This is version 1.3 of simplejson. Thanks to Bob for his code and his permission to include it. git-svn-id: http://code.djangoproject.com/svn/django/trunk@3232 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/utils/simplejson/LICENSE.txt | 20 ++ django/utils/simplejson/__init__.py | 221 +++++++++++++++++++++ django/utils/simplejson/decoder.py | 271 ++++++++++++++++++++++++++ django/utils/simplejson/encoder.py | 289 ++++++++++++++++++++++++++++ django/utils/simplejson/scanner.py | 63 ++++++ 5 files changed, 864 insertions(+) create mode 100644 django/utils/simplejson/LICENSE.txt create mode 100644 django/utils/simplejson/__init__.py create mode 100644 django/utils/simplejson/decoder.py create mode 100644 django/utils/simplejson/encoder.py create mode 100644 django/utils/simplejson/scanner.py diff --git a/django/utils/simplejson/LICENSE.txt b/django/utils/simplejson/LICENSE.txt new file mode 100644 index 0000000000..90251a9f62 --- /dev/null +++ b/django/utils/simplejson/LICENSE.txt @@ -0,0 +1,20 @@ +simplejson 1.3 +Copyright (c) 2006 Bob Ippolito + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/django/utils/simplejson/__init__.py b/django/utils/simplejson/__init__.py new file mode 100644 index 0000000000..f88329b950 --- /dev/null +++ b/django/utils/simplejson/__init__.py @@ -0,0 +1,221 @@ +r""" +A simple, fast, extensible JSON encoder and decoder + +JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +simplejson exposes an API familiar to uses of the standard library +marshal and pickle modules. + +Encoding basic Python object hierarchies:: + + >>> import simplejson + >>> simplejson.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print simplejson.dumps("\"foo\bar") + "\"foo\bar" + >>> print simplejson.dumps(u'\u1234') + "\u1234" + >>> print simplejson.dumps('\\') + "\\" + >>> print simplejson.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> simplejson.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Decoding JSON:: + + >>> import simplejson + >>> simplejson.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') + [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> simplejson.loads('"\\"foo\\bar"') + u'"foo\x08ar' + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> simplejson.load(io) + [u'streaming API'] + +Specializing JSON object decoding:: + + >>> import simplejson + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + +Extending JSONEncoder:: + + >>> import simplejson + >>> class ComplexEncoder(simplejson.JSONEncoder): + ... def default(self, obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... return simplejson.JSONEncoder.default(self, obj) + ... + >>> dumps(2 + 1j, cls=ComplexEncoder) + '[2.0, 1.0]' + >>> ComplexEncoder().encode(2 + 1j) + '[2.0, 1.0]' + >>> list(ComplexEncoder().iterencode(2 + 1j)) + ['[', '2.0', ', ', '1.0', ']'] + + +Note that the JSON produced by this module is a subset of YAML, +so it may be used as a serializer for that as well. +""" +__version__ = '1.3' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', +] + +from django.utils.simplejson.decoder import JSONDecoder +from django.utils.simplejson.encoder import JSONEncoder + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, **kw): + """ + Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If ``check_circular`` is ``False``, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + """ + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, + **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, **kw): + """ + Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is ``False``, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is ``False``, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + """ + if cls is None: + cls = JSONEncoder + return cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, **kw).encode(obj) + +def load(fp, encoding=None, cls=None, object_hook=None, **kw): + """ + Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + """ + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + return cls(encoding=encoding, **kw).decode(fp.read()) + +def loads(s, encoding=None, cls=None, object_hook=None, **kw): + """ + Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + """ + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + return cls(encoding=encoding, **kw).decode(s) + +def read(s): + """ + json-py API compatibility hook. Use loads(s) instead. + """ + import warnings + warnings.warn("simplejson.loads(s) should be used instead of read(s)", + DeprecationWarning) + return loads(s) + +def write(obj): + """ + json-py API compatibility hook. Use dumps(s) instead. + """ + import warnings + warnings.warn("simplejson.dumps(s) should be used instead of write(s)", + DeprecationWarning) + return dumps(obj) + + diff --git a/django/utils/simplejson/decoder.py b/django/utils/simplejson/decoder.py new file mode 100644 index 0000000000..684af8c9ad --- /dev/null +++ b/django/utils/simplejson/decoder.py @@ -0,0 +1,271 @@ +""" +Implementation of JSONDecoder +""" +import re + +from django.utils.simplejson.scanner import Scanner, pattern + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + import struct + import sys + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + +def errmsg(msg, doc, pos, end=None): + lineno, colno = linecol(doc, pos) + if end is None: + return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + return '%s: line %d column %d - line %d column %d (char %d - %d)' % ( + msg, lineno, colno, endlineno, endcolno, pos, end) + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, + 'true': True, + 'false': False, + 'null': None, +} + +def JSONConstant(match, context, c=_CONSTANTS): + return c[match.group(0)], None +pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) + +def JSONNumber(match, context): + match = JSONNumber.regex.match(match.string, *match.span()) + integer, frac, exp = match.groups() + if frac or exp: + res = float(integer + (frac or '') + (exp or '')) + else: + res = int(integer) + return res, None +pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) + +STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match): + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + if terminator == '"': + break + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + if esc != 'u': + try: + m = _b[esc] + except KeyError: + raise ValueError( + errmsg("Invalid \\escape: %r" % (esc,), s, end)) + end += 1 + else: + esc = s[end + 1:end + 5] + try: + m = unichr(int(esc, 16)) + if len(esc) != 4 or not esc.isalnum(): + raise ValueError + except ValueError: + raise ValueError(errmsg("Invalid \\uXXXX escape", s, end)) + end += 5 + _append(m) + return u''.join(chunks), end + +def JSONString(match, context): + encoding = getattr(context, 'encoding', None) + return scanstring(match.string, match.end(), encoding) +pattern(r'"')(JSONString) + +WHITESPACE = re.compile(r'\s*', FLAGS) + +def JSONObject(match, context, _w=WHITESPACE.match): + pairs = {} + s = match.string + end = _w(s, match.end()).end() + nextchar = s[end:end + 1] + # trivial empty object + if nextchar == '}': + return pairs, end + 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + encoding = getattr(context, 'encoding', None) + while True: + key, end = scanstring(s, end, encoding) + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + end = _w(s, end + 1).end() + try: + value, end = JSONScanner.iterscan(s, idx=end).next() + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == '}': + break + if nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + object_hook = getattr(context, 'object_hook', None) + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end +pattern(r'{')(JSONObject) + +def JSONArray(match, context, _w=WHITESPACE.match): + values = [] + s = match.string + end = _w(s, match.end()).end() + # look-ahead for trivial empty array + nextchar = s[end:end + 1] + if nextchar == ']': + return values, end + 1 + while True: + try: + value, end = JSONScanner.iterscan(s, idx=end).next() + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + values.append(value) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + if nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + end = _w(s, end).end() + return values, end +pattern(r'\[')(JSONArray) + +ANYTHING = [ + JSONObject, + JSONArray, + JSONString, + JSONConstant, + JSONNumber, +] + +JSONScanner = Scanner(ANYTHING) + +class JSONDecoder(object): + """ + Simple JSON decoder + + Performs the following translations in decoding: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + """ + + _scanner = Scanner(ANYTHING) + __all__ = ['__init__', 'decode', 'raw_decode'] + + def __init__(self, encoding=None, object_hook=None): + """ + ``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + """ + self.encoding = encoding + self.object_hook = object_hook + + def decode(self, s, _w=WHITESPACE.match): + """ + Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, **kw): + """ + Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + """ + kw.setdefault('context', self) + try: + obj, end = self._scanner.iterscan(s, **kw).next() + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end + +__all__ = ['JSONDecoder'] diff --git a/django/utils/simplejson/encoder.py b/django/utils/simplejson/encoder.py new file mode 100644 index 0000000000..bb1aba09f0 --- /dev/null +++ b/django/utils/simplejson/encoder.py @@ -0,0 +1,289 @@ +""" +Implementation of JSONEncoder +""" +import re + +# this should match any kind of infinity +INFCHARS = re.compile(r'[infINF]') +ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +ESCAPE_DCT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(20): + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +def floatstr(o, allow_nan=True): + s = str(o) + # If the first non-sign is a digit then it's not a special value + if (o < 0.0 and s[1].isdigit()) or s[0].isdigit(): + return s + elif not allow_nan: + raise ValueError("Out of range float values are not JSON compliant: %r" + % (o,)) + # These are the string representations on the platforms I've tried + if s == 'nan': + return 'NaN' + if s == 'inf': + return 'Infinity' + if s == '-inf': + return '-Infinity' + # NaN should either be inequal to itself, or equal to everything + if o != o or o == 0.0: + return 'NaN' + # Last ditch effort, assume inf + if o < 0: + return '-Infinity' + return 'Infinity' + +def encode_basestring(s): + """ + Return a JSON representation of a Python string + """ + def replace(match): + return ESCAPE_DCT[match.group(0)] + return '"' + ESCAPE.sub(replace, s) + '"' + +def encode_basestring_ascii(s): + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + return '\\u%04x' % (ord(s),) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +class JSONEncoder(object): + """ + Extensible JSON encoder for Python data structures. + + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + """ + __all__ = ['__init__', 'default', 'encode', 'iterencode'] + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False): + """ + Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is False, then it is a TypeError to attempt + encoding of keys that are not str, int, long, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is True, the output is guaranteed to be str + objects with all incoming unicode characters escaped. If + ensure_ascii is false, the output will be unicode object. + + If check_circular is True, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. + + If allow_nan is True, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + + If sort_keys is True, then the output of dictionaries will be + sorted by key; this is useful for regression tests to ensure + that JSON serializations can be compared on a day-to-day basis. + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + + def _iterencode_list(self, lst, markers=None): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + yield '[' + first = True + for value in lst: + if first: + first = False + else: + yield ', ' + for chunk in self._iterencode(value, markers): + yield chunk + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(self, dct, markers=None): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + first = True + if self.ensure_ascii: + encoder = encode_basestring_ascii + else: + encoder = encode_basestring + allow_nan = self.allow_nan + if self.sort_keys: + keys = dct.keys() + keys.sort() + items = [(k,dct[k]) for k in keys] + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + key = floatstr(key, allow_nan) + elif isinstance(key, (int, long)): + key = str(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif self.skipkeys: + continue + else: + raise TypeError("key %r is not a string" % (key,)) + if first: + first = False + else: + yield ', ' + yield encoder(key) + yield ': ' + for chunk in self._iterencode(value, markers): + yield chunk + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(self, o, markers=None): + if isinstance(o, basestring): + if self.ensure_ascii: + encoder = encode_basestring_ascii + else: + encoder = encode_basestring + yield encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield floatstr(o, self.allow_nan) + elif isinstance(o, (list, tuple)): + for chunk in self._iterencode_list(o, markers): + yield chunk + elif isinstance(o, dict): + for chunk in self._iterencode_dict(o, markers): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + for chunk in self._iterencode_default(o, markers): + yield chunk + if markers is not None: + del markers[markerid] + + def _iterencode_default(self, o, markers=None): + newobj = self.default(o) + return self._iterencode(newobj, markers) + + def default(self, o): + """ + Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + """ + raise TypeError("%r is not JSON serializable" % (o,)) + + def encode(self, o): + """ + Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo":["bar", "baz"]}' + """ + # This doesn't pass the iterator directly to ''.join() because it + # sucks at reporting exceptions. It's going to do this internally + # anyway because it uses PySequence_Fast or similar. + chunks = list(self.iterencode(o)) + return ''.join(chunks) + + def iterencode(self, o): + """ + Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + """ + if self.check_circular: + markers = {} + else: + markers = None + return self._iterencode(o, markers) + +__all__ = ['JSONEncoder'] diff --git a/django/utils/simplejson/scanner.py b/django/utils/simplejson/scanner.py new file mode 100644 index 0000000000..c2e9b6eb89 --- /dev/null +++ b/django/utils/simplejson/scanner.py @@ -0,0 +1,63 @@ +""" +Iterator based sre token scanner +""" +import sre_parse, sre_compile, sre_constants +from sre_constants import BRANCH, SUBPATTERN +from sre import VERBOSE, MULTILINE, DOTALL +import re + +__all__ = ['Scanner', 'pattern'] + +FLAGS = (VERBOSE | MULTILINE | DOTALL) +class Scanner(object): + def __init__(self, lexicon, flags=FLAGS): + self.actions = [None] + # combine phrases into a compound pattern + s = sre_parse.Pattern() + s.flags = flags + p = [] + for idx, token in enumerate(lexicon): + phrase = token.pattern + try: + subpattern = sre_parse.SubPattern(s, + [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) + except sre_constants.error: + raise + p.append(subpattern) + self.actions.append(token) + + p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) + self.scanner = sre_compile.compile(p) + + + def iterscan(self, string, idx=0, context=None): + """ + Yield match, end_idx for each match + """ + match = self.scanner.scanner(string, idx).match + actions = self.actions + lastend = idx + end = len(string) + while True: + m = match() + if m is None: + break + matchbegin, matchend = m.span() + if lastend == matchend: + break + action = actions[m.lastindex] + if action is not None: + rval, next_pos = action(m, context) + if next_pos is not None and next_pos != matchend: + # "fast forward" the scanner + matchend = next_pos + match = self.scanner.scanner(string, matchend).match + yield rval, matchend + lastend = matchend + +def pattern(pattern, flags=FLAGS): + def decorator(fn): + fn.pattern = pattern + fn.regex = re.compile(pattern, flags) + return fn + return decorator