nightingale1/vendor/github.com/hydrogen18/stalecucumber/protocol_0.go

724 lines
18 KiB
Go

package stalecucumber
import "strconv"
import "fmt"
import "math/big"
import "errors"
//import "unicode/utf8"
import "unicode/utf16"
/**
Opcode: INT
Push an integer or bool.
The argument is a newline-terminated decimal literal string.
The intent may have been that this always fit in a short Python int,
but INT can be generated in pickles written on a 64-bit box that
require a Python long on a 32-bit box. The difference between this
and LONG then is that INT skips a trailing 'L', and produces a short
int whenever possible.
Another difference is due to that, when bool was introduced as a
distinct type in 2.3, builtin names True and False were also added to
2.2.2, mapping to ints 1 and 0. For compatibility in both directions,
True gets pickled as INT + "I01\n", and False as INT + "I00\n".
Leading zeroes are never produced for a genuine integer. The 2.3
(and later) unpicklers special-case these and return bool instead;
earlier unpicklers ignore the leading "0" and return the int.
**
Stack before: []
Stack after: [int_or_bool]
**/
// opcode_INT fetches its argument with pm.readString and pushes either a bool
// (for the exact literals "01" and "00") or the argument parsed as a base-10
// signed 64-bit integer. Read and parse errors are returned unchanged and
// nothing is pushed in that case.
func (pm *PickleMachine) opcode_INT() error {
	literal, err := pm.readString()
	if err != nil {
		return err
	}

	// The two boolean sentinels are matched verbatim; every other literal
	// falls through to numeric parsing.
	switch literal {
	case "01":
		pm.push(true)
		return nil
	case "00":
		pm.push(false)
		return nil
	}

	value, parseErr := strconv.ParseInt(literal, 10, 64)
	if parseErr != nil {
		return parseErr
	}
	pm.push(value)
	return nil
}
/**
Opcode: LONG
Push a long integer.
The same as INT, except that the literal ends with 'L', and always
unpickles to a Python long. There doesn't seem a real purpose to the
trailing 'L'.
Note that LONG takes time quadratic in the number of digits when
unpickling (this is simply due to the nature of decimal->binary
conversion). Proto 2 added linear-time (in C; still quadratic-time
in Python) LONG1 and LONG4 opcodes.
**
Stack before: []
Stack after: [long]
**/
// opcode_LONG fetches its argument with pm.readString, requires it to end in
// 'L', and pushes the preceding text parsed as a base-10 *big.Int.
//
// The digits are parsed with (*big.Int).SetString in base 10, which rejects
// the whole literal unless every character is consumed. Scanning through fmt
// instead would auto-detect the base (so "010" would become 8 and "0x10"
// would become 16) and would silently ignore anything after the first
// non-digit.
//
// An error is returned, and nothing is pushed, when the argument is empty,
// lacks the trailing 'L', or is not a valid decimal integer.
func (pm *PickleMachine) opcode_LONG() error {
	str, err := pm.readString()
	if err != nil {
		return err
	}
	if len(str) == 0 {
		return fmt.Errorf("String for LONG opcode cannot be zero length")
	}
	last := str[len(str)-1]
	if last != 'L' {
		return fmt.Errorf("String for LONG opcode must end with %q not %q", "L", last)
	}

	digits := str[:len(str)-1]
	i, ok := new(big.Int).SetString(digits, 10)
	if !ok {
		return fmt.Errorf("String for LONG opcode has invalid decimal literal %q", digits)
	}
	pm.push(i)
	return nil
}
/**
Opcode: STRING
Push a Python string object.
The argument is a repr-style string, with bracketing quote characters,
and perhaps embedded escapes. The argument extends until the next
newline character.
**
Stack before: []
Stack after: [str]
**/
// unquoteInputs lists the quote modes handed to strconv.UnquoteChar, tried in
// order until one succeeds: single quote, double quote, then 0 (no quote
// escapes permitted, both quote characters allowed unescaped).
var unquoteInputs = []byte{0x27, 0x22, 0x0}

// opcode_STRING fetches its argument with pm.readString, strips the
// bracketing quote characters, decodes backslash escapes one character at a
// time and pushes the result as a Go string.
//
// The argument may be bracketed by single or double quotes, as long as the
// opening and closing characters match; Python's repr() switches to double
// quotes when the string contains an apostrophe, so both forms occur in real
// pickles. Anything else is rejected as poorly formed.
func (pm *PickleMachine) opcode_STRING() error {
	str, err := pm.readString()
	if err != nil {
		return err
	}

	if len(str) < 2 {
		return fmt.Errorf("For STRING opcode, argument has invalid length %d", len(str))
	}

	quote := str[0]
	if (quote != '\'' && quote != '"') || str[len(str)-1] != quote {
		return fmt.Errorf("For STRING opcode, argument has poorly formed value %q", str)
	}

	v := str[1 : len(str)-1]
	f := make([]rune, 0, len(v))

	for len(v) != 0 {
		var vr rune
		var rest string
		// Each quote mode accepts a different set of (un)escaped quote
		// characters, so try them all before giving up on this character.
		for _, q := range unquoteInputs {
			vr, _, rest, err = strconv.UnquoteChar(v, q)
			if err == nil {
				break
			}
		}
		if err != nil {
			c := v[0]
			return fmt.Errorf("Read thus far %q. Failed to unquote character %c error:%v", string(f), c, err)
		}
		v = rest
		f = append(f, vr)
	}

	pm.push(string(f))
	return nil
}
/**
Opcode: NONE
Push None on the stack.
**
Stack before: []
Stack after: [None]
**/
// opcode_NONE pushes a PickleNone value, this package's stand-in for Python's
// None, and never fails.
func (pm *PickleMachine) opcode_NONE() error {
	none := PickleNone{}
	pm.push(none)
	return nil
}
/**
Opcode: UNICODE
Push a Python Unicode string object.
The argument is a raw-unicode-escape encoding of a Unicode string,
and so may contain embedded escape sequences. The argument extends
until the next newline character.
**
Stack before: []
Stack after: [unicode]
**/
// opcode_UNICODE fetches its argument with pm.readBytes, decodes it and
// pushes the result as a Go string.
//
// Bytes above 127 are taken as the code point of the same value. Every other
// position is decoded with strconv.UnquoteChar, which also consumes
// backslash escapes such as \uXXXX; the quote modes in unquoteInputs are
// tried in turn so unescaped quote characters are accepted.
//
// On a decoding failure the returned error names the byte at the position
// that failed and includes the text decoded so far.
func (pm *PickleMachine) opcode_UNICODE() error {
	raw, err := pm.readBytes()
	if err != nil {
		return err
	}

	// Convert once; slicing the string below is free, whereas converting the
	// remaining bytes on every attempt would copy the tail each time.
	text := string(raw)
	f := make([]rune, 0, len(raw))

	consumed := 0
	for consumed != len(raw) {
		h := raw[consumed]

		// Python 'raw-unicode-escape' doesn't escape extended ASCII, so
		// such bytes arrive verbatim and map straight to a code point.
		if h > 127 {
			ea := utf16.Decode([]uint16{uint16(h)})
			f = append(f, ea...)
			consumed++
			continue
		}

		// Multibyte code points are escaped, so use UnquoteChar to handle
		// those (and plain ASCII) uniformly.
		var vr rune
		pre := text[consumed:]
		for _, q := range unquoteInputs {
			var post string
			vr, _, post, err = strconv.UnquoteChar(pre, q)
			if err == nil {
				consumed += len(pre) - len(post)
				break
			}
		}
		if err != nil {
			// h is the byte at the failing position, not the first byte of
			// the argument.
			return fmt.Errorf("Read thus far %q. Failed to unquote character %c error:%v", string(f), h, err)
		}

		f = append(f, vr)
	}

	pm.push(string(f))
	return nil
}
/**
Opcode: FLOAT
Newline-terminated decimal float literal.
The argument is repr(a_float), and in general requires 17 significant
digits for roundtrip conversion to be an identity (this is so for
IEEE-754 double precision values, which is what Python float maps to
on most boxes).
In general, FLOAT cannot be used to transport infinities, NaNs, or
minus zero across boxes (or even on a single box, if the platform C
library can't read the strings it produces for such things -- Windows
is like that), but may do less damage than BINFLOAT on boxes with
greater precision or dynamic range than IEEE-754 double.
**
Stack before: []
Stack after: [float]
**/
// opcode_FLOAT fetches its argument with pm.readString and pushes it parsed
// as a float64.
//
// strconv.ParseFloat is used because it fails unless the entire argument is a
// well-formed number; scanning with a "%f" format stops at the first
// character that cannot belong to the number and reports success, so a
// corrupt literal such as "1.5abc" would be accepted as 1.5.
//
// Read and parse errors are returned unchanged and nothing is pushed.
func (pm *PickleMachine) opcode_FLOAT() error {
	str, err := pm.readString()
	if err != nil {
		return err
	}

	v, err := strconv.ParseFloat(str, 64)
	if err != nil {
		return err
	}
	pm.push(v)
	return nil
}
/**
Opcode: APPEND
Append an object to a list.
Stack before: ... pylist anyobject
Stack after: ... pylist+[anyobject]
although pylist is really extended in-place.
**
Stack before: [list, any]
Stack after: [list]
**/
// opcode_APPEND pops a value and then a list ([]interface{}) off the stack,
// appends the value to the list and pushes the resulting list back.
//
// An error is returned when either pop fails or when the second popped item
// is not a []interface{}. In the latter case the error is returned instead
// of continuing: continuing would append to a nil slice and push a brand-new
// one-element list in place of whatever was actually on the stack.
func (pm *PickleMachine) opcode_APPEND() error {
	v, err := pm.pop()
	if err != nil {
		return err
	}

	listI, err := pm.pop()
	if err != nil {
		return err
	}

	list, ok := listI.([]interface{})
	if !ok {
		return fmt.Errorf("Second item on top of stack must be of %T not %T", list, listI)
	}

	pm.push(append(list, v))
	return nil
}
/**
Opcode: LIST
Build a list out of the topmost stack slice, after markobject.
All the stack entries following the topmost markobject are placed into
a single Python list, which single list object replaces all of the
stack from the topmost markobject onward. For example,
Stack before: ... markobject 1 2 3 'abc'
Stack after: ... [1, 2, 3, 'abc']
**
Stack before: [mark, stackslice]
Stack after: [list]
**/
// opcode_LIST collects every stack entry above the index reported by
// pm.findMark into a fresh []interface{}, removes those entries via
// pm.popAfterIndex(markIndex), and pushes the new slice. With nothing above
// the mark the pushed slice is empty but non-nil. A findMark error is
// returned unchanged.
func (pm *PickleMachine) opcode_LIST() error {
	markIndex, err := pm.findMark()
	if err != nil {
		return err
	}

	items := make([]interface{}, 0)
	for _, entry := range pm.Stack[markIndex+1:] {
		items = append(items, entry)
	}

	// The entries were copied above, so the stack can now be trimmed.
	pm.popAfterIndex(markIndex)
	pm.push(items)
	return nil
}
/**
Opcode: TUPLE
Build a tuple out of the topmost stack slice, after markobject.
All the stack entries following the topmost markobject are placed into
a single Python tuple, which single tuple object replaces all of the
stack from the topmost markobject onward. For example,
Stack before: ... markobject 1 2 3 'abc'
Stack after: ... (1, 2, 3, 'abc')
**
Stack before: [mark, stackslice]
Stack after: [tuple]
**/
// opcode_TUPLE delegates entirely to opcode_LIST, so the value it leaves on
// the stack is whatever opcode_LIST builds and any error comes from there.
func (pm *PickleMachine) opcode_TUPLE() error {
	if err := pm.opcode_LIST(); err != nil {
		return err
	}
	return nil
}
/**
Opcode: DICT
Build a dict out of the topmost stack slice, after markobject.
All the stack entries following the topmost markobject are placed into
a single Python dict, which single dict object replaces all of the
stack from the topmost markobject onward. The stack slice alternates
key, value, key, value, .... For example,
Stack before: ... markobject 1 2 3 'abc'
Stack after: ... {1: 2, 3: 'abc'}
**
Stack before: [mark, stackslice]
Stack after: [dict]
**/
// opcode_DICT pairs up the stack entries above the index reported by
// pm.findMark as key, value, key, value, ... and stores them in a new
// map[interface{}]interface{}. On success the entries are removed via
// pm.popAfterIndex(markIndex) and the map is pushed.
//
// A key left without a value yields an error and the stack is left as it was.
// A panic raised while building the map (for example, using an unhashable
// value as a map key) is recovered and reported through the named result.
func (pm *PickleMachine) opcode_DICT() (err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		switch cause := r.(type) {
		case string:
			err = errors.New(cause)
		case error:
			err = cause
		default:
			err = errors.New("Unknown panic")
		}
	}()

	markIndex, err := pm.findMark()
	if err != nil {
		return err
	}

	result := make(map[interface{}]interface{})

	// pendingKey is nil while waiting for the next key and holds the key
	// while waiting for its value.
	var pendingKey interface{}
	for _, entry := range pm.Stack[markIndex+1:] {
		if pendingKey != nil {
			result[pendingKey] = entry
			pendingKey = nil
			continue
		}
		pendingKey = entry
	}

	if pendingKey != nil {
		return fmt.Errorf("For opcode DICT stack after mark contained an odd number of items, this is not valid")
	}

	pm.popAfterIndex(markIndex)
	pm.push(result)
	return nil
}
/**
Opcode: SETITEM
Add a key+value pair to an existing dict.
Stack before: ... pydict key value
Stack after: ... pydict
where pydict has been modified via pydict[key] = value.
**
Stack before: [dict, any, any]
Stack after: [dict]
**/
// opcode_SETITEM pops, in this order, a value, a key and a dict
// (map[interface{}]interface{}) off the stack, stores dict[key] = value and
// pushes the dict back.
//
// An error is returned when any pop fails or when the third popped item is
// not a map[interface{}]interface{}. A panic raised by the map assignment
// (for example, an unhashable key) is recovered and reported through the
// named result.
func (pm *PickleMachine) opcode_SETITEM() (err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		switch cause := r.(type) {
		case string:
			err = errors.New(cause)
		case error:
			err = cause
		default:
			err = errors.New("Unknown panic")
		}
	}()

	// Operands come off the stack top-first: value, key, then the dict.
	var operands [3]interface{}
	for i := range operands {
		operands[i], err = pm.pop()
		if err != nil {
			return err
		}
	}
	value, key, dictI := operands[0], operands[1], operands[2]

	dict, isDict := dictI.(map[interface{}]interface{})
	if !isDict {
		return fmt.Errorf("For opcode SETITEM stack item 2 from top must be of type %T not %T", dict, dictI)
	}

	dict[key] = value
	pm.push(dict)
	return nil
}
/**
Opcode: POP
Discard the top stack item, shrinking the stack by one item.
**
Stack before: [any]
Stack after: []
**/
// opcode_POP removes the top stack item via pm.pop and discards it; the only
// possible error is the one pm.pop reports.
func (pm *PickleMachine) opcode_POP() error {
	if _, err := pm.pop(); err != nil {
		return err
	}
	return nil
}
/**
Opcode: DUP
Push the top stack item onto the stack again, duplicating it.
**
Stack before: [any]
Stack after: [any, any]
**/
// opcode_DUP pushes the current top stack item onto the stack a second time.
//
// It returns ErrStackTooSmall when the stack is empty. The item is pushed
// as-is, without copying, the same way opcode_GET pushes memo entries; for
// reference-like values (slices, maps, pointers) both stack slots therefore
// refer to the same underlying data.
func (pm *PickleMachine) opcode_DUP() error {
	if len(pm.Stack) < 1 {
		return ErrStackTooSmall
	}
	pm.push(pm.Stack[len(pm.Stack)-1])
	return nil
}
/**
Opcode: MARK
Push markobject onto the stack.
markobject is a unique object, used by other opcodes to identify a
region of the stack containing a variable number of objects for them
to work on. See markobject.doc for more detail.
**
Stack before: []
Stack after: [mark]
**/
// opcode_MARK records in pm.lastMark the stack index the mark is about to
// occupy, then pushes a PickleMark value. It never fails.
func (pm *PickleMachine) opcode_MARK() error {
	mark := PickleMark{}
	// Capture the index before pushing: the mark lands at the current length.
	pm.lastMark = len(pm.Stack)
	pm.push(mark)
	return nil
}
/**
Opcode: GET
Read an object from the memo and push it on the stack.
The index of the memo object to push is given by the newline-terminated
decimal string following. BINGET and LONG_BINGET are space-optimized
versions.
**
Stack before: []
Stack after: [any]
**/
// opcode_GET fetches its argument with pm.readString, converts it to an
// integer with strconv.Atoi, looks that index up with pm.readFromMemo and
// pushes the value found. The first error from any of those steps is
// returned unchanged and nothing is pushed.
func (pm *PickleMachine) opcode_GET() error {
	arg, err := pm.readString()
	if err != nil {
		return err
	}

	slot, convErr := strconv.Atoi(arg)
	if convErr != nil {
		return convErr
	}

	memoized, lookupErr := pm.readFromMemo(int64(slot))
	if lookupErr != nil {
		return lookupErr
	}

	//TODO test if the object we are about to push is mutable
	//if so it needs to be somehow deep copied first
	pm.push(memoized)
	return nil
}
/**
Opcode: PUT
Store the stack top into the memo. The stack is not popped.
The index of the memo location to write into is given by the newline-
terminated decimal string following. BINPUT and LONG_BINPUT are
space-optimized versions.
**
Stack before: []
Stack after: []
**/
// opcode_PUT stores the current top stack item in the memo without popping
// it. The memo index comes from the argument fetched with pm.readString and
// converted with strconv.Atoi.
//
// ErrStackTooSmall is returned, before any input is read, when the stack is
// empty; read and conversion errors are returned unchanged.
func (pm *PickleMachine) opcode_PUT() error {
	if len(pm.Stack) == 0 {
		return ErrStackTooSmall
	}

	arg, err := pm.readString()
	if err != nil {
		return err
	}

	slot, err := strconv.Atoi(arg)
	if err != nil {
		return err
	}

	top := pm.Stack[len(pm.Stack)-1]
	pm.storeMemo(int64(slot), top)
	return nil
}
/**
Opcode: GLOBAL
Push a global object (module.attr) on the stack.
Two newline-terminated strings follow the GLOBAL opcode. The first is
taken as a module name, and the second as a class name. The class
object module.class is pushed on the stack. More accurately, the
object returned by self.find_class(module, class) is pushed on the
stack, so unpickling subclasses can override this form of lookup.
**
Stack before: []
Stack after: [any]
**/
// opcode_GLOBAL is a placeholder: it reads nothing, leaves the stack
// untouched and always returns ErrOpcodeNotImplemented.
func (pm *PickleMachine) opcode_GLOBAL() error {
//TODO push an object that represents the result of this operation
return ErrOpcodeNotImplemented
}
/**
Opcode: REDUCE
Push an object built from a callable and an argument tuple.
The opcode is named to remind of the __reduce__() method.
Stack before: ... callable pytuple
Stack after: ... callable(*pytuple)
The callable and the argument tuple are the first two items returned
by a __reduce__ method. Applying the callable to the argtuple is
supposed to reproduce the original object, or at least get it started.
If the __reduce__ method returns a 3-tuple, the last component is an
argument to be passed to the object's __setstate__, and then the REDUCE
opcode is followed by code to create setstate's argument, and then a
BUILD opcode to apply __setstate__ to that argument.
If type(callable) is not ClassType, REDUCE complains unless the
callable has been registered with the copy_reg module's
safe_constructors dict, or the callable has a magic
'__safe_for_unpickling__' attribute with a true value. I'm not sure
why it does this, but I've sure seen this complaint often enough when
I didn't want to <wink>.
**
Stack before: [any, any]
Stack after: [any]
**/
// opcode_REDUCE is a placeholder: it leaves the stack untouched and always
// returns ErrOpcodeNotImplemented.
func (pm *PickleMachine) opcode_REDUCE() error {
//TODO push an object that represents the result of this operation
return ErrOpcodeNotImplemented
}
/**
Opcode: BUILD
Finish building an object, via __setstate__ or dict update.
Stack before: ... anyobject argument
Stack after: ... anyobject
where anyobject may have been mutated, as follows:
If the object has a __setstate__ method,
anyobject.__setstate__(argument)
is called.
Else the argument must be a dict, the object must have a __dict__, and
the object is updated via
anyobject.__dict__.update(argument)
This may raise RuntimeError in restricted execution mode (which
disallows access to __dict__ directly); in that case, the object
is updated instead via
for k, v in argument.items():
anyobject[k] = v
**
Stack before: [any, any]
Stack after: [any]
**/
// opcode_BUILD is a placeholder: it leaves the stack untouched and always
// returns ErrOpcodeNotImplemented.
func (pm *PickleMachine) opcode_BUILD() error {
return ErrOpcodeNotImplemented
}
/**
Opcode: INST
Build a class instance.
This is the protocol 0 version of protocol 1's OBJ opcode.
INST is followed by two newline-terminated strings, giving a
module and class name, just as for the GLOBAL opcode (and see
GLOBAL for more details about that). self.find_class(module, name)
is used to get a class object.
In addition, all the objects on the stack following the topmost
markobject are gathered into a tuple and popped (along with the
topmost markobject), just as for the TUPLE opcode.
Now it gets complicated. If all of these are true:
+ The argtuple is empty (markobject was at the top of the stack
at the start).
+ It's an old-style class object (the type of the class object is
ClassType).
+ The class object does not have a __getinitargs__ attribute.
then we want to create an old-style class instance without invoking
its __init__() method (pickle has waffled on this over the years; not
calling __init__() is current wisdom). In this case, an instance of
an old-style dummy class is created, and then we try to rebind its
__class__ attribute to the desired class object. If this succeeds,
the new instance object is pushed on the stack, and we're done. In
restricted execution mode it can fail (assignment to __class__ is
disallowed), and I'm not really sure what happens then -- it looks
like the code ends up calling the class object's __init__ anyway,
via falling into the next case.
Else (the argtuple is not empty, it's not an old-style class object,
or the class object does have a __getinitargs__ attribute), the code
first insists that the class object have a __safe_for_unpickling__
attribute. Unlike as for the __safe_for_unpickling__ check in REDUCE,
it doesn't matter whether this attribute has a true or false value, it
only matters whether it exists (XXX this is a bug; cPickle
requires the attribute to be true). If __safe_for_unpickling__
doesn't exist, UnpicklingError is raised.
Else (the class object does have a __safe_for_unpickling__ attr),
the class object obtained from INST's arguments is applied to the
argtuple obtained from the stack, and the resulting instance object
is pushed on the stack.
NOTE: checks for __safe_for_unpickling__ went away in Python 2.3.
**
Stack before: [mark, stackslice]
Stack after: [any]
**/
// opcode_INST is a placeholder: it reads nothing, leaves the stack untouched
// and always returns ErrOpcodeNotImplemented.
func (pm *PickleMachine) opcode_INST() error {
return ErrOpcodeNotImplemented
}
/**
Opcode: STOP
Stop the unpickling machine.
Every pickle ends with this opcode. The object at the top of the stack
is popped, and that's the result of unpickling. The stack should be
empty then.
**
Stack before: [any]
Stack after: []
**/
// opcode_STOP always returns the ErrOpcodeStopped sentinel. It does not pop
// or inspect the stack itself.
// NOTE(review): presumably the caller treats ErrOpcodeStopped as the normal
// end-of-pickle signal and collects the result — confirm against the
// dispatch loop.
func (pm *PickleMachine) opcode_STOP() error {
return ErrOpcodeStopped
}
/**
Opcode: PERSID
Push an object identified by a persistent ID.
The pickle module doesn't define what a persistent ID means. PERSID's
argument is a newline-terminated str-style (no embedded escapes, no
bracketing quote characters) string, which *is* "the persistent ID".
The unpickler passes this string to self.persistent_load(). Whatever
object that returns is pushed on the stack. There is no implementation
of persistent_load() in Python's unpickler: it must be supplied by an
unpickler subclass.
**
Stack before: []
Stack after: [any]
**/
// opcode_PERSID is a placeholder: it reads nothing, leaves the stack
// untouched and always returns ErrOpcodeNotImplemented.
func (pm *PickleMachine) opcode_PERSID() error {
return ErrOpcodeNotImplemented
}