msgpack/python/msgpack/_msgpack.pyx

342 lines
9.6 KiB
Cython
Raw Normal View History

2009-05-22 14:31:20 +09:00
# coding: utf-8
cdef extern from "Python.h":
ctypedef char* const_char_ptr "const char*"
ctypedef struct PyObject
2010-09-02 01:29:57 +09:00
cdef object PyBytes_FromStringAndSize(const_char_ptr b, Py_ssize_t len)
cdef PyObject* Py_True
cdef PyObject* Py_False
cdef long long PyLong_AsLongLong(object o)
cdef unsigned long long PyLong_AsUnsignedLongLong(object o)
2010-09-02 01:29:57 +09:00
cdef bint PyBool_Check(object o)
cdef bint PyDict_Check(object o)
2010-09-02 01:29:57 +09:00
cdef bint PySequence_Check(object o)
cdef bint PyLong_Check(object o)
cdef bint PyInt_Check(object o)
cdef bint PyFloat_Check(object o)
cdef bint PyBytes_Check(object o)
cdef bint PyUnicode_Check(object o)
2009-05-22 14:31:20 +09:00
cdef extern from "stdlib.h":
2009-06-26 14:10:20 +09:00
void* malloc(size_t)
void* realloc(void*, size_t)
2009-05-22 14:31:20 +09:00
void free(void*)
2009-05-22 14:31:20 +09:00
cdef extern from "string.h":
2009-06-26 14:10:20 +09:00
void* memcpy(char* dst, char* src, size_t size)
void* memmove(char* dst, char* src, size_t size)
2009-05-22 14:31:20 +09:00
cdef extern from "pack.h":
2009-05-22 14:31:20 +09:00
struct msgpack_packer:
2009-07-01 00:57:46 +09:00
char* buf
size_t length
size_t buf_size
2009-05-22 14:31:20 +09:00
int msgpack_pack_int(msgpack_packer* pk, int d)
int msgpack_pack_nil(msgpack_packer* pk)
int msgpack_pack_true(msgpack_packer* pk)
int msgpack_pack_false(msgpack_packer* pk)
int msgpack_pack_long(msgpack_packer* pk, long d)
int msgpack_pack_long_long(msgpack_packer* pk, long long d)
int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d)
int msgpack_pack_double(msgpack_packer* pk, double d)
int msgpack_pack_array(msgpack_packer* pk, size_t l)
int msgpack_pack_map(msgpack_packer* pk, size_t l)
int msgpack_pack_raw(msgpack_packer* pk, size_t l)
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
2009-05-22 14:31:20 +09:00
2009-06-28 21:24:02 +09:00
cdef class Packer(object):
    """MessagePack Packer

    Serializes Python objects into an internal C buffer and hands the
    result back as a bytes object.

    usage:

        packer = Packer()
        astream.write(packer.pack(a))
        astream.write(packer.pack(b))
    """
    # C-level packer state; `buf`/`length`/`buf_size` describe the output buffer.
    cdef msgpack_packer pk

    def __cinit__(self):
        cdef int buf_size = 1024*1024
        self.pk.buf = <char*> malloc(buf_size)
        if self.pk.buf == NULL:
            # Without this check a failed allocation would only surface as a
            # crash on the first write into the buffer.
            raise MemoryError("Unable to allocate internal buffer.")
        self.pk.buf_size = buf_size
        self.pk.length = 0

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe even if __cinit__ failed.
        free(self.pk.buf)

    cdef int _pack(self, object o) except -1:
        """Append the serialized form of `o` to the internal buffer.

        Containers (dict, sequence) are packed recursively.  Returns the
        status of the last msgpack_pack_* call (0 on success).  Raises
        TypeError for unsupported types and OverflowError for integers that
        do not fit into a 64-bit value.
        """
        cdef long long llval
        cdef unsigned long long ullval
        cdef long longval
        cdef double fval
        cdef char* rawval
        cdef int ret
        cdef dict d

        if o is None:
            ret = msgpack_pack_nil(&self.pk)
        #elif PyBool_Check(o):
        elif isinstance(o, bool):
            # bool must be tested before the integer checks.
            if o:
                ret = msgpack_pack_true(&self.pk)
            else:
                ret = msgpack_pack_false(&self.pk)
        elif PyLong_Check(o):
            # Use Cython's checked conversions: they raise OverflowError for
            # out-of-range values.  The PyLong_As* externs are declared
            # without an error return value, so a failed conversion there
            # would not be detected at the call site.
            if o > 0:
                ullval = o
                ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
            else:
                llval = o
                ret = msgpack_pack_long_long(&self.pk, llval)
        elif PyInt_Check(o):
            longval = o
            ret = msgpack_pack_long(&self.pk, longval)
        elif PyFloat_Check(o):
            fval = o
            ret = msgpack_pack_double(&self.pk, fval)
        elif PyBytes_Check(o):
            rawval = o
            ret = msgpack_pack_raw(&self.pk, len(o))
            if ret == 0:
                ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
        elif PyUnicode_Check(o):
            # Unicode is stored as its UTF-8 encoding.  `o` is rebound so the
            # encoded bytes stay alive while `rawval` points into them.
            o = o.encode('utf-8')
            rawval = o
            ret = msgpack_pack_raw(&self.pk, len(o))
            if ret == 0:
                ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
        elif PyDict_Check(o):
            d = o
            ret = msgpack_pack_map(&self.pk, len(d))
            if ret == 0:
                for k, v in d.items():
                    ret = self._pack(k)
                    if ret != 0: break
                    ret = self._pack(v)
                    if ret != 0: break
        elif PySequence_Check(o):
            ret = msgpack_pack_array(&self.pk, len(o))
            if ret == 0:
                for v in o:
                    ret = self._pack(v)
                    if ret != 0: break
        else:
            # TODO: Serialize with default() like simplejson.
            raise TypeError("can't serialize %r" % (o,))
        return ret

    def pack(self, object obj):
        """Serialize `obj` and return the packed bytes.

        Raises TypeError if `obj` (or something nested in it) cannot be
        serialized or if the underlying packer reports a non-zero status.
        """
        cdef int ret
        try:
            ret = self._pack(obj)
            if ret:
                raise TypeError
            buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
        finally:
            # Always rewind the buffer.  Otherwise bytes written before a
            # failure would be prepended to the result of the next pack().
            self.pk.length = 0
        return buf
2009-07-01 00:57:46 +09:00
2009-06-26 14:10:20 +09:00
2009-06-08 01:30:43 +09:00
def pack(object o, object stream):
    """Serialize `o` with a fresh Packer and write the bytes to `stream`.

    `stream` only needs a ``write(bytes)`` method.
    """
    serializer = Packer()
    emit = stream.write
    emit(serializer.pack(o))
2009-06-08 01:30:43 +09:00
2009-06-26 14:10:20 +09:00
def packb(object o):
    """Serialize `o` with a fresh Packer and return the packed bytes."""
    return Packer().pack(o)

# Alias for packb.
packs = packb
2009-06-08 01:30:43 +09:00
cdef extern from "unpack.h":
ctypedef struct msgpack_user:
2009-12-17 15:19:18 +09:00
int use_list
ctypedef struct template_context:
msgpack_user user
2009-06-26 14:10:20 +09:00
PyObject* obj
size_t count
unsigned int ct
PyObject* key
2009-06-08 01:30:43 +09:00
int template_execute(template_context* ctx, const_char_ptr data,
size_t len, size_t* off)
void template_init(template_context* ctx)
2009-06-08 12:46:02 +09:00
object template_data(template_context* ctx)
2009-07-12 09:29:11 +09:00
def unpackb(object packed_bytes):
    """Unpack one object from `packed_bytes` and return it.

    Returns None when template_execute() does not report a complete object
    (status other than 1); no exception is raised in that case.
    """
    cdef const_char_ptr data = packed_bytes
    cdef template_context ctx
    cdef size_t consumed = 0
    cdef int status

    template_init(&ctx)
    ctx.user.use_list = 0
    status = template_execute(&ctx, data, len(packed_bytes), &consumed)
    if status != 1:
        return None
    return template_data(&ctx)

# Alias for unpackb.
unpacks = unpackb
2009-06-08 01:30:43 +09:00
def unpack(object stream):
    """Read all of `stream` with ``stream.read()`` and unpack one object.

    The result is whatever unpackb() returns for the data read.
    """
    return unpackb(stream.read())
2009-06-08 01:30:43 +09:00
2009-06-26 14:10:20 +09:00
cdef class UnpackIterator(object):
    """Iterator adaptor around an object that has an ``unpack()`` method.

    Each step returns the result of ``unpacker.unpack()``; any exception it
    raises propagates to the caller.
    """
    # The wrapped unpacker; only its unpack() method is used.
    cdef object unpacker

    def __init__(self, unpacker):
        self.unpacker = unpacker

    def __iter__(self):
        return self

    def __next__(self):
        item = self.unpacker.unpack()
        return item
cdef class Unpacker(object):
    """Unpacker(file_like=None, read_size=1024*1024, use_list=0)

    Streaming unpacker.
    file_like must have read(n) method.
    read_size is used like file_like.read(read_size)
    (passing 0 selects the default of 1024*1024).
    use_list is forwarded to the C unpack context.

    If file_like is None, you can ``feed()`` bytes. ``feed()`` is
    useful for unpacking from non-blocking stream.

    example 1:
        unpacker = Unpacker(afile)
        for o in unpacker:
            do_something(o)

    example 2:
        unpacker = Unpacker()
        while 1:
            buf = astream.read()
            unpacker.feed(buf)
            for o in unpacker:
                do_something(o)
    """
    cdef template_context ctx
    # Input buffer: unread data lives in buf[buf_head:buf_tail].
    cdef char* buf
    cdef size_t buf_size, buf_head, buf_tail
    cdef object file_like
    cdef int read_size
    # Chunks queued by feed()/fill_buffer() that are not yet copied into buf.
    cdef object waiting_bytes
    cdef int use_list

    def __cinit__(self):
        self.buf = NULL

    def __dealloc__(self):
        if self.buf:
            free(self.buf)

    def __init__(self, file_like=None, int read_size=0, use_list=0):
        if read_size == 0:
            read_size = 1024*1024
        elif read_size < 0:
            # A negative size would be converted to a huge size_t by malloc.
            raise ValueError("read_size must not be negative.")
        if self.buf != NULL:
            # __init__ may be called more than once; do not leak the old buffer.
            free(self.buf)
            self.buf = NULL
        self.use_list = use_list
        self.file_like = file_like
        self.read_size = read_size
        self.waiting_bytes = []
        self.buf_size = 0
        self.buf_head = 0
        self.buf_tail = 0
        self.buf = <char*>malloc(read_size)
        if self.buf == NULL:
            raise MemoryError("Unable to allocate internal buffer.")
        self.buf_size = read_size
        template_init(&self.ctx)
        self.ctx.user.use_list = use_list

    def feed(self, bytes next_bytes):
        """Queue `next_bytes` to be consumed by later unpack() calls."""
        self.waiting_bytes.append(next_bytes)

    cdef append_buffer(self):
        """Copy all queued chunks to the tail of the buffer and clear the queue.

        The caller (fill_buffer) must already have made room for them.
        """
        cdef char* buf = self.buf
        cdef Py_ssize_t tail = self.buf_tail
        cdef Py_ssize_t l
        cdef bytes b

        for b in self.waiting_bytes:
            l = len(b)
            memcpy(buf + tail, <char*>(b), l)
            tail += l
        self.buf_tail = tail
        del self.waiting_bytes[:]

    # prepare self.buf
    cdef fill_buffer(self):
        """Read from file_like (if any) and move queued chunks into the buffer.

        Makes room first, either by moving unread data to the front or by
        growing the buffer.  Raises MemoryError if the buffer cannot be grown.
        """
        cdef Py_ssize_t add_size
        cdef char* buf
        cdef size_t head, tail, size

        if self.file_like is not None:
            next_bytes = self.file_like.read(self.read_size)
            if next_bytes:
                self.waiting_bytes.append(next_bytes)
            else:
                # Empty read: treat the source as exhausted.
                self.file_like = None

        if not self.waiting_bytes:
            return

        add_size = 0
        for b in self.waiting_bytes:
            add_size += len(b)

        buf = self.buf
        head = self.buf_head
        tail = self.buf_tail
        size = self.buf_size

        if tail + add_size <= size:
            # Enough room after the tail: do nothing.
            pass
        elif tail - head + add_size < size:
            # Enough room once consumed bytes are dropped: move to front.
            memmove(buf, buf + head, tail - head)
            tail -= head
            head = 0
        else:
            # expand buffer
            size = tail + add_size
            buf = <char*>realloc(<void*>self.buf, size)
            if buf == NULL:
                # self.buf is still valid and still owned by this object.
                raise MemoryError("Unable to enlarge internal buffer.")

        self.buf = buf
        self.buf_head = head
        self.buf_tail = tail
        self.buf_size = size

        self.append_buffer()

    cpdef unpack(self):
        """unpack one object

        Raises StopIteration when the buffered data does not hold a complete
        object and there is no file_like left to read from, and ValueError
        when template_execute() reports an error.
        """
        cdef int ret
        # Loop rather than recurse, so an object spanning many reads does
        # not deepen the call stack.
        while True:
            self.fill_buffer()
            ret = template_execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
            if ret == 1:
                o = template_data(&self.ctx)
                template_init(&self.ctx)
                return o
            elif ret == 0:
                if self.file_like is not None:
                    # Incomplete object but the source may have more data.
                    continue
                raise StopIteration("No more unpack data.")
            else:
                raise ValueError("Unpack failed.")

    def __iter__(self):
        return UnpackIterator(self)
2009-12-16 22:05:31 +09:00
# for debug.
#def _buf(self):
# return PyString_FromStringAndSize(self.buf, self.buf_tail)
#def _off(self):
# return self.buf_head