Merge branch 'master' of git@github.com:msgpack/msgpack

This commit is contained in:
Muga Nishizawa 2011-06-12 02:48:42 +09:00
commit d70e64a434
7 changed files with 193 additions and 34 deletions

View File

@ -1,3 +1,13 @@
0.1.10
======
:release date: NOT RELEASED YET
New feature
-----------
* Add ``encoding`` and ``unicode_errors`` options to packer and unpacker.
When these options are specified, unicode objects are (un)packed instead of bytes.
This enables using msgpack as a replacement for json.
0.1.9 0.1.9
====== ======
:release date: 2011-01-29 :release date: 2011-01-29

View File

@ -36,7 +36,7 @@ cdef int DEFAULT_RECURSE_LIMIT=511
cdef class Packer(object): cdef class Packer(object):
"""MessagePack Packer """MessagePack Packer
usage: usage:
packer = Packer() packer = Packer()
@ -45,6 +45,10 @@ cdef class Packer(object):
""" """
cdef msgpack_packer pk cdef msgpack_packer pk
cdef object _default cdef object _default
cdef object _bencoding
cdef object _berrors
cdef char *encoding
cdef char *unicode_errors
def __cinit__(self): def __cinit__(self):
cdef int buf_size = 1024*1024 cdef int buf_size = 1024*1024
@ -54,11 +58,25 @@ cdef class Packer(object):
self.pk.buf_size = buf_size self.pk.buf_size = buf_size
self.pk.length = 0 self.pk.length = 0
def __init__(self, default=None): def __init__(self, default=None, encoding='utf-8', unicode_errors='strict'):
if default is not None: if default is not None:
if not PyCallable_Check(default): if not PyCallable_Check(default):
raise TypeError("default must be a callable.") raise TypeError("default must be a callable.")
self._default = default self._default = default
if encoding is None:
self.encoding = NULL
self.unicode_errors = NULL
else:
if isinstance(encoding, unicode):
self._bencoding = encoding.encode('ascii')
else:
self._bencoding = encoding
self.encoding = PyBytes_AsString(self._bencoding)
if isinstance(unicode_errors, unicode):
self._berrors = unicode_errors.encode('ascii')
else:
self._berrors = unicode_errors
self.unicode_errors = PyBytes_AsString(self._berrors)
def __dealloc__(self): def __dealloc__(self):
free(self.pk.buf); free(self.pk.buf);
@ -68,7 +86,7 @@ cdef class Packer(object):
cdef unsigned long long ullval cdef unsigned long long ullval
cdef long longval cdef long longval
cdef double fval cdef double fval
cdef char* rawval cdef char* rawval
cdef int ret cdef int ret
cdef dict d cdef dict d
@ -101,7 +119,9 @@ cdef class Packer(object):
if ret == 0: if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
elif PyUnicode_Check(o): elif PyUnicode_Check(o):
o = PyUnicode_AsUTF8String(o) if not self.encoding:
raise TypeError("Can't encode utf-8 no encoding is specified")
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
rawval = o rawval = o
ret = msgpack_pack_raw(&self.pk, len(o)) ret = msgpack_pack_raw(&self.pk, len(o))
if ret == 0: if ret == 0:
@ -138,14 +158,14 @@ cdef class Packer(object):
return buf return buf
def pack(object o, object stream, default=None): def pack(object o, object stream, default=None, encoding='utf-8', unicode_errors='strict'):
"""pack an object `o` and write it to stream).""" """pack an object `o` and write it to stream)."""
packer = Packer(default=default) packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors)
stream.write(packer.pack(o)) stream.write(packer.pack(o))
def packb(object o, default=None): def packb(object o, default=None, encoding='utf-8', unicode_errors='strict'):
"""pack o and return packed bytes.""" """pack o and return packed bytes."""
packer = Packer(default=default) packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors)
return packer.pack(o) return packer.pack(o)
dumps = packs = packb dumps = packs = packb
@ -155,6 +175,8 @@ cdef extern from "unpack.h":
int use_list int use_list
PyObject* object_hook PyObject* object_hook
PyObject* list_hook PyObject* list_hook
char *encoding
char *unicode_errors
ctypedef struct template_context: ctypedef struct template_context:
msgpack_user user msgpack_user user
@ -164,12 +186,12 @@ cdef extern from "unpack.h":
PyObject* key PyObject* key
int template_execute(template_context* ctx, const_char_ptr data, int template_execute(template_context* ctx, const_char_ptr data,
size_t len, size_t* off) size_t len, size_t* off) except -1
void template_init(template_context* ctx) void template_init(template_context* ctx)
object template_data(template_context* ctx) object template_data(template_context* ctx)
def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=0): def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=0, encoding=None, unicode_errors="strict"):
"""Unpack packed_bytes to object. Returns an unpacked object.""" """Unpack packed_bytes to object. Returns an unpacked object."""
cdef template_context ctx cdef template_context ctx
cdef size_t off = 0 cdef size_t off = 0
@ -179,9 +201,25 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, bint
cdef Py_ssize_t buf_len cdef Py_ssize_t buf_len
PyObject_AsReadBuffer(packed, <const_void_ptr*>&buf, &buf_len) PyObject_AsReadBuffer(packed, <const_void_ptr*>&buf, &buf_len)
if encoding is None:
enc = NULL
else:
if isinstance(encoding, unicode):
bencoding = encoding.encode('ascii')
else:
bencoding = encoding
if isinstance(unicode_errors, unicode):
berrors = unicode_errors.encode('ascii')
else:
berrors = unicode_errors
enc = PyBytes_AsString(bencoding)
err = PyBytes_AsString(berrors)
template_init(&ctx) template_init(&ctx)
ctx.user.use_list = use_list ctx.user.use_list = use_list
ctx.user.object_hook = ctx.user.list_hook = NULL ctx.user.object_hook = ctx.user.list_hook = NULL
ctx.user.encoding = enc
ctx.user.unicode_errors = err
if object_hook is not None: if object_hook is not None:
if not PyCallable_Check(object_hook): if not PyCallable_Check(object_hook):
raise TypeError("object_hook must be a callable.") raise TypeError("object_hook must be a callable.")
@ -191,8 +229,10 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, bint
raise TypeError("list_hook must be a callable.") raise TypeError("list_hook must be a callable.")
ctx.user.list_hook = <PyObject*>list_hook ctx.user.list_hook = <PyObject*>list_hook
_gc_disable() _gc_disable()
ret = template_execute(&ctx, buf, buf_len, &off) try:
_gc_enable() ret = template_execute(&ctx, buf, buf_len, &off)
finally:
_gc_enable()
if ret == 1: if ret == 1:
return template_data(&ctx) return template_data(&ctx)
else: else:
@ -200,10 +240,10 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, bint
loads = unpacks = unpackb loads = unpacks = unpackb
def unpack(object stream, object object_hook=None, object list_hook=None, bint use_list=0): def unpack(object stream, object object_hook=None, object list_hook=None, bint use_list=0, encoding=None, unicode_errors="strict"):
"""unpack an object from stream.""" """unpack an object from stream."""
return unpackb(stream.read(), use_list=use_list, return unpackb(stream.read(), use_list=use_list,
object_hook=object_hook, list_hook=list_hook) object_hook=object_hook, list_hook=list_hook, encoding=encoding, unicode_errors=unicode_errors)
cdef class Unpacker(object): cdef class Unpacker(object):
"""Unpacker(read_size=1024*1024) """Unpacker(read_size=1024*1024)
@ -236,7 +276,7 @@ cdef class Unpacker(object):
self.buf = NULL; self.buf = NULL;
def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=0, def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=0,
object object_hook=None, object list_hook=None): object object_hook=None, object list_hook=None, encoding=None, unicode_errors=None):
if read_size == 0: if read_size == 0:
read_size = 1024*1024 read_size = 1024*1024
self.use_list = use_list self.use_list = use_list
@ -292,7 +332,7 @@ cdef class Unpacker(object):
new_size = tail + _buf_len new_size = tail + _buf_len
if new_size < buf_size*2: if new_size < buf_size*2:
new_size = buf_size*2 new_size = buf_size*2
buf = <char*>realloc(buf, new_size) buf = <char*>realloc(buf, new_size)
if buf == NULL: if buf == NULL:
# self.buf still holds old buffer and will be freed during # self.buf still holds old buffer and will be freed during
# obj destruction # obj destruction

View File

@ -23,6 +23,8 @@ typedef struct unpack_user {
int use_list; int use_list;
PyObject *object_hook; PyObject *object_hook;
PyObject *list_hook; PyObject *list_hook;
const char *encoding;
const char *unicode_errors;
} unpack_user; } unpack_user;
@ -197,7 +199,11 @@ static inline int template_callback_map_end(unpack_user* u, msgpack_unpack_objec
static inline int template_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) static inline int template_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o)
{ {
PyObject *py; PyObject *py;
py = PyBytes_FromStringAndSize(p, l); if(u->encoding) {
py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors);
} else {
py = PyBytes_FromStringAndSize(p, l);
}
if (!py) if (!py)
return -1; return -1;
*o = py; *o = py;

View File

@ -3,6 +3,7 @@
from nose import main from nose import main
from nose.tools import * from nose.tools import *
from nose.plugins.skip import SkipTest
from msgpack import packs, unpacks from msgpack import packs, unpacks
@ -17,12 +18,65 @@ def testPack():
1.0, 1.0,
"", "a", "a"*31, "a"*32, "", "a", "a"*31, "a"*32,
None, True, False, None, True, False,
(), ((),), ((), None,), (), ((),), ((), None,),
{None: 0}, {None: 0},
(1<<23), (1<<23),
] ]
for td in test_data: for td in test_data:
check(td) check(td)
def testPackUnicode():
    """Round-trip unicode values through packs/unpacks using UTF-8."""
    samples = (u"", u"abcd", (u"defgh",), u"Русский текст")
    for sample in samples:
        packed = packs(sample, encoding='utf-8')
        assert_equal(unpacks(packed, encoding='utf-8'), sample)
def testPackUTF32():
    """Round-trip unicode values using UTF-32; skip when the codec lookup fails."""
    samples = (u"", u"abcd", (u"defgh",), u"Русский текст")
    try:
        for sample in samples:
            packed = packs(sample, encoding='utf-32')
            assert_equal(unpacks(packed, encoding='utf-32'), sample)
    except LookupError:
        # A LookupError here is treated as "codec unavailable", not a failure.
        raise SkipTest
def testPackBytes():
    """Run the file's ``check`` helper over plain byte-string values."""
    for sample in ("", "abcd", ("defgh",)):
        check(sample)
def testIgnoreUnicodeErrors():
    """Unpacking as ASCII with unicode_errors='ignore' drops the 0xED byte."""
    packed = packs('abc\xeddef')
    decoded = unpacks(packed, encoding='ascii', unicode_errors='ignore')
    assert_equal(decoded, "abcdef")
@raises(UnicodeDecodeError)
def testStrictUnicodeUnpack():
    """Unpacking bytes that are not valid UTF-8 must raise UnicodeDecodeError."""
    packed = packs('abc\xeddef')
    unpacks(packed, encoding='utf-8')
@raises(UnicodeEncodeError)
def testStrictUnicodePack():
    """Packing a non-ASCII character as strict ASCII must raise UnicodeEncodeError."""
    text = u"abc\xeddef"
    packs(text, encoding='ascii', unicode_errors='strict')
def testIgnoreErrorsPack():
    """Packing as ASCII with unicode_errors='ignore' drops the Cyrillic letters."""
    packed = packs(u"abcФФФdef", encoding='ascii', unicode_errors='ignore')
    decoded = unpacks(packed, encoding='utf-8')
    assert_equal(decoded, u"abcdef")
@raises(TypeError)
def testNoEncoding():
    """Packing a unicode object with encoding=None must raise TypeError."""
    text = u"abc"
    packs(text, encoding=None)
def testDecodeBinary():
    """With encoding=None the unpacker returns the raw byte string."""
    packed = packs(u"abc")
    raw = unpacks(packed, encoding=None)
    assert_equal(raw, "abc")
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -6,12 +6,12 @@ from msgpack import Unpacker
def test_foobar(): def test_foobar():
unpacker = Unpacker(read_size=3) unpacker = Unpacker(read_size=3)
unpacker.feed('foobar') unpacker.feed('foobar')
assert unpacker.unpack() == ord(b'f') assert unpacker.unpack() == ord('f')
assert unpacker.unpack() == ord(b'o') assert unpacker.unpack() == ord('o')
assert unpacker.unpack() == ord(b'o') assert unpacker.unpack() == ord('o')
assert unpacker.unpack() == ord(b'b') assert unpacker.unpack() == ord('b')
assert unpacker.unpack() == ord(b'a') assert unpacker.unpack() == ord('a')
assert unpacker.unpack() == ord(b'r') assert unpacker.unpack() == ord('r')
try: try:
o = unpacker.unpack() o = unpacker.unpack()
print "Oops!", o print "Oops!", o
@ -20,14 +20,14 @@ def test_foobar():
assert 1 assert 1
else: else:
assert 0 assert 0
unpacker.feed(b'foo') unpacker.feed('foo')
unpacker.feed(b'bar') unpacker.feed('bar')
k = 0 k = 0
for o, e in zip(unpacker, b'foobarbaz'): for o, e in zip(unpacker, 'foobarbaz'):
assert o == ord(e) assert o == ord(e)
k += 1 k += 1
assert k == len(b'foobar') assert k == len('foobar')
if __name__ == '__main__': if __name__ == '__main__':
test_foobar() test_foobar()

View File

@ -26,7 +26,7 @@ def test_decode_hook():
unpacked = unpacks(packed, object_hook=_decode_complex) unpacked = unpacks(packed, object_hook=_decode_complex)
eq_(unpacked[1], 1+2j) eq_(unpacked[1], 1+2j)
@raises(TypeError) @raises(ValueError)
def test_bad_hook(): def test_bad_hook():
packed = packs([3, 1+2j], default=lambda o: o) packed = packs([3, 1+2j], default=lambda o: o)
unpacked = unpacks(packed) unpacked = unpacks(packed)

View File

@ -17,12 +17,61 @@ def testPack():
1.0, 1.0,
b"", b"a", b"a"*31, b"a"*32, b"", b"a", b"a"*31, b"a"*32,
None, True, False, None, True, False,
(), ((),), ((), None,), (), ((),), ((), None,),
{None: 0}, {None: 0},
(1<<23), (1<<23),
] ]
for td in test_data: for td in test_data:
check(td) check(td)
def testPackUnicode():
    """Round-trip str values through packs/unpacks using UTF-8."""
    samples = ("", "abcd", ("defgh",), "Русский текст")
    for sample in samples:
        packed = packs(sample, encoding='utf-8')
        assert_equal(unpacks(packed, encoding='utf-8'), sample)
def testPackUTF32():
    """Round-trip str values through packs/unpacks using UTF-32.

    Each sample is packed once and the packed bytes are reused for the
    unpack step; the test no longer writes the packed bytes to stdout.
    """
    test_data = [
        "", "abcd", ("defgh",), "Русский текст",
    ]
    for td in test_data:
        packed = packs(td, encoding='utf-32')
        unpacked = unpacks(packed, encoding='utf-32')
        assert_equal(unpacked, td)
def testPackBytes():
    """Run the file's ``check`` helper over bytes values."""
    for sample in (b"", b"abcd", (b"defgh",)):
        check(sample)
def testIgnoreUnicodeErrors():
    """Unpacking as UTF-8 with unicode_errors='ignore' drops the invalid 0xED byte."""
    packed = packs(b'abc\xeddef')
    decoded = unpacks(packed, encoding='utf-8', unicode_errors='ignore')
    assert_equal(decoded, "abcdef")
@raises(UnicodeDecodeError)
def testStrictUnicodeUnpack():
    """Unpacking bytes that are not valid UTF-8 must raise UnicodeDecodeError."""
    packed = packs(b'abc\xeddef')
    unpacks(packed, encoding='utf-8')
@raises(UnicodeEncodeError)
def testStrictUnicodePack():
    """Packing a non-ASCII character as strict ASCII must raise UnicodeEncodeError."""
    text = "abc\xeddef"
    packs(text, encoding='ascii', unicode_errors='strict')
def testIgnoreErrorsPack():
    """Packing as ASCII with unicode_errors='ignore' drops the Cyrillic letters."""
    packed = packs("abcФФФdef", encoding='ascii', unicode_errors='ignore')
    decoded = unpacks(packed, encoding='utf-8')
    assert_equal(decoded, "abcdef")
@raises(TypeError)
def testNoEncoding():
    """Packing a str with encoding=None must raise TypeError."""
    text = "abc"
    packs(text, encoding=None)
def testDecodeBinary():
    """With encoding=None the unpacker returns raw bytes."""
    packed = packs("abc")
    raw = unpacks(packed, encoding=None)
    assert_equal(raw, b"abc")
if __name__ == '__main__': if __name__ == '__main__':
main() main()