Merge branch 'master' of git@github.com:msgpack/msgpack

This commit is contained in:
Muga Nishizawa
2010-10-07 13:01:33 +09:00
12 changed files with 130 additions and 11 deletions

View File

@@ -38,7 +38,8 @@ test -f ChangeLog || touch ChangeLog
test -f NEWS || touch NEWS
test -f README || cp -f README.md README
if test ! ./preprocess; then
./preprocess
if [ $? -ne 0 ]; then
exit 1
fi

View File

@@ -1,3 +1,12 @@
0.30
- fix utf8 mode so that it is no longer reset by the $unpacker->reset method
0.29
- add $unpacker->utf8 mode, decoding strings as UTF-8.
0.28
- added more tests(gfx)

View File

@@ -102,6 +102,11 @@ TODO
"while(read($socket, $buffer, $arbitrary_buffer_size)) { ... }"). We
should implement the internal buffer for the unpacker.
UTF8 mode
Data::MessagePack::Unpacker supports utf8 mode, which decodes
strings as UTF-8. "Data::MessagePack->unpack" should support
utf8 mode in the future.
AUTHORS
Tokuhiro Matsuno

View File

@@ -3,7 +3,7 @@ use strict;
use warnings;
use 5.008001;
our $VERSION = '0.28';
our $VERSION = '0.29';
our $PreferInteger = 0;
sub true () {
@@ -165,6 +165,12 @@ will astonish those who try to unpack byte streams with an arbitrary buffer size
(e.g. C<< while(read($socket, $buffer, $arbitrary_buffer_size)) { ... } >>).
We should implement the internal buffer for the unpacker.
=item UTF8 mode
Data::MessagePack::Unpacker supports utf8 mode, which decodes strings
as UTF-8. C<< Data::MessagePack->unpack >> should support utf8 mode in the
future.
=back
=head1 AUTHORS

View File

@@ -248,6 +248,7 @@ sub _pack {
# UNPACK
#
our $_utf8 = 0;
my $p; # position variables for speed.
sub unpack :method {
@@ -358,7 +359,9 @@ sub _unpack {
$num = $byte & ~0xa0;
$p += $num;
}
return substr( $value, $p - $num, $num );
my $s = substr( $value, $p - $num, $num );
utf8::decode($s) if $_utf8;
return $s;
}
elsif ( $byte == 0xc0 ) { # nil
@@ -396,9 +399,19 @@ package
Data::MessagePack::PP::Unpacker;
sub new {
bless { pos => 0 }, shift;
bless { pos => 0, utf8 => 0 }, shift;
}
# Enable or disable utf8 mode on this unpacker.
#
# With no argument the mode is switched on; otherwise the truthiness of the
# argument selects the mode.  The flag is stored as a plain 1/0 rather than
# as whatever scalar the caller passed, so the object never keeps a caller's
# value (or reference) alive and get_utf8 always reports a simple boolean.
#
# Returns the unpacker itself, so the call can be chained.
sub utf8 {
    my $self   = shift;
    my $enable = @_ ? shift : 1;    # omitted argument means "on"
    $self->{utf8} = $enable ? 1 : 0;
    return $self;
}
# Accessor: hands back the utf8 mode flag currently stored on this unpacker.
sub get_utf8 {
    my $unpacker = shift;
    return $unpacker->{utf8};
}
sub execute_limit {
execute( @_ );
@@ -540,7 +553,9 @@ sub _count {
sub data {
return Data::MessagePack->unpack( substr($_[0]->{ data }, 0, $_[0]->{pos}) );
my($self) = @_;
local $Data::MessagePack::PP::_utf8 = $self->{utf8};
return Data::MessagePack->unpack( substr($self->{ data }, 0, $self->{pos}) );
}

View File

@@ -24,6 +24,19 @@ This is a streaming deserializer for messagepack.
creates a new instance of stream deserializer.
=item $up->utf8([$bool])
Sets utf8 mode, enabling it if I<$bool> is omitted.
Returns I<$up> itself.
If utf8 mode is enabled, strings will be decoded as UTF-8.
The utf8 mode is disabled by default.
=item my $ret = $up->get_utf8()
returns the utf8 mode flag of I<$up>.
=item my $ret = $up->execute($data, $offset);
=item my $ret = $up->execute_limit($data, $offset, $limit)

33
perl/t/15_utf8.t Normal file
View File

@@ -0,0 +1,33 @@
#!perl -w
# Exercises the Unpacker's utf8 mode.  With the mode on, unpacked strings are
# expected to equal the original character strings; with it off, they are
# expected to equal the UTF-8 encoded bytes.  Each phase unpacks two
# concatenated messages with a reset() after each, so the second pass also
# checks that the mode setting survives reset().
use strict;
use Test::More;
use Data::MessagePack;
use utf8;

my $data = [42, undef, 'foo', "\x{99f1}\x{99dd}"];

# Two copies back to back: each loop pass below consumes one message.
my $packed = Data::MessagePack->pack($data) x 2;

# The wide string as it looks when left as undecoded UTF-8 bytes.
my $raw_bytes = $data->[3];
utf8::encode($raw_bytes);

my $u = Data::MessagePack::Unpacker->new()->utf8();

my $p = 0;
for my $round (1 .. 2) {
    ok $u->get_utf8(), "utf8 mode is on (round $round)";
    $p = $u->execute($packed, $p);
    my $d = $u->data();
    $u->reset();
    is_deeply $d, $data, "decoded (round $round)";
}

# utf8() returns the unpacker itself, even when switching the mode off.
is $u->utf8(0), $u, 'utf8(0)';

$p = 0;
for my $round (1 .. 2) {
    ok !$u->get_utf8(), "utf8 mode is off (round $round)";
    $p = $u->execute($packed, $p);
    my $d = $u->data();
    $u->reset();
    is $d->[3], $raw_bytes, "not decoded (round $round)";
}

done_testing;

View File

@@ -7,6 +7,8 @@
XS(xs_pack);
XS(xs_unpack);
XS(xs_unpacker_new);
XS(xs_unpacker_utf8);
XS(xs_unpacker_get_utf8);
XS(xs_unpacker_execute);
XS(xs_unpacker_execute_limit);
XS(xs_unpacker_is_finished);
@@ -28,6 +30,8 @@ XS(boot_Data__MessagePack) {
newXS("Data::MessagePack::unpack", xs_unpack, __FILE__);
newXS("Data::MessagePack::Unpacker::new", xs_unpacker_new, __FILE__);
newXS("Data::MessagePack::Unpacker::utf8", xs_unpacker_utf8, __FILE__);
newXS("Data::MessagePack::Unpacker::get_utf8", xs_unpacker_get_utf8, __FILE__);
newXS("Data::MessagePack::Unpacker::execute", xs_unpacker_execute, __FILE__);
newXS("Data::MessagePack::Unpacker::execute_limit", xs_unpacker_execute_limit, __FILE__);
newXS("Data::MessagePack::Unpacker::is_finished", xs_unpacker_is_finished, __FILE__);

View File

@@ -13,6 +13,7 @@ START_MY_CXT
typedef struct {
bool finished;
bool incremented;
bool utf8;
} unpack_user;
#include "msgpack/unpack_define.h"
@@ -237,6 +238,9 @@ STATIC_INLINE int template_callback_raw(unpack_user* u PERL_UNUSED_DECL, const c
dTHX;
/* newSVpvn(p, l) returns an undef if p == NULL */
*o = ((l==0) ? newSVpvs("") : newSVpvn(p, l));
if(u->utf8) {
sv_utf8_decode(*o);
}
return 0;
}
@@ -276,7 +280,7 @@ XS(xs_unpack) {
msgpack_unpack_t mp;
template_init(&mp);
unpack_user const u = {false, false};
unpack_user const u = {false, false, false};
mp.user = u;
size_t from = 0;
@@ -303,7 +307,7 @@ XS(xs_unpack) {
STATIC_INLINE void _reset(SV* const self) {
dTHX;
unpack_user const u = {false, false};
unpack_user const u = {false, false, false};
UNPACKER(self, mp);
template_init(mp);
@@ -328,6 +332,26 @@ XS(xs_unpacker_new) {
XSRETURN(1);
}
/*
 * $unpacker->utf8([$bool])
 *
 * Sets the unpacker's utf8 flag: true when called with the invocant only,
 * otherwise the truth value of $bool.  Croaks with a usage message for any
 * other argument count.  Returns the invocant.
 */
XS(xs_unpacker_utf8) {
    dXSARGS;
    if (!(items == 1 || items == 2)) {
        Perl_croak(aTHX_ "Usage: $unpacker->utf8([$bool])");
    }
    UNPACKER(ST(0), mp);
    mp->user.utf8 = (items == 1 || sv_true(ST(1))) ? true : false;
    XSRETURN(1); /* ST(0) is untouched, so this returns $self */
}
XS(xs_unpacker_get_utf8) {
dXSARGS;
if (items != 1) {
Perl_croak(aTHX_ "Usage: $unpacker->get_utf8()");
}
UNPACKER(ST(0), mp);
ST(0) = boolSV(mp->user.utf8);
XSRETURN(1);
}
STATIC_INLINE size_t
_execute_impl(SV* const self, SV* const data, UV const offset, UV const limit) {
dTHX;
@@ -419,10 +443,12 @@ XS(xs_unpacker_reset) {
}
UNPACKER(ST(0), mp);
bool const utf8 = mp->user.utf8; // save
SV* const data = template_data(mp);
SvREFCNT_dec(data);
_reset(ST(0));
mp->user.utf8 = utf8;
XSRETURN(0);
}

View File

@@ -7,6 +7,7 @@ cdef extern from "Python.h":
cdef object PyBytes_FromStringAndSize(const_char_ptr b, Py_ssize_t len)
cdef PyObject* Py_True
cdef PyObject* Py_False
cdef object PyUnicode_AsUTF8String(object)
cdef long long PyLong_AsLongLong(object o)
cdef unsigned long long PyLong_AsUnsignedLongLong(object o)
@@ -105,7 +106,7 @@ cdef class Packer(object):
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
elif PyUnicode_Check(o):
o = o.encode('utf-8')
o = PyUnicode_AsUTF8String(o)
rawval = o
ret = msgpack_pack_raw(&self.pk, len(o))
if ret == 0:
@@ -169,7 +170,7 @@ cdef extern from "unpack.h":
object template_data(template_context* ctx)
def unpackb(object packed_bytes):
def unpackb(bytes packed_bytes):
"""Unpack packed_bytes to object. Returns an unpacked object."""
cdef const_char_ptr p = packed_bytes
cdef template_context ctx
@@ -232,7 +233,7 @@ cdef class Unpacker(object):
cdef object file_like
cdef int read_size
cdef object waiting_bytes
cdef int use_list
cdef bint use_list
def __cinit__(self):
self.buf = NULL
@@ -241,7 +242,7 @@ cdef class Unpacker(object):
if self.buf:
free(self.buf);
def __init__(self, file_like=None, int read_size=0, use_list=0):
def __init__(self, file_like=None, int read_size=0, bint use_list=0):
if read_size == 0:
read_size = 1024*1024
self.use_list = use_list

View File

@@ -98,5 +98,8 @@ def test_match():
for v, p in cases:
match(v, p)
def test_unicode():
    # A unicode literal is packed; unpacking the result is expected to give
    # back the plain 'foobar' string.
    packed = packs(u'foobar')
    assert_equal('foobar', unpacks(packed))
if __name__ == '__main__':
main()

View File

@@ -98,5 +98,8 @@ def test_match():
for v, p in cases:
match(v, p)
def test_unicode():
    # A str is packed; unpacking the result is expected to give back the
    # bytes object b'foobar'.
    packed = packs('foobar')
    assert_equal(b'foobar', unpacks(packed))
if __name__ == '__main__':
main()