diff --git a/perl/lib/Data/MessagePack/PP.pm b/perl/lib/Data/MessagePack/PP.pm index 5dccc0bb..00e58b92 100644 --- a/perl/lib/Data/MessagePack/PP.pm +++ b/perl/lib/Data/MessagePack/PP.pm @@ -248,6 +248,7 @@ sub _pack { # UNPACK # +our $_utf8 = 0; my $p; # position variables for speed. sub unpack :method { @@ -358,7 +359,9 @@ sub _unpack { $num = $byte & ~0xa0; $p += $num; } - return substr( $value, $p - $num, $num ); + my $s = substr( $value, $p - $num, $num ); + utf8::decode($s) if $_utf8; + return $s; } elsif ( $byte == 0xc0 ) { # nil @@ -396,9 +399,19 @@ package Data::MessagePack::PP::Unpacker; sub new { - bless { pos => 0 }, shift; + bless { pos => 0, utf8 => 0 }, shift; } +sub utf8 { + my $self = shift; + $self->{utf8} = (@_ ? shift : 1); + return $self; +} + +sub get_utf8 { + my($self) = @_; + return $self->{utf8}; +} sub execute_limit { execute( @_ ); @@ -540,7 +553,9 @@ sub _count { sub data { - return Data::MessagePack->unpack( substr($_[0]->{ data }, 0, $_[0]->{pos}) ); + my($self) = @_; + local $Data::MessagePack::PP::_utf8 = $self->{utf8}; + return Data::MessagePack->unpack( substr($self->{ data }, 0, $self->{pos}) ); } diff --git a/perl/lib/Data/MessagePack/Unpacker.pod b/perl/lib/Data/MessagePack/Unpacker.pod index 2bc4549c..37ab3db8 100644 --- a/perl/lib/Data/MessagePack/Unpacker.pod +++ b/perl/lib/Data/MessagePack/Unpacker.pod @@ -24,6 +24,17 @@ This is a streaming deserializer for messagepack. creates a new instance of stream deserializer. +=item $up->utf8([$bool]) + +sets utf8 mode. true if I<$bool> is omitted. +returns I<$up> itself. + +If utf8 mode is enabled, strings will be decoded as UTF-8. + +=item my $ret = $up->get_utf8() + +returns the utf8 mode flag of I<$up>. + =item my $ret = $up->execute($data, $offset); =item my $ret = $up->execute_limit($data, $offset, $limit) diff --git a/perl/t/15_utf8.t b/perl/t/15_utf8.t new file mode 100644 index 00000000..d7d17b88 --- /dev/null +++ b/perl/t/15_utf8.t @@ -0,0 +1,27 @@ +#!perl -w +use strict; +use Test::More; +use Data::MessagePack; +use utf8; + +my $data = [42, undef, 'foo', "\x{99f1}\x{99dd}"]; +my $packed = Data::MessagePack->pack($data); + +my $u = Data::MessagePack::Unpacker->new()->utf8(); +ok $u->get_utf8(); +$u->execute($packed); +my $d = $u->data(); +$u->reset(); +is_deeply $d, $data, 'decoded'; + +is $u->utf8(0), $u, 'utf8(0)'; +ok !$u->get_utf8(); +$u->execute($packed); +$d = $u->data(); +$u->reset(); +my $s = $data->[3]; +utf8::encode($s); +is_deeply $d->[3], $s, 'not decoded'; + +done_testing; + diff --git a/perl/xs-src/MessagePack.c b/perl/xs-src/MessagePack.c index 69337f41..0c3c0b16 100644 --- a/perl/xs-src/MessagePack.c +++ b/perl/xs-src/MessagePack.c @@ -7,6 +7,8 @@ XS(xs_pack); XS(xs_unpack); XS(xs_unpacker_new); +XS(xs_unpacker_utf8); +XS(xs_unpacker_get_utf8); XS(xs_unpacker_execute); XS(xs_unpacker_execute_limit); XS(xs_unpacker_is_finished); @@ -28,6 +30,8 @@ XS(boot_Data__MessagePack) { newXS("Data::MessagePack::unpack", xs_unpack, __FILE__); newXS("Data::MessagePack::Unpacker::new", xs_unpacker_new, __FILE__); + newXS("Data::MessagePack::Unpacker::utf8", xs_unpacker_utf8, __FILE__); + newXS("Data::MessagePack::Unpacker::get_utf8", xs_unpacker_get_utf8, __FILE__); newXS("Data::MessagePack::Unpacker::execute", xs_unpacker_execute, __FILE__); newXS("Data::MessagePack::Unpacker::execute_limit", xs_unpacker_execute_limit, __FILE__); newXS("Data::MessagePack::Unpacker::is_finished", xs_unpacker_is_finished, __FILE__); diff --git a/perl/xs-src/unpack.c b/perl/xs-src/unpack.c index 065573ab..f39d8c11 100644 --- a/perl/xs-src/unpack.c +++ b/perl/xs-src/unpack.c @@ -13,6 +13,7 @@ START_MY_CXT typedef struct { bool finished; bool incremented; + bool utf8; } unpack_user; #include "msgpack/unpack_define.h" @@ -237,6 +238,9 @@ STATIC_INLINE int template_callback_raw(unpack_user* u PERL_UNUSED_DECL, const c dTHX; /* newSVpvn(p, l) returns an undef if p == NULL */ *o = ((l==0) ? newSVpvs("") : newSVpvn(p, l)); + if(u->utf8) { + sv_utf8_decode(*o); + } return 0; } @@ -276,7 +280,7 @@ XS(xs_unpack) { msgpack_unpack_t mp; template_init(&mp); - unpack_user const u = {false, false}; + unpack_user const u = {false, false, false}; mp.user = u; size_t from = 0; @@ -303,7 +307,7 @@ XS(xs_unpack) { STATIC_INLINE void _reset(SV* const self) { dTHX; - unpack_user const u = {false, false}; + unpack_user const u = {false, false, false}; UNPACKER(self, mp); template_init(mp); @@ -328,6 +332,26 @@ XS(xs_unpacker_new) { XSRETURN(1); } +XS(xs_unpacker_utf8) { + dXSARGS; + if (!(items == 1 || items == 2)) { + Perl_croak(aTHX_ "Usage: $unpacker->utf8([$bool)"); + } + UNPACKER(ST(0), mp); + mp->user.utf8 = (items == 1 || sv_true(ST(1))) ? true : false; + XSRETURN(1); // returns $self +} + +XS(xs_unpacker_get_utf8) { + dXSARGS; + if (items != 1) { + Perl_croak(aTHX_ "Usage: $unpacker->get_utf8()"); + } + UNPACKER(ST(0), mp); + ST(0) = boolSV(mp->user.utf8); + XSRETURN(1); +} + STATIC_INLINE size_t _execute_impl(SV* const self, SV* const data, UV const offset, UV const limit) { dTHX;