mirror of
https://github.com/msgpack/msgpack-c.git
synced 2025-03-20 13:33:51 +01:00
perl: add $unpacker->utf8 mode, decoding strings as UTF-8.
This commit is contained in:
parent
f2d13cd647
commit
a4a04872a3
@ -248,6 +248,7 @@ sub _pack {
|
|||||||
# UNPACK
|
# UNPACK
|
||||||
#
|
#
|
||||||
|
|
||||||
|
our $_utf8 = 0;
|
||||||
my $p; # position variables for speed.
|
my $p; # position variables for speed.
|
||||||
|
|
||||||
sub unpack :method {
|
sub unpack :method {
|
||||||
@ -358,7 +359,9 @@ sub _unpack {
|
|||||||
$num = $byte & ~0xa0;
|
$num = $byte & ~0xa0;
|
||||||
$p += $num;
|
$p += $num;
|
||||||
}
|
}
|
||||||
return substr( $value, $p - $num, $num );
|
my $s = substr( $value, $p - $num, $num );
|
||||||
|
utf8::decode($s) if $_utf8;
|
||||||
|
return $s;
|
||||||
}
|
}
|
||||||
|
|
||||||
elsif ( $byte == 0xc0 ) { # nil
|
elsif ( $byte == 0xc0 ) { # nil
|
||||||
@ -396,9 +399,19 @@ package
|
|||||||
Data::MessagePack::PP::Unpacker;
|
Data::MessagePack::PP::Unpacker;
|
||||||
|
|
||||||
sub new {
|
sub new {
|
||||||
bless { pos => 0 }, shift;
|
bless { pos => 0, utf8 => 0 }, shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sub utf8 {
|
||||||
|
my $self = shift;
|
||||||
|
$self->{utf8} = (@_ ? shift : 1);
|
||||||
|
return $self;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub get_utf8 {
|
||||||
|
my($self) = @_;
|
||||||
|
return $self->{utf8};
|
||||||
|
}
|
||||||
|
|
||||||
sub execute_limit {
|
sub execute_limit {
|
||||||
execute( @_ );
|
execute( @_ );
|
||||||
@ -540,7 +553,9 @@ sub _count {
|
|||||||
|
|
||||||
|
|
||||||
sub data {
|
sub data {
|
||||||
return Data::MessagePack->unpack( substr($_[0]->{ data }, 0, $_[0]->{pos}) );
|
my($self) = @_;
|
||||||
|
local $Data::MessagePack::PP::_utf8 = $self->{utf8};
|
||||||
|
return Data::MessagePack->unpack( substr($self->{ data }, 0, $self->{pos}) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,6 +24,17 @@ This is a streaming deserializer for messagepack.
|
|||||||
|
|
||||||
creates a new instance of stream deserializer.
|
creates a new instance of stream deserializer.
|
||||||
|
|
||||||
|
=item $up->utf8([$bool])
|
||||||
|
|
||||||
|
sets utf8 mode. true if I<$bool> is omitted.
|
||||||
|
returns I<$up> itself.
|
||||||
|
|
||||||
|
If utf8 mode is enabled, strings will be decoded as UTF-8.
|
||||||
|
|
||||||
|
=item my $ret = $up->get_utf8()
|
||||||
|
|
||||||
|
returns the utf8 mode flag of I<$up>.
|
||||||
|
|
||||||
=item my $ret = $up->execute($data, $offset);
|
=item my $ret = $up->execute($data, $offset);
|
||||||
|
|
||||||
=item my $ret = $up->execute_limit($data, $offset, $limit)
|
=item my $ret = $up->execute_limit($data, $offset, $limit)
|
||||||
|
27
perl/t/15_utf8.t
Normal file
27
perl/t/15_utf8.t
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
#!perl -w
|
||||||
|
use strict;
|
||||||
|
use Test::More;
|
||||||
|
use Data::MessagePack;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
my $data = [42, undef, 'foo', "\x{99f1}\x{99dd}"];
|
||||||
|
my $packed = Data::MessagePack->pack($data);
|
||||||
|
|
||||||
|
my $u = Data::MessagePack::Unpacker->new()->utf8();
|
||||||
|
ok $u->get_utf8();
|
||||||
|
$u->execute($packed);
|
||||||
|
my $d = $u->data();
|
||||||
|
$u->reset();
|
||||||
|
is_deeply $d, $data, 'decoded';
|
||||||
|
|
||||||
|
is $u->utf8(0), $u, 'utf8(0)';
|
||||||
|
ok !$u->get_utf8();
|
||||||
|
$u->execute($packed);
|
||||||
|
$d = $u->data();
|
||||||
|
$u->reset();
|
||||||
|
my $s = $data->[3];
|
||||||
|
utf8::encode($s);
|
||||||
|
is_deeply $d->[3], $s, 'not decoded';
|
||||||
|
|
||||||
|
done_testing;
|
||||||
|
|
@ -7,6 +7,8 @@
|
|||||||
XS(xs_pack);
|
XS(xs_pack);
|
||||||
XS(xs_unpack);
|
XS(xs_unpack);
|
||||||
XS(xs_unpacker_new);
|
XS(xs_unpacker_new);
|
||||||
|
XS(xs_unpacker_utf8);
|
||||||
|
XS(xs_unpacker_get_utf8);
|
||||||
XS(xs_unpacker_execute);
|
XS(xs_unpacker_execute);
|
||||||
XS(xs_unpacker_execute_limit);
|
XS(xs_unpacker_execute_limit);
|
||||||
XS(xs_unpacker_is_finished);
|
XS(xs_unpacker_is_finished);
|
||||||
@ -28,6 +30,8 @@ XS(boot_Data__MessagePack) {
|
|||||||
newXS("Data::MessagePack::unpack", xs_unpack, __FILE__);
|
newXS("Data::MessagePack::unpack", xs_unpack, __FILE__);
|
||||||
|
|
||||||
newXS("Data::MessagePack::Unpacker::new", xs_unpacker_new, __FILE__);
|
newXS("Data::MessagePack::Unpacker::new", xs_unpacker_new, __FILE__);
|
||||||
|
newXS("Data::MessagePack::Unpacker::utf8", xs_unpacker_utf8, __FILE__);
|
||||||
|
newXS("Data::MessagePack::Unpacker::get_utf8", xs_unpacker_get_utf8, __FILE__);
|
||||||
newXS("Data::MessagePack::Unpacker::execute", xs_unpacker_execute, __FILE__);
|
newXS("Data::MessagePack::Unpacker::execute", xs_unpacker_execute, __FILE__);
|
||||||
newXS("Data::MessagePack::Unpacker::execute_limit", xs_unpacker_execute_limit, __FILE__);
|
newXS("Data::MessagePack::Unpacker::execute_limit", xs_unpacker_execute_limit, __FILE__);
|
||||||
newXS("Data::MessagePack::Unpacker::is_finished", xs_unpacker_is_finished, __FILE__);
|
newXS("Data::MessagePack::Unpacker::is_finished", xs_unpacker_is_finished, __FILE__);
|
||||||
|
@ -13,6 +13,7 @@ START_MY_CXT
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
bool finished;
|
bool finished;
|
||||||
bool incremented;
|
bool incremented;
|
||||||
|
bool utf8;
|
||||||
} unpack_user;
|
} unpack_user;
|
||||||
|
|
||||||
#include "msgpack/unpack_define.h"
|
#include "msgpack/unpack_define.h"
|
||||||
@ -237,6 +238,9 @@ STATIC_INLINE int template_callback_raw(unpack_user* u PERL_UNUSED_DECL, const c
|
|||||||
dTHX;
|
dTHX;
|
||||||
/* newSVpvn(p, l) returns an undef if p == NULL */
|
/* newSVpvn(p, l) returns an undef if p == NULL */
|
||||||
*o = ((l==0) ? newSVpvs("") : newSVpvn(p, l));
|
*o = ((l==0) ? newSVpvs("") : newSVpvn(p, l));
|
||||||
|
if(u->utf8) {
|
||||||
|
sv_utf8_decode(*o);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -276,7 +280,7 @@ XS(xs_unpack) {
|
|||||||
msgpack_unpack_t mp;
|
msgpack_unpack_t mp;
|
||||||
template_init(&mp);
|
template_init(&mp);
|
||||||
|
|
||||||
unpack_user const u = {false, false};
|
unpack_user const u = {false, false, false};
|
||||||
mp.user = u;
|
mp.user = u;
|
||||||
|
|
||||||
size_t from = 0;
|
size_t from = 0;
|
||||||
@ -303,7 +307,7 @@ XS(xs_unpack) {
|
|||||||
|
|
||||||
STATIC_INLINE void _reset(SV* const self) {
|
STATIC_INLINE void _reset(SV* const self) {
|
||||||
dTHX;
|
dTHX;
|
||||||
unpack_user const u = {false, false};
|
unpack_user const u = {false, false, false};
|
||||||
|
|
||||||
UNPACKER(self, mp);
|
UNPACKER(self, mp);
|
||||||
template_init(mp);
|
template_init(mp);
|
||||||
@ -328,6 +332,26 @@ XS(xs_unpacker_new) {
|
|||||||
XSRETURN(1);
|
XSRETURN(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
XS(xs_unpacker_utf8) {
|
||||||
|
dXSARGS;
|
||||||
|
if (!(items == 1 || items == 2)) {
|
||||||
|
Perl_croak(aTHX_ "Usage: $unpacker->utf8([$bool)");
|
||||||
|
}
|
||||||
|
UNPACKER(ST(0), mp);
|
||||||
|
mp->user.utf8 = (items == 1 || sv_true(ST(1))) ? true : false;
|
||||||
|
XSRETURN(1); // returns $self
|
||||||
|
}
|
||||||
|
|
||||||
|
XS(xs_unpacker_get_utf8) {
|
||||||
|
dXSARGS;
|
||||||
|
if (items != 1) {
|
||||||
|
Perl_croak(aTHX_ "Usage: $unpacker->get_utf8()");
|
||||||
|
}
|
||||||
|
UNPACKER(ST(0), mp);
|
||||||
|
ST(0) = boolSV(mp->user.utf8);
|
||||||
|
XSRETURN(1);
|
||||||
|
}
|
||||||
|
|
||||||
STATIC_INLINE size_t
|
STATIC_INLINE size_t
|
||||||
_execute_impl(SV* const self, SV* const data, UV const offset, UV const limit) {
|
_execute_impl(SV* const self, SV* const data, UV const offset, UV const limit) {
|
||||||
dTHX;
|
dTHX;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user