From a4a04872a3e7d331c722938a719711cb3178a5c7 Mon Sep 17 00:00:00 2001
From: "Fuji, Goro" <gfuji@cpan.org>
Date: Tue, 5 Oct 2010 17:10:10 +0900
Subject: [PATCH 1/2] perl: add $unpacker->utf8 mode, decoding strings as
 UTF-8.

---
 perl/lib/Data/MessagePack/PP.pm        | 21 ++++++++++++++++---
 perl/lib/Data/MessagePack/Unpacker.pod | 11 ++++++++++
 perl/t/15_utf8.t                       | 27 +++++++++++++++++++++++++
 perl/xs-src/MessagePack.c              |  4 ++++
 perl/xs-src/unpack.c                   | 28 ++++++++++++++++++++++++--
 5 files changed, 86 insertions(+), 5 deletions(-)
 create mode 100644 perl/t/15_utf8.t

diff --git a/perl/lib/Data/MessagePack/PP.pm b/perl/lib/Data/MessagePack/PP.pm
index 5dccc0bb..00e58b92 100644
--- a/perl/lib/Data/MessagePack/PP.pm
+++ b/perl/lib/Data/MessagePack/PP.pm
@@ -248,6 +248,7 @@ sub _pack {
 # UNPACK
 #
 
+our $_utf8 = 0;
 my $p; # position variables for speed.
 
 sub unpack :method {
@@ -358,7 +359,9 @@ sub _unpack {
             $num = $byte & ~0xa0;
             $p += $num;
         }
-        return substr( $value, $p - $num, $num );
+        my $s = substr( $value, $p - $num, $num );
+        utf8::decode($s) if $_utf8;
+        return $s;
     }
 
     elsif ( $byte == 0xc0 ) { # nil
@@ -396,9 +399,19 @@ package
     Data::MessagePack::PP::Unpacker;
 
 sub new {
-    bless { pos => 0 }, shift;
+    bless { pos => 0, utf8 => 0 }, shift;
 }
 
+sub utf8 {
+    my $self = shift;
+    $self->{utf8} = (@_ ? shift : 1);
+    return $self;
+}
+
+sub get_utf8 {
+    my($self) = @_;
+    return $self->{utf8};
+}
 
 sub execute_limit {
     execute( @_ );
@@ -540,7 +553,9 @@ sub _count {
 
 
 sub data {
-    return Data::MessagePack->unpack( substr($_[0]->{ data }, 0, $_[0]->{pos}) );
+    my($self) = @_;
+    local $Data::MessagePack::PP::_utf8 = $self->{utf8};
+    return Data::MessagePack->unpack( substr($self->{ data }, 0, $self->{pos}) );
 }
 
 
diff --git a/perl/lib/Data/MessagePack/Unpacker.pod b/perl/lib/Data/MessagePack/Unpacker.pod
index 2bc4549c..37ab3db8 100644
--- a/perl/lib/Data/MessagePack/Unpacker.pod
+++ b/perl/lib/Data/MessagePack/Unpacker.pod
@@ -24,6 +24,17 @@ This is a streaming deserializer for messagepack.
 
 creates a new instance of stream deserializer.
 
+=item $up->utf8([$bool])
+
+sets utf8 mode. true if I<$bool> is omitted.
+returns I<$up> itself.
+
+If utf8 mode is enabled, strings will be decoded as UTF-8.
+
+=item my $ret = $up->get_utf8()
+
+returns the utf8 mode flag of I<$up>.
+
 =item my $ret = $up->execute($data, $offset);
 
 =item my $ret = $up->execute_limit($data, $offset, $limit)
diff --git a/perl/t/15_utf8.t b/perl/t/15_utf8.t
new file mode 100644
index 00000000..d7d17b88
--- /dev/null
+++ b/perl/t/15_utf8.t
@@ -0,0 +1,27 @@
+#!perl -w
+use strict;
+use Test::More;
+use Data::MessagePack;
+use utf8;
+
+my $data = [42, undef, 'foo', "\x{99f1}\x{99dd}"];
+my $packed = Data::MessagePack->pack($data);
+
+my $u = Data::MessagePack::Unpacker->new()->utf8();
+ok $u->get_utf8();
+$u->execute($packed);
+my $d = $u->data();
+$u->reset();
+is_deeply $d, $data, 'decoded';
+
+is $u->utf8(0), $u, 'utf8(0)';
+ok !$u->get_utf8();
+$u->execute($packed);
+$d = $u->data();
+$u->reset();
+my $s = $data->[3];
+utf8::encode($s);
+is_deeply $d->[3], $s, 'not decoded';
+
+done_testing;
+
diff --git a/perl/xs-src/MessagePack.c b/perl/xs-src/MessagePack.c
index 69337f41..0c3c0b16 100644
--- a/perl/xs-src/MessagePack.c
+++ b/perl/xs-src/MessagePack.c
@@ -7,6 +7,8 @@
 XS(xs_pack);
 XS(xs_unpack);
 XS(xs_unpacker_new);
+XS(xs_unpacker_utf8);
+XS(xs_unpacker_get_utf8);
 XS(xs_unpacker_execute);
 XS(xs_unpacker_execute_limit);
 XS(xs_unpacker_is_finished);
@@ -28,6 +30,8 @@ XS(boot_Data__MessagePack) {
     newXS("Data::MessagePack::unpack", xs_unpack, __FILE__);
 
     newXS("Data::MessagePack::Unpacker::new",           xs_unpacker_new, __FILE__);
+    newXS("Data::MessagePack::Unpacker::utf8",          xs_unpacker_utf8, __FILE__);
+    newXS("Data::MessagePack::Unpacker::get_utf8",      xs_unpacker_get_utf8, __FILE__);
     newXS("Data::MessagePack::Unpacker::execute",       xs_unpacker_execute, __FILE__);
     newXS("Data::MessagePack::Unpacker::execute_limit", xs_unpacker_execute_limit, __FILE__);
     newXS("Data::MessagePack::Unpacker::is_finished",   xs_unpacker_is_finished, __FILE__);
diff --git a/perl/xs-src/unpack.c b/perl/xs-src/unpack.c
index 065573ab..f39d8c11 100644
--- a/perl/xs-src/unpack.c
+++ b/perl/xs-src/unpack.c
@@ -13,6 +13,7 @@ START_MY_CXT
 typedef struct {
     bool finished;
     bool incremented;
+    bool utf8;
 } unpack_user;
 
 #include "msgpack/unpack_define.h"
@@ -237,6 +238,9 @@ STATIC_INLINE int template_callback_raw(unpack_user* u PERL_UNUSED_DECL, const c
     dTHX;
     /*  newSVpvn(p, l) returns an undef if p == NULL */
     *o = ((l==0) ? newSVpvs("") : newSVpvn(p, l));
+    if(u->utf8) {
+        sv_utf8_decode(*o);
+    }
     return 0;
 }
 
@@ -276,7 +280,7 @@ XS(xs_unpack) {
     msgpack_unpack_t mp;
     template_init(&mp);
 
-    unpack_user const u = {false, false};
+    unpack_user const u = {false, false, false};
     mp.user = u;
 
     size_t from = 0;
@@ -303,7 +307,7 @@ XS(xs_unpack) {
 
 STATIC_INLINE void _reset(SV* const self) {
     dTHX;
-	unpack_user const u = {false, false};
+	unpack_user const u = {false, false, false};
 
 	UNPACKER(self, mp);
 	template_init(mp);
@@ -328,6 +332,26 @@ XS(xs_unpacker_new) {
     XSRETURN(1);
 }
 
+XS(xs_unpacker_utf8) {
+    dXSARGS;
+    if (!(items == 1 || items == 2)) {
+        Perl_croak(aTHX_ "Usage: $unpacker->utf8([$bool)");
+    }
+    UNPACKER(ST(0), mp);
+    mp->user.utf8 = (items == 1 || sv_true(ST(1))) ? true : false;
+    XSRETURN(1); // returns $self
+}
+
+XS(xs_unpacker_get_utf8) {
+    dXSARGS;
+    if (items != 1) {
+        Perl_croak(aTHX_ "Usage: $unpacker->get_utf8()");
+    }
+    UNPACKER(ST(0), mp);
+    ST(0) = boolSV(mp->user.utf8);
+    XSRETURN(1);
+}
+
 STATIC_INLINE size_t
 _execute_impl(SV* const self, SV* const data, UV const offset, UV const limit) {
     dTHX;

From 7c92f8a90b54a7cf2279211fc4601e11872bee38 Mon Sep 17 00:00:00 2001
From: "Fuji, Goro" <gfuji@cpan.org>
Date: Tue, 5 Oct 2010 17:47:27 +0900
Subject: [PATCH 2/2] perl: improve docs

---
 perl/lib/Data/MessagePack.pm           | 6 ++++++
 perl/lib/Data/MessagePack/Unpacker.pod | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/perl/lib/Data/MessagePack.pm b/perl/lib/Data/MessagePack.pm
index 7d1bda78..3d2d6954 100644
--- a/perl/lib/Data/MessagePack.pm
+++ b/perl/lib/Data/MessagePack.pm
@@ -165,6 +165,12 @@ will astonish those who try to unpack byte streams with an arbitrary buffer size
 (e.g. C<< while(read($socket, $buffer, $arbitrary_buffer_size)) { ... } >>).
 We should implement the internal buffer for the unpacker.
 
+=item UTF8 mode
+
+Data::MessagePack::Unpacker supports utf8 mode, which decodes strings
+as UTF8-8. << Data::MessagePack->unpack >> should support utf8 mode in a
+future.
+
 =back
 
 =head1 AUTHORS
diff --git a/perl/lib/Data/MessagePack/Unpacker.pod b/perl/lib/Data/MessagePack/Unpacker.pod
index 37ab3db8..24dafd00 100644
--- a/perl/lib/Data/MessagePack/Unpacker.pod
+++ b/perl/lib/Data/MessagePack/Unpacker.pod
@@ -31,6 +31,8 @@ returns I<$up> itself.
 
 If utf8 mode is enabled, strings will be decoded as UTF-8.
 
+The utf8 mode is disabled by default.
+
 =item my $ret = $up->get_utf8()
 
 returns the utf8 mode flag of I<$up>.