diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..d740b181 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +ruby/Makefile diff --git a/perl/.gitignore b/perl/.gitignore index b64dcdfe..3e0e73e5 100644 --- a/perl/.gitignore +++ b/perl/.gitignore @@ -6,6 +6,7 @@ MessagePack.o blib/ inc/ msgpack/ +t/std/ pack.o pm_to_blib unpack.o diff --git a/perl/Changes b/perl/Changes index 41203763..ce525818 100644 --- a/perl/Changes +++ b/perl/Changes @@ -1,3 +1,8 @@ + +0.24 + - Fixed a possible SEGV on streaming unpacking (gfx) + - Improve performance, esp. in unpacking (gfx) + 0.23 (NO FEATURE CHANGES) diff --git a/perl/MANIFEST.SKIP b/perl/MANIFEST.SKIP index 372742ca..71a24e5c 100644 --- a/perl/MANIFEST.SKIP +++ b/perl/MANIFEST.SKIP @@ -25,4 +25,3 @@ ^Data-MessagePack-[0-9.]+/ ^\.testenv/test_pp.pl ^ppport.h$ -^xshelper.h$ diff --git a/perl/Makefile.PL b/perl/Makefile.PL index 7053cf96..fafc3876 100644 --- a/perl/Makefile.PL +++ b/perl/Makefile.PL @@ -1,4 +1,5 @@ use inc::Module::Install; +use Module::Install::XSUtil 0.32; use Config; name 'Data-MessagePack'; @@ -53,6 +54,11 @@ if ($Module::Install::AUTHOR && -d File::Spec->catfile('..', 'msgpack')) { for my $src (<../msgpack/*.h>) { File::Copy::copy($src, 'msgpack/') or die "copy failed: $!"; } + + mkdir 't/std'; + for my $data(<../test/*.{json,mpac}>) { + File::Copy::copy($data, 't/std') or die "copy failed: $!"; + } } requires 'Test::More' => 0.94; # done_testing diff --git a/perl/README b/perl/README index e46323da..31052789 100644 --- a/perl/README +++ b/perl/README @@ -2,7 +2,7 @@ NAME Data::MessagePack - MessagePack serialising/deserialising SYNOPSIS - my $packed = Data::MessagePack->pack($dat); + my $packed = Data::MessagePack->pack($dat); my $unpacked = Data::MessagePack->unpack($dat); DESCRIPTION @@ -14,10 +14,10 @@ ABOUT MESSAGEPACK FORMAT But unlike JSON, it is very fast and small. ADVANTAGES - PORTABILITY - Messagepack is language independent binary serialize format. + PORTABLE + The MessagePack format does not depend on language nor byte order. - SMALL SIZE + SMALL IN SIZE say length(JSON::XS::encode_json({a=>1, b=>2})); # => 13 say length(Storable::nfreeze({a=>1, b=>2})); # => 21 say length(Data::MessagePack->pack({a=>1, b=>2})); # => 7 @@ -26,7 +26,7 @@ ABOUT MESSAGEPACK FORMAT STREAMING DESERIALIZER MessagePack supports streaming deserializer. It is useful for - networking such as RPC. + networking such as RPC. See Data::MessagePack::Unpacker for details. If you want to get more information about the MessagePack format, please visit to <http://msgpack.org/>. @@ -47,36 +47,59 @@ METHODS Configuration Variables $Data::MessagePack::PreferInteger - Pack the string as int when the value looks like int(EXPERIMENTAL). + Packs a string as an integer, when it looks like an integer. SPEED - This is the result of benchmark/serialize.pl and - benchmark/deserialize.pl on my SC440(Linux 2.6.32-23-server #37-Ubuntu - SMP). + This is a result of benchmark/serialize.pl and benchmark/deserialize.pl + on my SC440(Linux 2.6.32-23-server #37-Ubuntu SMP). -- serialize JSON::XS: 2.3 - Data::MessagePack: 0.20 + Data::MessagePack: 0.24 Storable: 2.21 - Benchmark: timing 1000000 iterations of json, mp, storable... - json: 5 wallclock secs ( 3.95 usr + 0.00 sys = 3.95 CPU) @ 253164.56/s (n=1000000) - mp: 3 wallclock secs ( 2.69 usr + 0.00 sys = 2.69 CPU) @ 371747.21/s (n=1000000) - storable: 26 wallclock secs (27.21 usr + 0.00 sys = 27.21 CPU) @ 36751.19/s (n=1000000) + Benchmark: running json, mp, storable for at least 1 CPU seconds... + json: 1 wallclock secs ( 1.00 usr + 0.01 sys = 1.01 CPU) @ 141939.60/s (n=143359) + mp: 1 wallclock secs ( 1.06 usr + 0.00 sys = 1.06 CPU) @ 355500.94/s (n=376831) + storable: 1 wallclock secs ( 1.12 usr + 0.00 sys = 1.12 CPU) @ 38399.11/s (n=43007) + Rate storable json mp + storable 38399/s -- -73% -89% + json 141940/s 270% -- -60% + mp 355501/s 826% 150% -- -- deserialize JSON::XS: 2.3 - Data::MessagePack: 0.20 + Data::MessagePack: 0.24 Storable: 2.21 - Benchmark: timing 1000000 iterations of json, mp, storable... - json: 4 wallclock secs ( 4.45 usr + 0.00 sys = 4.45 CPU) @ 224719.10/s (n=1000000) - mp: 6 wallclock secs ( 5.45 usr + 0.00 sys = 5.45 CPU) @ 183486.24/s (n=1000000) - storable: 7 wallclock secs ( 7.77 usr + 0.00 sys = 7.77 CPU) @ 128700.13/s (n=1000000) + Benchmark: running json, mp, storable for at least 1 CPU seconds... + json: 0 wallclock secs ( 1.05 usr + 0.00 sys = 1.05 CPU) @ 179442.86/s (n=188415) + mp: 0 wallclock secs ( 1.01 usr + 0.00 sys = 1.01 CPU) @ 212909.90/s (n=215039) + storable: 2 wallclock secs ( 1.14 usr + 0.00 sys = 1.14 CPU) @ 114974.56/s (n=131071) + Rate storable json mp + storable 114975/s -- -36% -46% + json 179443/s 56% -- -16% + mp 212910/s 85% 19% -- + +TODO + Error handling + MessagePack cannot deal with complex scalars such as object + references, filehandles, and code references. We should report the + errors more kindly. + + Streaming deserializer + The current implementation of the streaming deserializer does not + have internal buffers while some other bindings (such as Ruby + binding) does. This limitation will astonish those who try to unpack + byte streams with an arbitrary buffer size (e.g. + "while(read($socket, $buffer, $arbitrary_buffer_size)) { ... }"). We + should implement the internal buffer for the unpacker. AUTHORS Tokuhiro Matsuno Makamaka Hannyaharamitu + gfx + THANKS TO Jun Kuriyama @@ -91,5 +114,6 @@ LICENSE under the same terms as Perl itself. SEE ALSO - <http://msgpack.org/> is official web site for MessagePack format. + <http://msgpack.org/> is the official web site for the MessagePack + format. diff --git a/perl/benchmark/deserialize.pl b/perl/benchmark/deserialize.pl index 750704e9..634a79ed 100644 --- a/perl/benchmark/deserialize.pl +++ b/perl/benchmark/deserialize.pl @@ -5,11 +5,13 @@ use JSON::XS; use Benchmark ':all'; use Storable; +#$Data::MessagePack::PreferInteger = 1; + my $a = { "method" => "handleMessage", "params" => [ "user1", "we were just talking" ], "id" => undef, - "array" => [ 1, 11, 234, -5, 1e5, 1e7, 1, 0 ] + "array" => [ 1, 1024, 70000, -5, 1e5, 1e7, 1, 0, 3.14, sqrt(2) ], }; my $j = JSON::XS::encode_json($a); my $m = Data::MessagePack->pack($a); @@ -19,8 +21,8 @@ print "-- deserialize\n"; print "JSON::XS: $JSON::XS::VERSION\n"; print "Data::MessagePack: $Data::MessagePack::VERSION\n"; print "Storable: $Storable::VERSION\n"; -timethese( - 1000000 => { +cmpthese timethese( + -1 => { json => sub { JSON::XS::decode_json($j) }, mp => sub { Data::MessagePack->unpack($m) }, storable => sub { Storable::thaw($s) }, diff --git a/perl/benchmark/serialize.pl b/perl/benchmark/serialize.pl index c5ab15bc..e0509ffa 100644 --- a/perl/benchmark/serialize.pl +++ b/perl/benchmark/serialize.pl @@ -9,15 +9,15 @@ my $a = { "method" => "handleMessage", "params" => [ "user1", "we were just talking" ], "id" => undef, - "array" => [ 1, 11, 234, -5, 1e5, 1e7, 1, 0 ] + "array" => [ 1, 1024, 70000, -5, 1e5, 1e7, 1, 0, 3.14, sqrt(2) ], }; print "-- serialize\n"; print "JSON::XS: $JSON::XS::VERSION\n"; print "Data::MessagePack: $Data::MessagePack::VERSION\n"; print "Storable: $Storable::VERSION\n"; -timethese( - 1000000 => { +cmpthese timethese( + -1 => { json => sub { JSON::XS::encode_json($a) }, storable => sub { Storable::freeze($a) }, mp => sub { Data::MessagePack->pack($a) }, diff --git a/perl/lib/Data/MessagePack.pm b/perl/lib/Data/MessagePack.pm index 3511628c..953bdf85 100644 --- a/perl/lib/Data/MessagePack.pm +++ b/perl/lib/Data/MessagePack.pm @@ -6,10 +6,21 @@ use 5.008001; our $VERSION = '0.23'; our $PreferInteger = 0; -our $true = do { bless \(my $dummy = 1), "Data::MessagePack::Boolean" }; -our $false = do { bless \(my $dummy = 0), "Data::MessagePack::Boolean" }; -sub true () { $true } -sub false () { $false } +sub true () { + require Data::MessagePack::Boolean; + no warnings 'once', 'redefine'; + my $t = $Data::MessagePack::Boolean::true; + *true = sub (){ $t }; + return $t; +} + +sub false () { + require Data::MessagePack::Boolean; + no warnings 'once', 'redefine'; + my $f = $Data::MessagePack::Boolean::false; + *false = sub (){ $f }; + return $f; +} if ( !__PACKAGE__->can('pack') ) { # this idea comes from Text::Xslate my $backend = $ENV{ PERL_DATA_MESSAGEPACK } || ''; @@ -34,7 +45,7 @@ Data::MessagePack - MessagePack serialising/deserialising =head1 SYNOPSIS - my $packed = Data::MessagePack->pack($dat); + my $packed = Data::MessagePack->pack($dat); my $unpacked = Data::MessagePack->unpack($dat); =head1 DESCRIPTION @@ -50,11 +61,11 @@ It enables to exchange structured objects between many languages like JSON. But =over 4 -=item PORTABILITY +=item PORTABLE -Messagepack is language independent binary serialize format. +The MessagePack format does not depend on language nor byte order. -=item SMALL SIZE +=item SMALL IN SIZE say length(JSON::XS::encode_json({a=>1, b=>2})); # => 13 say length(Storable::nfreeze({a=>1, b=>2})); # => 21 @@ -65,6 +76,7 @@ The MessagePack format saves memory than JSON and Storable format. =item STREAMING DESERIALIZER MessagePack supports streaming deserializer. It is useful for networking such as RPC. +See L<Data::MessagePack::Unpacker> for details. =back @@ -94,31 +106,59 @@ unpack the $msgpackstr to a MessagePack format string. =item $Data::MessagePack::PreferInteger -Pack the string as int when the value looks like int(EXPERIMENTAL). +Packs a string as an integer, when it looks like an integer. =back =head1 SPEED -This is the result of benchmark/serialize.pl and benchmark/deserialize.pl on my SC440(Linux 2.6.32-23-server #37-Ubuntu SMP). +This is a result of benchmark/serialize.pl and benchmark/deserialize.pl on my SC440(Linux 2.6.32-23-server #37-Ubuntu SMP). + -- serialize JSON::XS: 2.3 - Data::MessagePack: 0.20 + Data::MessagePack: 0.24 Storable: 2.21 - Benchmark: timing 1000000 iterations of json, mp, storable... - json: 5 wallclock secs ( 3.95 usr + 0.00 sys = 3.95 CPU) @ 253164.56/s (n=1000000) - mp: 3 wallclock secs ( 2.69 usr + 0.00 sys = 2.69 CPU) @ 371747.21/s (n=1000000) - storable: 26 wallclock secs (27.21 usr + 0.00 sys = 27.21 CPU) @ 36751.19/s (n=1000000) + Benchmark: running json, mp, storable for at least 1 CPU seconds... + json: 1 wallclock secs ( 1.00 usr + 0.01 sys = 1.01 CPU) @ 141939.60/s (n=143359) + mp: 1 wallclock secs ( 1.06 usr + 0.00 sys = 1.06 CPU) @ 355500.94/s (n=376831) + storable: 1 wallclock secs ( 1.12 usr + 0.00 sys = 1.12 CPU) @ 38399.11/s (n=43007) + Rate storable json mp + storable 38399/s -- -73% -89% + json 141940/s 270% -- -60% + mp 355501/s 826% 150% -- -- deserialize JSON::XS: 2.3 - Data::MessagePack: 0.20 + Data::MessagePack: 0.24 Storable: 2.21 - Benchmark: timing 1000000 iterations of json, mp, storable... - json: 4 wallclock secs ( 4.45 usr + 0.00 sys = 4.45 CPU) @ 224719.10/s (n=1000000) - mp: 6 wallclock secs ( 5.45 usr + 0.00 sys = 5.45 CPU) @ 183486.24/s (n=1000000) - storable: 7 wallclock secs ( 7.77 usr + 0.00 sys = 7.77 CPU) @ 128700.13/s (n=1000000) + Benchmark: running json, mp, storable for at least 1 CPU seconds... + json: 0 wallclock secs ( 1.05 usr + 0.00 sys = 1.05 CPU) @ 179442.86/s (n=188415) + mp: 0 wallclock secs ( 1.01 usr + 0.00 sys = 1.01 CPU) @ 212909.90/s (n=215039) + storable: 2 wallclock secs ( 1.14 usr + 0.00 sys = 1.14 CPU) @ 114974.56/s (n=131071) + Rate storable json mp + storable 114975/s -- -36% -46% + json 179443/s 56% -- -16% + mp 212910/s 85% 19% -- + +=head1 TODO + +=over + +=item Error handling + +MessagePack cannot deal with complex scalars such as object references, +filehandles, and code references. We should report the errors more kindly. + +=item Streaming deserializer + +The current implementation of the streaming deserializer does not have internal +buffers while some other bindings (such as Ruby binding) does. This limitation +will astonish those who try to unpack byte streams with an arbitrary buffer size +(e.g. C<< while(read($socket, $buffer, $arbitrary_buffer_size)) { ... } >>). +We should implement the internal buffer for the unpacker. + +=back =head1 AUTHORS @@ -126,6 +166,8 @@ Tokuhiro Matsuno Makamaka Hannyaharamitu +gfx + =head1 THANKS TO Jun Kuriyama @@ -141,8 +183,8 @@ hanekomu This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. - =head1 SEE ALSO -L<http://msgpack.org/> is official web site for MessagePack format. +L<http://msgpack.org/> is the official web site for the MessagePack format. +=cut diff --git a/perl/lib/Data/MessagePack/Boolean.pm b/perl/lib/Data/MessagePack/Boolean.pm new file mode 100644 index 00000000..2bb3ecad --- /dev/null +++ b/perl/lib/Data/MessagePack/Boolean.pm @@ -0,0 +1,14 @@ +package Data::MessagePack::Boolean; +use strict; +use overload + 'bool' => sub { ${ $_[0] } }, + '0+' => sub { ${ $_[0] } }, + '""' => sub { ${ $_[0] } ? 'true' : 'false' }, + + fallback => 1, +; + +our $true = do { bless \(my $dummy = 1) }; +our $false = do { bless \(my $dummy = 0) }; + +1; diff --git a/perl/lib/Data/MessagePack/PP.pm b/perl/lib/Data/MessagePack/PP.pm index 9e322991..0dd64272 100644 --- a/perl/lib/Data/MessagePack/PP.pm +++ b/perl/lib/Data/MessagePack/PP.pm @@ -1,11 +1,8 @@ package Data::MessagePack::PP; - -use 5.008000; +use 5.008001; use strict; use Carp (); -our $VERSION = '0.15'; - # See also # http://redmine.msgpack.org/projects/msgpack/wiki/FormatSpec # http://cpansearch.perl.org/src/YAPPO/Data-Model-0.00006/lib/Data/Model/Driver/Memcached.pm @@ -25,49 +22,74 @@ BEGIN { # require $Config{byteorder}; my $bo_is_le = ( $Config{byteorder} =~ /^1234/ ); # which better? my $bo_is_le = unpack ( 'd', "\x00\x00\x00\x00\x00\x00\xf0\x3f") == 1; # 1.0LE - # In really, since 5.9.2 '>' is introduced. but 'n!' and 'N!'? - *pack_uint64 = $bo_is_le ? sub { - my @v = unpack( 'V2', pack( 'Q', $_[0] ) ); - return pack 'CN2', 0xcf, @v[1,0]; - } : sub { pack 'CQ', 0xcf, $_[0]; }; - *pack_int64 = $bo_is_le ? sub { - my @v = unpack( 'V2', pack( 'q', $_[0] ) ); - return pack 'CN2', 0xd3, @v[1,0]; - } : sub { pack 'Cq', 0xd3, $_[0]; }; - *pack_double = $bo_is_le ? sub { - my @v = unpack( 'V2', pack( 'd', $_[0] ) ); - return pack 'CN2', 0xcb, @v[1,0]; - } : sub { pack 'Cd', 0xcb, $_[0]; }; - *unpack_float = $bo_is_le ? sub { - my @v = unpack( 'v2', substr( $_[0], $_[1], 4 ) ); - return unpack( 'f', pack( 'n2', @v[1,0] ) ); - } : sub { return unpack( 'f', substr( $_[0], $_[1], 4 ) ); }; - *unpack_double = $bo_is_le ? sub { - my @v = unpack( 'V2', substr( $_[0], $_[1], 8 ) ); - return unpack( 'd', pack( 'N2', @v[1,0] ) ); - } : sub { return unpack( 'd', substr( $_[0], $_[1], 8 ) ); }; - *unpack_int16 = sub { - my $v = unpack 'n', substr( $_[0], $_[1], 2 ); - return $v ? $v - 0x10000 : 0; - }; - *unpack_int32 = sub { - no warnings; # avoid for warning about Hexadecimal number - my $v = unpack 'N', substr( $_[0], $_[1], 4 ); - return $v ? $v - 0x100000000 : 0; - }; - *unpack_int64 = $bo_is_le ? sub { - my @v = unpack( 'V*', substr( $_[0], $_[1], 8 ) ); - return unpack( 'q', pack( 'N2', @v[1,0] ) ); - } : sub { pack 'q', substr( $_[0], $_[1], 8 ); }; - *unpack_uint64 = $bo_is_le ? sub { - my @v = unpack( 'V*', substr( $_[0], $_[1], 8 ) ); - return unpack( 'Q', pack( 'N2', @v[1,0] ) ); - } : sub { pack 'Q', substr( $_[0], $_[1], 8 ); }; + + # In reality, since 5.9.2 '>' is introduced. but 'n!' and 'N!'? + if($bo_is_le) { + *pack_uint64 = sub { + my @v = unpack( 'V2', pack( 'Q', $_[0] ) ); + return pack 'CN2', 0xcf, @v[1,0]; + }; + *pack_int64 = sub { + my @v = unpack( 'V2', pack( 'q', $_[0] ) ); + return pack 'CN2', 0xd3, @v[1,0]; + }; + *pack_double = sub { + my @v = unpack( 'V2', pack( 'd', $_[0] ) ); + return pack 'CN2', 0xcb, @v[1,0]; + }; + + *unpack_float = sub { + my @v = unpack( 'v2', substr( $_[0], $_[1], 4 ) ); + return unpack( 'f', pack( 'n2', @v[1,0] ) ); + }; + *unpack_double = sub { + my @v = unpack( 'V2', substr( $_[0], $_[1], 8 ) ); + return unpack( 'd', pack( 'N2', @v[1,0] ) ); + }; + + *unpack_int16 = sub { + my $v = unpack 'n', substr( $_[0], $_[1], 2 ); + return $v ? $v - 0x10000 : 0; + }; + *unpack_int32 = sub { + no warnings; # avoid for warning about Hexadecimal number + my $v = unpack 'N', substr( $_[0], $_[1], 4 ); + return $v ? $v - 0x100000000 : 0; + }; + *unpack_int64 = sub { + my @v = unpack( 'V*', substr( $_[0], $_[1], 8 ) ); + return unpack( 'q', pack( 'N2', @v[1,0] ) ); + }; + *unpack_uint64 = sub { + my @v = unpack( 'V*', substr( $_[0], $_[1], 8 ) ); + return unpack( 'Q', pack( 'N2', @v[1,0] ) ); + }; + } + else { # big endian + *pack_uint64 = sub { return pack 'CQ', 0xcf, $_[0]; }; + *pack_int64 = sub { return pack 'Cq', 0xd3, $_[0]; }; + *pack_double = sub { return pack 'Cd', 0xcb, $_[0]; }; + + *unpack_float = sub { return unpack( 'f', substr( $_[0], $_[1], 4 ) ); }; + *unpack_double = sub { return unpack( 'd', substr( $_[0], $_[1], 8 ) ); }; + *unpack_int16 = sub { + my $v = unpack 'n', substr( $_[0], $_[1], 2 ); + return $v ? $v - 0x10000 : 0; + }; + *unpack_int32 = sub { + no warnings; # avoid for warning about Hexadecimal number + my $v = unpack 'N', substr( $_[0], $_[1], 4 ); + return $v ? $v - 0x100000000 : 0; + }; + *unpack_int64 = sub { pack 'q', substr( $_[0], $_[1], 8 ); }; + *unpack_uint64 = sub { pack 'Q', substr( $_[0], $_[1], 8 ); }; + } } else { *pack_uint64 = sub { return pack 'CQ>', 0xcf, $_[0]; }; *pack_int64 = sub { return pack 'Cq>', 0xd3, $_[0]; }; *pack_double = sub { return pack 'Cd>', 0xcb, $_[0]; }; + *unpack_float = sub { return unpack( 'f>', substr( $_[0], $_[1], 4 ) ); }; *unpack_double = sub { return unpack( 'd>', substr( $_[0], $_[1], 8 ) ); }; *unpack_int16 = sub { return unpack( 'n!', substr( $_[0], $_[1], 2 ) ); }; @@ -75,11 +97,6 @@ BEGIN { *unpack_int64 = sub { return unpack( 'q>', substr( $_[0], $_[1], 8 ) ); }; *unpack_uint64 = sub { return unpack( 'Q>', substr( $_[0], $_[1], 8 ) ); }; } - # for 5.8 etc. - unless ( defined &utf8::is_utf8 ) { - require Encode; - *utf8::is_utf8 = *Encode::is_utf8; - } } @@ -92,7 +109,7 @@ BEGIN { my $max_depth; -sub pack { +sub pack :method { Carp::croak('Usage: Data::MessagePack->pack($dat [,$max_depth])') if @_ < 2; $max_depth = defined $_[2] ? $_[2] : 512; # init return _pack( $_[1] ); @@ -104,9 +121,7 @@ sub _pack { return CORE::pack( 'C', 0xc0 ) if ( not defined $value ); - my $b_obj = B::svref_2object( ref $value ? $value : \$value ); - - if ( $b_obj->isa('B::AV') ) { + if ( ref($value) eq 'ARRAY' ) { my $num = @$value; my $header = $num < 16 ? CORE::pack( 'C', 0x90 + $num ) @@ -120,7 +135,7 @@ sub _pack { return join( '', $header, map { _pack( $_ ) } @$value ); } - elsif ( $b_obj->isa('B::HV') ) { + elsif ( ref($value) eq 'HASH' ) { my $num = keys %$value; my $header = $num < 16 ? CORE::pack( 'C', 0x80 + $num ) @@ -134,10 +149,12 @@ sub _pack { return join( '', $header, map { _pack( $_ ) } %$value ); } - elsif ( blessed( $value ) and blessed( $value ) eq 'Data::MessagePack::Boolean' ) { - return CORE::pack( 'C', $$value ? 0xc3 : 0xc2 ); + elsif ( ref( $value ) eq 'Data::MessagePack::Boolean' ) { + return CORE::pack( 'C', ${$value} ? 0xc3 : 0xc2 ); } + + my $b_obj = B::svref_2object( \$value ); my $flags = $b_obj->FLAGS; if ( $flags & ( B::SVf_IOK | B::SVp_IOK ) ) { @@ -158,7 +175,6 @@ sub _pack { } } - elsif ( $flags & B::SVf_POK ) { # raw / check needs before dboule if ( $Data::MessagePack::PreferInteger ) { @@ -187,11 +203,9 @@ sub _pack { return $header . $value; } - elsif ( $flags & ( B::SVf_NOK | B::SVp_NOK ) ) { # double only return pack_double( $value ); } - else { die "???"; } @@ -209,7 +223,7 @@ sub _pack { my $p; # position variables for speed. -sub unpack { +sub unpack :method { $p = 0; # init _unpack( $_[1] ); } @@ -356,7 +370,7 @@ package use strict; sub new { - bless { stack => [] }, shift; + bless { pos => 0 }, shift; } @@ -370,25 +384,30 @@ sub execute_limit { sub execute { my ( $self, $data, $offset, $limit ) = @_; + $offset ||= 0; my $value = substr( $data, $offset, $limit ? $limit : length $data ); my $len = length $value; + $self->{data} .= $value; + local $self->{stack} = []; + $p = 0; - while ( $len > $p ) { - _count( $self, $value ) or last; + LOOP: while ( length($self->{data}) > $p ) { + _count( $self, $self->{data} ) or last; - if ( @{ $self->{stack} } > 0 ) { - pop @{ $self->{stack} } if --$self->{stack}->[-1] == 0; + while ( @{ $self->{stack} } > 0 && --$self->{stack}->[-1] == 0) { + pop @{ $self->{stack} }; + } + + if (@{$self->{stack}} == 0) { + $self->{is_finished}++; + last LOOP; } } + $self->{pos} = $p; - if ( $len == $p ) { - $self->{ data } .= substr( $value, 0, $p ); - $self->{ remain } = undef; - } - - return $p; + return $p + $offset; } @@ -410,7 +429,9 @@ sub _count { $num = $byte & ~0x90; } - push @{ $self->{stack} }, $num + 1; + if (defined($num) && $num > 0) { + push @{ $self->{stack} }, $num + 1; + } return 1; } @@ -429,7 +450,9 @@ sub _count { $num = $byte & ~0x80; } - push @{ $self->{stack} }, $num * 2 + 1; # a pair + if ($num > 0) { + push @{ $self->{stack} }, $num * 2 + 1; # a pair + } return 1; } @@ -497,22 +520,19 @@ sub _count { sub data { - my $data = Data::MessagePack->unpack( $_[0]->{ data } ); - $_[0]->reset; - return $data; + return Data::MessagePack->unpack( substr($_[0]->{ data }, 0, $_[0]->{pos}) ); } sub is_finished { my ( $self ) = @_; - ( scalar( @{ $self->{stack} } ) or defined $self->{ remain } ) ? 0 : 1; + return $self->{is_finished}; } - -sub reset { - $_[0]->{ stack } = []; +sub reset :method { $_[0]->{ data } = undef; - $_[0]->{ remain } = undef; + $_[0]->{ pos } = 0; + $_[0]->{ is_finished } = 0; } 1; diff --git a/perl/perlxs.h b/perl/perlxs.h deleted file mode 100644 index 441682de..00000000 --- a/perl/perlxs.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - perlxs.h - Standard XS header file - Copyright (c) Fuji, Goro (gfx) -*/ - -#ifdef __cplusplus -extern "C" { -#endif - -#define PERL_NO_GET_CONTEXT /* we want efficiency */ -#include <EXTERN.h> - -#include <perl.h> -#define NO_XSLOCKS /* for exceptions */ -#include <XSUB.h> - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#include "ppport.h" - -/* portability stuff not supported by ppport.h yet */ - -#ifndef STATIC_INLINE /* from 5.13.4 */ -# if defined(__GNUC__) || defined(__cplusplus__) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) -# define STATIC_INLINE static inline -# else -# define STATIC_INLINE static -# endif -#endif /* STATIC_INLINE */ - -#ifndef __attribute__format__ -#define __attribute__format__(a,b,c) /* nothing */ -#endif - -#ifndef LIKELY /* they are just a compiler's hint */ -#define LIKELY(x) (x) -#define UNLIKELY(x) (x) -#endif - -#ifndef newSVpvs_share -#define newSVpvs_share(s) Perl_newSVpvn_share(aTHX_ STR_WITH_LEN(s), 0U) -#endif - -#ifndef get_cvs -#define get_cvs(name, flags) get_cv(name, flags) -#endif - -#ifndef GvNAME_get -#define GvNAME_get GvNAME -#endif -#ifndef GvNAMELEN_get -#define GvNAMELEN_get GvNAMELEN -#endif - -#ifndef CvGV_set -#define CvGV_set(cv, gv) (CvGV(cv) = (gv)) -#endif - -/* general utility */ - -#if PERL_BCDVERSION >= 0x5008005 -#define LooksLikeNumber(x) looks_like_number(x) -#else -#define LooksLikeNumber(x) (SvPOKp(x) ? looks_like_number(x) : (I32)SvNIOKp(x)) -#endif - -#define newAV_mortal() (AV*)sv_2mortal((SV*)newAV()) -#define newHV_mortal() (HV*)sv_2mortal((SV*)newHV()) - -#define DECL_BOOT(name) EXTERN_C XS(CAT2(boot_, name)) -#define CALL_BOOT(name) STMT_START { \ - PUSHMARK(SP); \ - CALL_FPTR(CAT2(boot_, name))(aTHX_ cv); \ - } STMT_END diff --git a/perl/t/03_stream_unpack.t b/perl/t/03_stream_unpack.t index a4ab4eba..646fc249 100644 --- a/perl/t/03_stream_unpack.t +++ b/perl/t/03_stream_unpack.t @@ -37,7 +37,7 @@ for (my $i=0; $i<scalar(@dat); ) { for (1..5) { $up->execute("\xc0", 0); # nil } - ok $up->is_finished; - is_deeply $up->data, [undef, undef, undef, undef, undef]; + ok $up->is_finished, 'finished'; + is_deeply $up->data, [undef, undef, undef, undef, undef], 'array, is_deeply'; } diff --git a/perl/t/06_stream_unpack2.t b/perl/t/06_stream_unpack2.t index eaf2cb4b..bb6fe93d 100644 --- a/perl/t/06_stream_unpack2.t +++ b/perl/t/06_stream_unpack2.t @@ -1,9 +1,17 @@ use strict; use warnings; use Data::MessagePack; -use Test::More tests => 6; +use Test::More tests => 9; +use t::Util; + +my $input = [ + false,true,null,0,0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1, + 127,127,255,65535,4294967295,-32,-32,-128,-32768, + -2147483648,0.0,-0.0,1.0,-1.0,"a","a","a","","","", + [0],[0],[0],[],[],[],{},{},{}, + {"a" => 97},{"a" => 97},{"a" => 97},[[]],[["a"]] +]; -my $input = [(undef)x16]; my $packed = Data::MessagePack->pack($input); is_deeply(Data::MessagePack->unpack($packed), $input); @@ -16,10 +24,20 @@ is_deeply(Data::MessagePack->unpack($packed), $input); { my $up = Data::MessagePack::Unpacker->new(); - is $up->execute(substr($packed, 0, 3), 0), 3; - $up->execute($packed, 3); - ok $up->is_finished; - is_deeply $up->data, $input; + $packed x= 3; + + my $offset = 0; + for my $i(1 .. 3) { + note "block $i (offset: $offset/".length($packed).")"; + note "starting 3 bytes: ", join " ", map { unpack 'H2', $_ } + split //, substr($packed, $offset, 3); + + $offset = $up->execute($packed, $offset); + ok $up->is_finished, 'finished'; + my $data = $up->data; + is_deeply $data, $input; + $up->reset(); + } } diff --git a/perl/t/09_stddata.t b/perl/t/09_stddata.t new file mode 100644 index 00000000..f98d696b --- /dev/null +++ b/perl/t/09_stddata.t @@ -0,0 +1,42 @@ +#!perl -w +# Testing standard dataset in msgpack/test/*.{json,mpac}. +# Don't edit msgpack/perl/t/std/*, which are just copies. +use strict; +use Test::More; +use t::Util; + +use Data::MessagePack; + +sub slurp { + open my $fh, '<:raw', $_[0] or die "failed to open '$_[0]': $!"; + local $/; + return scalar <$fh>; +} + +my @data = do { + my $json = slurp("t/std/cases.json"); + $json =~ s/:/=>/g; + @{ eval $json }; +}; + +my $mpac1 = slurp("t/std/cases.mpac"); +my $mpac2 = slurp("t/std/cases_compact.mpac"); + +my $mps = Data::MessagePack::Unpacker->new(); + +my $t = 1; +for my $mpac($mpac1, $mpac2) { + note "mpac", $t++; + + my $offset = 0; + my $i = 0; + while($offset < length($mpac)) { + $offset = $mps->execute($mpac, $offset); + ok $mps->is_finished, "data[$i] : is_finished"; + is_deeply $mps->data, $data[$i], "data[$i]"; + $mps->reset; + $i++; + } +} + +done_testing; diff --git a/perl/t/10_splitted_bytes.t b/perl/t/10_splitted_bytes.t new file mode 100644 index 00000000..15598f4e --- /dev/null +++ b/perl/t/10_splitted_bytes.t @@ -0,0 +1,42 @@ +#!perl + +# This feature is not yet supported, but 0.23 (or former) caused SEGV in this code, +# so we put it here. + +use strict; +use warnings; +use Data::MessagePack; +use Test::More; +use t::Util; + +my $input = [ + false,true,null,0,0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1, + 127,127,255,65535,4294967295,-32,-32,-128,-32768, + -2147483648,0.0,-0.0,1.0,-1.0,"a","a","a","","","", + [0],[0],[0],[],[],[],{},{},{}, + {"a" => 97},{"a" => 97},{"a" => 97},[[]],[["a"]] +]; + +my $packed = Data::MessagePack->pack($input); + +foreach my $size(1 .. 16) { + local $TODO = "Splitted byte streaming is not yet supported (bufer size: $size)"; + + my $up = Data::MessagePack::Unpacker->new(); + + open my $stream, '<:bytes :scalar', \$packed; + binmode $stream; + my $buff; + my $done = 0; + while( read($stream, $buff, $size) ) { + #note "buff: ", join " ", map { unpack 'H2', $_ } split //, $buff; + + $done = $up->execute($buff); + } + is $done, length($packed); + ok $up->is_finished, "is_finished: $size"; + my $data = $up->data; + is_deeply $data, $input; +} + +done_testing; diff --git a/perl/t/11_stream_unpack3.t b/perl/t/11_stream_unpack3.t new file mode 100644 index 00000000..0eb8bff7 --- /dev/null +++ b/perl/t/11_stream_unpack3.t @@ -0,0 +1,39 @@ +use strict; +use warnings; +use Test::More; +use Data::MessagePack; + +my @data = ( [ 1, 2, 3 ], [ 4, 5, 6 ] ); + +# serialize +my $buffer = ''; +for my $d (@data) { + $buffer .= Data::MessagePack->pack($d); +} + +# deserialize +my $cb = sub { + my ($data) = @_; + + my $d = shift @data; + is_deeply $data, $d; +}; +my $unpacker = Data::MessagePack::Unpacker->new(); +my $nread = 0; +while (1) { + $nread = $unpacker->execute( $buffer, $nread ); + if ( $unpacker->is_finished ) { + my $ret = $unpacker->data; + $cb->( $ret ); + $unpacker->reset; + + $buffer = substr( $buffer, $nread ); + $nread = 0; + next if length($buffer) != 0; + } + last; +} +is scalar(@data), 0; + +done_testing; + diff --git a/perl/t/12_stream_unpack3.t b/perl/t/12_stream_unpack3.t new file mode 100644 index 00000000..118acc30 --- /dev/null +++ b/perl/t/12_stream_unpack3.t @@ -0,0 +1,23 @@ +use strict; +use warnings; +use Data::MessagePack; +use Test::More; +use t::Util; + +my @input = ( + +[[]], + [[],[]], + [{"a" => 97},{"a" => 97}], + [{"a" => 97},{"a" => 97},{"a" => 97}], +); + +plan tests => @input * 2; + +for my $input (@input) { + my $packed = Data::MessagePack->pack($input); + my $up = Data::MessagePack::Unpacker->new(); + $up->execute($packed, 0); + ok $up->is_finished, 'finished'; + is_deeply($up->data, $input); +} + diff --git a/perl/t/50_leaktrace.t b/perl/t/50_leaktrace.t new file mode 100644 index 00000000..29485270 --- /dev/null +++ b/perl/t/50_leaktrace.t @@ -0,0 +1,58 @@ +#!perl -w +use strict; +use Test::Requires { 'Test::LeakTrace' => 0.13 }; +use Test::More; + +use Data::MessagePack; + +my $simple_data = "xyz"; +my $complex_data = { + a => 'foo', + b => 42, + c => undef, + d => [qw(bar baz)], + e => 3.14, +}; + +note 'pack'; + +no_leaks_ok { + my $s = Data::MessagePack->pack($complex_data); +}; + +no_leaks_ok { + eval { Data::MessagePack->pack([\*STDIN]) }; + note $@; + $@ or warn "# it must die"; +}; + +note 'unpack'; + +my $s = Data::MessagePack->pack($simple_data); +my $c = Data::MessagePack->pack($complex_data); + +no_leaks_ok { + my $data = Data::MessagePack->unpack($s); +}; + +no_leaks_ok { + my $data = Data::MessagePack->unpack($c); +}; + +no_leaks_ok { + my $broken = $s; + chop $broken; + eval { Data::MessagePack->unpack($broken) }; + note $@; + $@ or warn "# it must die"; +}; + +note 'stream'; + +no_leaks_ok { + my $up = Data::MessagePack::Unpacker->new(); + $up->execute($c); + my $data = $up->data(); +}; + +done_testing; diff --git a/perl/t/Util.pm b/perl/t/Util.pm index c8debefb..ad69c4d5 100644 --- a/perl/t/Util.pm +++ b/perl/t/Util.pm @@ -1,6 +1,7 @@ package t::Util; use strict; use warnings; +use Data::MessagePack; sub import { my $pkg = caller(0); @@ -15,6 +16,7 @@ sub import { *{"$pkg\::false"} = sub () { Data::MessagePack::false() }; + *{"$pkg\::null"} = sub() { undef }; } 1; diff --git a/perl/util.h b/perl/util.h deleted file mode 100644 index 2b4ed072..00000000 --- a/perl/util.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __PERL_MSGPACK_UTIL_H__ -#define __PERL_MSGPACK_UTIL_H__ - -#if __GNUC__ >= 3 -# define INLINE inline -#else -# define INLINE -#endif - -#endif // __PERL_MSGPACK_UTIL_H__ - diff --git a/perl/xs-src/MessagePack.c b/perl/xs-src/MessagePack.c index fd1b344d..69337f41 100644 --- a/perl/xs-src/MessagePack.c +++ b/perl/xs-src/MessagePack.c @@ -1,13 +1,7 @@ -#ifdef __cplusplus -extern "C" { -#endif -#include "EXTERN.h" -#include "perl.h" -#include "XSUB.h" -#define NEED_newCONSTSUB -#include "ppport.h" -#ifdef __cplusplus -}; +#include "xshelper.h" + +#ifndef __cplusplus +#include <stdbool.h> #endif XS(xs_pack); @@ -20,17 +14,18 @@ XS(xs_unpacker_data); XS(xs_unpacker_reset); XS(xs_unpacker_destroy); -void boot_Data__MessagePack_pack(void); +void init_Data__MessagePack_pack(pTHX_ bool const cloning); +void init_Data__MessagePack_unpack(pTHX_ bool const cloning); XS(boot_Data__MessagePack) { dXSARGS; - HV * stash; + PERL_UNUSED_VAR(items); - boot_Data__MessagePack_pack(); + init_Data__MessagePack_pack(aTHX_ false); + init_Data__MessagePack_unpack(aTHX_ false); newXS("Data::MessagePack::pack", xs_pack, __FILE__); newXS("Data::MessagePack::unpack", xs_unpack, __FILE__); - stash = gv_stashpvn("Data::MessagePack", strlen("Data::MessagePack"), TRUE); newXS("Data::MessagePack::Unpacker::new", xs_unpacker_new, __FILE__); newXS("Data::MessagePack::Unpacker::execute", xs_unpacker_execute, __FILE__); diff --git a/perl/xs-src/pack.c b/perl/xs-src/pack.c index 0aa32502..862808eb 100644 --- a/perl/xs-src/pack.c +++ b/perl/xs-src/pack.c @@ -13,17 +13,18 @@ static inline void msgpack_pack ## name typedef struct { - char *cur; /* SvPVX (sv) + current output position */ - char *end; /* SvEND (sv) */ - SV *sv; /* result scalar */ + char *cur; /* SvPVX (sv) + current output position */ + const char *end; /* SvEND (sv) */ + SV *sv; /* result scalar */ } enc_t; -static void need(enc_t *enc, STRLEN len); + +STATIC_INLINE void need(enc_t* const enc, STRLEN const len); #define msgpack_pack_user enc_t* #define msgpack_pack_append_buffer(enc, buf, len) \ - need(enc, len); \ - memcpy(enc->cur, buf, len); \ + need(enc, len); \ + memcpy(enc->cur, buf, len); \ enc->cur += len; #include "msgpack/pack_template.h" @@ -32,10 +33,13 @@ static void need(enc_t *enc, STRLEN len); #if IVSIZE == 8 # define PACK_IV msgpack_pack_int64 +# define PACK_UV msgpack_pack_uint64 #elif IVSIZE == 4 # define PACK_IV msgpack_pack_int32 +# define PACK_UV msgpack_pack_uint32 #elif IVSIZE == 2 # define PACK_IV msgpack_pack_int16 +# define PACK_UV msgpack_pack_uint16 #else # error "msgpack only supports IVSIZE = 8,4,2 environment." #endif @@ -43,21 +47,21 @@ static void need(enc_t *enc, STRLEN len); #define ERR_NESTING_EXCEEDED "perl structure exceeds maximum nesting level (max_depth set too low?)" -STATIC_INLINE void need(enc_t *enc, STRLEN len) +STATIC_INLINE void need(enc_t* const enc, STRLEN const len) { - dTHX; if (enc->cur + len >= enc->end) { - STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); - SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); - enc->cur = SvPVX (enc->sv) + cur; - enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; + dTHX; + STRLEN const cur = enc->cur - SvPVX_const(enc->sv); + sv_grow (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); + enc->cur = SvPVX_mutable(enc->sv) + cur; + enc->end = SvPVX_const(enc->sv) + SvLEN (enc->sv) - 1; } } static int s_pref_int = 0; -STATIC_INLINE int pref_int_set(pTHX_ SV* sv, MAGIC* mg) { +STATIC_INLINE int pref_int_set(pTHX_ SV* sv, MAGIC* mg PERL_UNUSED_DECL) { if (SvTRUE(sv)) { s_pref_int = 1; } else { @@ -79,8 +83,7 @@ MGVTBL pref_int_vtbl = { #endif }; -void boot_Data__MessagePack_pack(void) { - dTHX; +void init_Data__MessagePack_pack(pTHX_ bool const cloning) { SV* var = get_sv("Data::MessagePack::PreferInteger", 0); sv_magicext(var, NULL, PERL_MAGIC_ext, &pref_int_vtbl, NULL, 0); SvSETMAGIC(var); @@ -141,32 +144,35 @@ STATIC_INLINE int try_int(enc_t* enc, const char *p, size_t len) { } -static void _msgpack_pack_rv(enc_t *enc, SV* sv, int depth); +STATIC_INLINE void _msgpack_pack_rv(enc_t *enc, SV* sv, int depth); -STATIC_INLINE void _msgpack_pack_sv(enc_t *enc, SV* sv, int depth) { +STATIC_INLINE void _msgpack_pack_sv(enc_t* const enc, SV* const sv, int const depth) { dTHX; - if (depth <= 0) Perl_croak(aTHX_ ERR_NESTING_EXCEEDED); + assert(sv); + if (UNLIKELY(depth <= 0)) Perl_croak(aTHX_ ERR_NESTING_EXCEEDED); SvGETMAGIC(sv); - if (sv==NULL) { - msgpack_pack_nil(enc); - } else if (SvPOKp(sv)) { - STRLEN len; - char * csv = SvPV(sv, len); + if (SvPOKp(sv)) { + STRLEN const len = SvCUR(sv); + const char* const pv = SvPVX_const(sv); - if (s_pref_int && try_int(enc, csv, len)) { + if (s_pref_int && try_int(enc, pv, len)) { return; } else { msgpack_pack_raw(enc, len); - msgpack_pack_raw_body(enc, csv, len); + msgpack_pack_raw_body(enc, pv, len); + } + } else if (SvNIOKp(sv)) { + if(SvUOK(sv)) { + PACK_UV(enc, SvUVX(sv)); + } + else if(SvIOKp(sv)) { + PACK_IV(enc, SvIVX(sv)); + } + else { + /* XXX long double is not supported yet. */ + msgpack_pack_double(enc, (double)SvNVX(sv)); } - } else if (SvNOKp(sv)) { - /* XXX long double is not supported yet. */ - msgpack_pack_double(enc, (double)SvNVX(sv)); - } else if (SvIOK_UV(sv)) { - msgpack_pack_uint32(enc, SvUV(sv)); - } else if (SvIOKp(sv)) { - PACK_IV(enc, SvIV(sv)); } else if (SvROK(sv)) { _msgpack_pack_rv(enc, SvRV(sv), depth-1); } else if (!SvOK(sv)) { @@ -182,7 +188,7 @@ STATIC_INLINE void _msgpack_pack_sv(enc_t *enc, SV* sv, int depth) { STATIC_INLINE void _msgpack_pack_rv(enc_t *enc, SV* sv, int depth) { svtype svt; dTHX; - if (depth <= 0) Perl_croak(aTHX_ ERR_NESTING_EXCEEDED); + assert(sv); SvGETMAGIC(sv); svt = SvTYPE(sv); @@ -205,7 +211,7 @@ STATIC_INLINE void _msgpack_pack_rv(enc_t *enc, SV* sv, int depth) { msgpack_pack_map(enc, count); - while (he = hv_iternext(hval)) { + while ((he = hv_iternext(hval))) { _msgpack_pack_sv(enc, hv_iterkeysv(he), depth); _msgpack_pack_sv(enc, HeVAL(he), depth); } @@ -231,7 +237,7 @@ STATIC_INLINE void _msgpack_pack_rv(enc_t *enc, SV* sv, int depth) { else if (len == 1 && *pv == '0') msgpack_pack_false(enc); else { - sv_dump(sv); + //sv_dump(sv); croak("cannot encode reference to scalar '%s' unless the scalar is 0 or 1", SvPV_nolen (sv_2mortal (newRV_inc (sv)))); } @@ -252,7 +258,7 @@ XS(xs_pack) { if (items >= 3) depth = SvIV(ST(2)); enc_t enc; - enc.sv = sv_2mortal(NEWSV(0, INIT_SIZE)); + enc.sv = sv_2mortal(newSV(INIT_SIZE)); enc.cur = SvPVX(enc.sv); enc.end = SvEND(enc.sv); SvPOK_only(enc.sv); diff --git a/perl/xs-src/unpack.c b/perl/xs-src/unpack.c index c329e99c..e89b22c5 100644 --- a/perl/xs-src/unpack.c +++ b/perl/xs-src/unpack.c @@ -1,19 +1,17 @@ -#ifdef __cplusplus -extern "C" { -#endif - #define NEED_newRV_noinc #define NEED_sv_2pv_flags #include "xshelper.h" -#ifdef __cplusplus -}; -#endif +#define MY_CXT_KEY "Data::MessagePack::_unpack_guts" XS_VERSION +typedef struct { + SV* msgpack_true; + SV* msgpack_false; +} my_cxt_t; +START_MY_CXT typedef struct { - int finished; - SV* source; - int incremented; + bool finished; + bool incremented; } unpack_user; #include "msgpack/unpack_define.h" @@ -31,22 +29,55 @@ typedef struct { #define msgpack_unpack_user unpack_user +void init_Data__MessagePack_unpack(pTHX_ bool const cloning) { + if(!cloning) { + MY_CXT_INIT; + MY_CXT.msgpack_true = NULL; + MY_CXT.msgpack_false = NULL; + } + else { + MY_CXT_CLONE; + MY_CXT.msgpack_true = NULL; + MY_CXT.msgpack_false = NULL; + } +} + + + /* ---------------------------------------------------------------------- */ /* utility functions */ -STATIC_INLINE SV * -get_bool (const char *name) { - dTHX; - SV * sv = sv_mortalcopy(get_sv( name, 1 )); - - SvREADONLY_on(sv); - SvREADONLY_on( SvRV(sv) ); - +static SV* +load_bool(pTHX_ const char* const name) { + CV* const cv = get_cv(name, GV_ADD); + dSP; + PUSHMARK(SP); + call_sv((SV*)cv, G_SCALAR); + SPAGAIN; + SV* const sv = newSVsv(POPs); + PUTBACK; return sv; } -/* ---------------------------------------------------------------------- */ +static SV* +get_bool(bool const value) { + dTHX; + dMY_CXT; + if(value) { + if(!MY_CXT.msgpack_true) { + MY_CXT.msgpack_true = load_bool(aTHX_ "Data::MessagePack::true"); + } + return newSVsv(MY_CXT.msgpack_true); + } + else { + if(!MY_CXT.msgpack_false) { + MY_CXT.msgpack_false = load_bool(aTHX_ "Data::MessagePack::false"); + } + return newSVsv(MY_CXT.msgpack_false); + } +} +/* ---------------------------------------------------------------------- */ struct template_context; typedef struct template_context msgpack_unpack_t; @@ -54,152 +85,210 @@ static void template_init(msgpack_unpack_t* u); static SV* template_data(msgpack_unpack_t* u); -static int template_execute(msgpack_unpack_t* u, +static int template_execute(msgpack_unpack_t* u PERL_UNUSED_DECL, const char* data, size_t len, size_t* off); -STATIC_INLINE SV* template_callback_root(unpack_user* u) -{ dTHX; return &PL_sv_undef; } +STATIC_INLINE SV* template_callback_root(unpack_user* u PERL_UNUSED_DECL) +{ + return NULL; +} -STATIC_INLINE int template_callback_uint8(unpack_user* u, uint8_t d, SV** o) -{ dTHX; *o = sv_2mortal(newSVuv(d)); return 0; } +#if IVSIZE == 4 -STATIC_INLINE int template_callback_uint16(unpack_user* u, uint16_t d, SV** o) -{ dTHX; *o = sv_2mortal(newSVuv(d)); return 0; } - -STATIC_INLINE int template_callback_uint32(unpack_user* u, uint32_t d, SV** o) -{ dTHX; *o = sv_2mortal(newSVuv(d)); return 0; } - -STATIC_INLINE int template_callback_uint64(unpack_user* u, uint64_t d, SV** o) +STATIC_INLINE int template_callback_UV(unpack_user* u PERL_UNUSED_DECL, UV const d, SV** o) { dTHX; -#if IVSIZE==4 - *o = sv_2mortal(newSVnv(d)); -#else - *o = sv_2mortal(newSVuv(d)); -#endif + *o = newSVuv(d); return 0; } -STATIC_INLINE int template_callback_int8(unpack_user* u, int8_t d, SV** o) -{ dTHX; *o = sv_2mortal(newSViv((long)d)); return 0; } +STATIC_INLINE int template_callback_uint64(unpack_user* u PERL_UNUSED_DECL, uint64_t const d, SV** o) +{ + dTHX; + *o = newSVnv((NV)d); + return 0; +} -STATIC_INLINE int template_callback_int16(unpack_user* u, int16_t d, SV** o) -{ dTHX; *o = sv_2mortal(newSViv((long)d)); return 0; } +STATIC_INLINE int template_callback_IV(unpack_user* u PERL_UNUSED_DECL, IV const d, SV** o) +{ + dTHX; + *o = newSViv(d); + return 0; +} -STATIC_INLINE int template_callback_int32(unpack_user* u, int32_t d, SV** o) -{ dTHX; *o = sv_2mortal(newSViv((long)d)); return 0; } +STATIC_INLINE int template_callback_int64(unpack_user* u PERL_UNUSED_DECL, int64_t const d, SV** o) +{ + dTHX; + *o = newSVnv((NV)d); + return 0; +} -STATIC_INLINE int template_callback_int64(unpack_user* u, int64_t d, SV** o) -{ dTHX; *o = sv_2mortal(newSViv(d)); return 0; } +#else /* IVSIZE == 8 */ -STATIC_INLINE int template_callback_float(unpack_user* u, float d, SV** o) -{ dTHX; *o = sv_2mortal(newSVnv(d)); return 0; } -STATIC_INLINE int template_callback_double(unpack_user* u, double d, SV** o) -{ dTHX; *o = sv_2mortal(newSVnv(d)); return 0; } +STATIC_INLINE int template_callback_UV(unpack_user* u PERL_UNUSED_DECL, UV const d, SV** o) +{ + dTHX; + *o = newSVuv(d); + return 0; +} + +#define template_callback_uint64 template_callback_UV + +STATIC_INLINE int template_callback_IV(unpack_user* u PERL_UNUSED_DECL, IV const d, SV** o) +{ + dTHX; + *o = newSViv(d); + return 0; +} + +#define template_callback_int64 template_callback_IV + +#endif /* IVSIZE */ + +#define template_callback_uint8 template_callback_UV +#define template_callback_uint16 template_callback_UV +#define template_callback_uint32 template_callback_UV + +#define template_callback_int8 template_callback_IV +#define template_callback_int16 template_callback_IV +#define template_callback_int32 template_callback_IV + +#define template_callback_float template_callback_double + +STATIC_INLINE int template_callback_double(unpack_user* u PERL_UNUSED_DECL, double d, SV** o) +{ + dTHX; + *o = newSVnv(d); + return 0; +} /* &PL_sv_undef is not so good. see http://gist.github.com/387743 */ -STATIC_INLINE int template_callback_nil(unpack_user* u, SV** o) -{ dTHX; *o = sv_newmortal(); return 0; } +STATIC_INLINE int template_callback_nil(unpack_user* u PERL_UNUSED_DECL, SV** o) +{ + dTHX; + *o = newSV(0); + return 0; +} -STATIC_INLINE int template_callback_true(unpack_user* u, SV** o) -{ dTHX; *o = get_bool("Data::MessagePack::true") ; return 0; } +STATIC_INLINE int template_callback_true(unpack_user* u PERL_UNUSED_DECL, SV** o) +{ + *o = get_bool(true); + return 0; +} -STATIC_INLINE int template_callback_false(unpack_user* u, SV** o) -{ dTHX; *o = get_bool("Data::MessagePack::false") ; return 0; } +STATIC_INLINE int template_callback_false(unpack_user* u PERL_UNUSED_DECL, SV** o) +{ + *o = get_bool(false); + return 0; +} -STATIC_INLINE int template_callback_array(unpack_user* u, unsigned int n, SV** o) -{ dTHX; AV* a = (AV*)sv_2mortal((SV*)newAV()); *o = sv_2mortal((SV*)newRV_inc((SV*)a)); av_extend(a, n); return 0; } +STATIC_INLINE int template_callback_array(unpack_user* u PERL_UNUSED_DECL, unsigned int n, SV** o) +{ + dTHX; + AV* const a = newAV(); + *o = newRV_noinc((SV*)a); + av_extend(a, n + 1); + return 0; +} -STATIC_INLINE int template_callback_array_item(unpack_user* u, SV** c, SV* o) -{ dTHX; av_push((AV*)SvRV(*c), o); SvREFCNT_inc(o); return 0; } /* FIXME set value directry RARRAY_PTR(obj)[RARRAY_LEN(obj)++] */ +STATIC_INLINE int template_callback_array_item(unpack_user* u PERL_UNUSED_DECL, SV** c, SV* o) +{ + dTHX; + AV* const a = (AV*)SvRV(*c); + assert(SvTYPE(a) == SVt_PVAV); + (void)av_store(a, AvFILLp(a) + 1, o); // the same as av_push(a, o) + return 0; +} -STATIC_INLINE int template_callback_map(unpack_user* u, unsigned int n, SV** o) -{ dTHX; HV * h = (HV*)sv_2mortal((SV*)newHV()); *o = sv_2mortal(newRV_inc((SV*)h)); return 0; } +STATIC_INLINE int template_callback_map(unpack_user* u PERL_UNUSED_DECL, unsigned int n, SV** o) +{ + dTHX; + HV* const h = newHV(); + hv_ksplit(h, n); + *o = newRV_noinc((SV*)h); + return 0; +} -STATIC_INLINE int template_callback_map_item(unpack_user* u, SV** c, SV* k, SV* v) -{ dTHX; hv_store_ent((HV*)SvRV(*c), k, v, 0); SvREFCNT_inc(v); return 0; } +STATIC_INLINE int template_callback_map_item(unpack_user* u PERL_UNUSED_DECL, SV** c, SV* k, SV* v) +{ + dTHX; + HV* const h = (HV*)SvRV(*c); + assert(SvTYPE(h) == SVt_PVHV); + (void)hv_store_ent(h, k, v, 0); + SvREFCNT_dec(k); + return 0; +} -STATIC_INLINE int template_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, SV** o) -{ dTHX; *o = sv_2mortal((l==0) ? newSVpv("", 0) : newSVpv(p, l)); return 0; } -/* { *o = newSVpvn_flags(p, l, SVs_TEMP); return 0; } <= this does not works. */ - -#define UNPACKER(from, name) \ - msgpack_unpack_t *name; \ - name = INT2PTR(msgpack_unpack_t*, SvROK((from)) ? SvIV(SvRV((from))) : SvIV((from))); \ - if(name == NULL) { \ - Perl_croak(aTHX_ "NULL found for " # name " when shouldn't be."); \ - } +STATIC_INLINE int template_callback_raw(unpack_user* u PERL_UNUSED_DECL, const char* b PERL_UNUSED_DECL, const char* p, unsigned int l, SV** o) +{ + dTHX; + /* newSVpvn(p, l) returns an undef if p == NULL */ + *o = ((l==0) ? newSVpvs("") : newSVpvn(p, l)); + return 0; +} #include "msgpack/unpack_template.h" -STATIC_INLINE SV* _msgpack_unpack(SV* data, int limit) { - msgpack_unpack_t mp; - dTHX; - unpack_user u = {0, &PL_sv_undef}; - int ret; - size_t from = 0; - STRLEN dlen; - const char * dptr = SvPV_const(data, dlen); - SV* obj; - - template_init(&mp); - mp.user = u; - - mp.user.source = data; - ret = template_execute(&mp, dptr, (size_t)dlen, &from); - mp.user.source = &PL_sv_undef; - - obj = template_data(&mp); - - if(ret < 0) { - Perl_croak(aTHX_ "parse error."); - } else if(ret == 0) { - Perl_croak(aTHX_ "insufficient bytes."); - } else { - if(from < dlen) { - Perl_croak(aTHX_ "extra bytes."); - } - return obj; - } -} - -XS(xs_unpack_limit) { - dXSARGS; - - if (items != 3) { - Perl_croak(aTHX_ "Usage: Data::MessagePack->unpack('datadata', $limit)"); +#define UNPACKER(from, name) \ + msgpack_unpack_t *name; \ + if(!(SvROK(from) && SvIOK(SvRV(from)))) { \ + Perl_croak(aTHX_ "Invalid unpacker instance for " #name); \ + } \ + name = INT2PTR(msgpack_unpack_t*, SvIVX(SvRV((from)))); \ + if(name == NULL) { \ + Perl_croak(aTHX_ "NULL found for " # name " when shouldn't be."); \ } - { - int limit = SvIV(ST(2)); - ST(0) = _msgpack_unpack(ST(1), limit); - } - XSRETURN(1); -} - - XS(xs_unpack) { dXSARGS; + SV* const data = ST(1); + size_t limit; + + if (items == 2) { + limit = sv_len(data); + } + else if(items == 3) { + limit = SvUVx(ST(2)); + } + else { + Perl_croak(aTHX_ "Usage: Data::MessagePack->unpack('data' [, $limit])"); + } + + STRLEN dlen; + const char* const dptr = SvPV_const(data, dlen); + msgpack_unpack_t mp; + template_init(&mp); - if (items != 2) { - Perl_croak(aTHX_ "Usage: Data::MessagePack->unpack('datadata')"); - } - - { - ST(0) = _msgpack_unpack(ST(1), sv_len(ST(1))); + unpack_user const u = {false, false}; + mp.user = u; + + size_t from = 0; + int const ret = template_execute(&mp, dptr, (size_t)dlen, &from); + SV* const obj = template_data(&mp); + sv_2mortal(obj); + + if(ret < 0) { + Perl_croak(aTHX_ "Data::MessagePack->unpack: parse error"); + } else if(ret == 0) { + Perl_croak(aTHX_ "Data::MessagePack->unpack: insufficient bytes"); + } else { + if(from < dlen) { + Perl_croak(aTHX_ "Data::MessagePack->unpack: extra bytes"); + } } + ST(0) = obj; XSRETURN(1); } /* ------------------------------ stream -- */ /* http://twitter.com/frsyuki/status/13249304748 */ -STATIC_INLINE void _reset(SV* self) { +STATIC_INLINE void _reset(SV* const self) { dTHX; - unpack_user u = {0, &PL_sv_undef, 0}; + unpack_user const u = {false, false}; UNPACKER(self, mp); template_init(mp); @@ -212,10 +301,10 @@ XS(xs_unpacker_new) { Perl_croak(aTHX_ "Usage: Data::MessagePack::Unpacker->new()"); } - SV* self = sv_newmortal(); - msgpack_unpack_t *mp; + SV* const self = sv_newmortal(); + msgpack_unpack_t *mp; - Newx(mp, 1, msgpack_unpack_t); + Newxz(mp, 1, msgpack_unpack_t); sv_setref_pv(self, "Data::MessagePack::Unpacker", mp); _reset(self); @@ -224,73 +313,64 @@ XS(xs_unpacker_new) { XSRETURN(1); } -STATIC_INLINE SV* _execute_impl(SV* self, SV* data, UV off, I32 limit) { +STATIC_INLINE SV* +_execute_impl(SV* const self, SV* const data, UV const offset, UV const limit) { dTHX; + + if(offset >= limit) { + Perl_croak(aTHX_ "offset (%"UVuf") is bigger than data buffer size (%"UVuf")", + offset, limit); + } + UNPACKER(self, mp); - size_t from = off; - const char* dptr = SvPV_nolen_const(data); - long dlen = limit; - int ret; + size_t from = offset; + const char* const dptr = SvPV_nolen_const(data); - if(from >= dlen) { - Perl_croak(aTHX_ "offset is bigger than data buffer size."); - } + int const ret = template_execute(mp, dptr, limit, &from); - mp->user.source = data; - ret = template_execute(mp, dptr, (size_t)dlen, &from); - mp->user.source = &PL_sv_undef; - - if(ret < 0) { - Perl_croak(aTHX_ "parse error."); - } else if(ret > 0) { - mp->user.finished = 1; - return sv_2mortal(newSVuv(from)); - } else { - mp->user.finished = 0; - return sv_2mortal(newSVuv(from)); - } + if(ret < 0) { + Perl_croak(aTHX_ "Data::MessagePack::Unpacker: parse error while executing"); + } else { + mp->user.finished = (ret > 0) ? true : false; + return sv_2mortal(newSVuv(from)); + } } XS(xs_unpacker_execute) { dXSARGS; - if (items != 3) { - Perl_croak(aTHX_ "Usage: $unpacker->execute_limit(data, off)"); + SV* const self = ST(0); + SV* const data = ST(1); + UV offset; + + if (items == 2) { + offset = 0; + } + else if (items == 3) { + offset = SvUVx(ST(2)); + } + else { + Perl_croak(aTHX_ "Usage: $unpacker->execute(data, offset = 0)"); } - UNPACKER(ST(0), mp); - { - SV* self = ST(0); - SV* data = ST(1); - IV off = SvIV(ST(2)); /* offset of $data. normaly, 0. */ - - ST(0) = _execute_impl(self, data, off, sv_len(data)); - - { - SV * d2 = template_data(mp); - if (!mp->user.incremented && d2) { - SvREFCNT_inc(d2); - mp->user.incremented = 1; - } - } - } + UNPACKER(self, mp); + ST(0) = _execute_impl(self, data, offset, sv_len(data)); XSRETURN(1); } XS(xs_unpacker_execute_limit) { dXSARGS; if (items != 4) { - Perl_croak(aTHX_ "Usage: $unpacker->execute_limit(data, off, limit)"); + Perl_croak(aTHX_ "Usage: $unpacker->execute_limit(data, offset, limit)"); } - SV* self = ST(0); - SV* data = ST(1); - IV off = SvIV(ST(2)); - IV limit = SvIV(ST(3)); - - ST(0) = _execute_impl(self, data, off, limit); + SV* const self = ST(0); + SV* const data = ST(1); + UV const offset = SvUVx(ST(2)); + UV const limit = SvUVx(ST(3)); + ST(0) = _execute_impl(self, data, offset, limit); XSRETURN(1); } @@ -300,9 +380,8 @@ XS(xs_unpacker_is_finished) { Perl_croak(aTHX_ "Usage: $unpacker->is_finished()"); } - UNPACKER(ST(0), mp); - ST(0) = (mp->user.finished) ? &PL_sv_yes : &PL_sv_no; - + UNPACKER(ST(0), mp); + ST(0) = boolSV(mp->user.finished); XSRETURN(1); } @@ -312,9 +391,8 @@ XS(xs_unpacker_data) { Perl_croak(aTHX_ "Usage: $unpacker->data()"); } - UNPACKER(ST(0), mp); - ST(0) = sv_2mortal(newSVsv(template_data(mp))); - + UNPACKER(ST(0), mp); + ST(0) = template_data(mp); XSRETURN(1); } @@ -324,13 +402,10 @@ XS(xs_unpacker_reset) { Perl_croak(aTHX_ "Usage: $unpacker->reset()"); } - UNPACKER(ST(0), mp); - { - SV * data = template_data(mp); - if (data) { - SvREFCNT_dec(data); - } - } + UNPACKER(ST(0), mp); + + SV* const data = template_data(mp); + sv_2mortal(data); _reset(ST(0)); XSRETURN(0); @@ -342,11 +417,10 @@ XS(xs_unpacker_destroy) { Perl_croak(aTHX_ "Usage: $unpacker->DESTROY()"); } - UNPACKER(ST(0), mp); - SV * data = template_data(mp); - if (SvOK(data)) { - SvREFCNT_dec(data); - } + UNPACKER(ST(0), mp); + + SV* const data = template_data(mp); + sv_2mortal(data); Safefree(mp); XSRETURN(0); diff --git a/ruby/test/test_pack_unpack.rb b/ruby/test/test_pack_unpack.rb index 545e5939..f378c3c7 100644 --- a/ruby/test/test_pack_unpack.rb +++ b/ruby/test/test_pack_unpack.rb @@ -239,7 +239,7 @@ class MessagePackTestPackUnpack < Test::Unit::TestCase end it "gc mark" do - obj = [{["a","b"]=>["c","d"]}, ["e","f"], "d"] + obj = [1024, {["a","b"]=>["c","d"]}, ["e","f"], "d", 70000, 4.12, 1.5, 1.5, 1.5] num = 4 raw = obj.to_msgpack * num pac = MessagePack::Unpacker.new @@ -257,7 +257,7 @@ class MessagePackTestPackUnpack < Test::Unit::TestCase end it "streaming backward compatibility" do - obj = [{["a","b"]=>["c","d"]}, ["e","f"], "d"] + obj = [1024, {["a","b"]=>["c","d"]}, ["e","f"], "d", 70000, 4.12, 1.5, 1.5, 1.5] num = 4 raw = obj.to_msgpack * num pac = MessagePack::Unpacker.new