From 8f7f23a0e5fcc595f6d6178e9e532b38b5cd1b46 Mon Sep 17 00:00:00 2001 From: Vincent de Phily Date: Mon, 28 Jun 2010 18:11:52 +0200 Subject: [PATCH 01/12] Rewrite unpack_/1 using pattern matching to get a 30-40% speedup. Simplify pack_* and unpack_{array,map} function clauses to get more readability and a minor speedup. --- erlang/msgpack.erl | 290 +++++++++++++++++++-------------------------- 1 file changed, 123 insertions(+), 167 deletions(-) diff --git a/erlang/msgpack.erl b/erlang/msgpack.erl index 255542b6..e94262d1 100644 --- a/erlang/msgpack.erl +++ b/erlang/msgpack.erl @@ -42,8 +42,10 @@ pack(O) when is_float(O) -> pack_double(O); pack(nil) -> pack_nil(); -pack(Bool) when is_atom(Bool) -> - pack_bool(Bool); +pack(true) -> + pack_true(); +pack(false) -> + pack_false(); pack(Bin) when is_binary(Bin) -> pack_raw(Bin); pack(List) when is_list(List) -> @@ -61,13 +63,8 @@ pack(_O) -> % TODO: error case for imcomplete format when short for any type formats. -spec unpack( binary() )-> {msgpack_term(), binary()} | {more, non_neg_integer()} | {error, reason()}. -unpack(Bin) when not is_binary(Bin)-> - {error, badarg}; -unpack(Bin) when bit_size(Bin) >= 8 -> - << Flag:8/unsigned-integer, Payload/binary >> = Bin, - unpack_(Flag, Payload); -unpack(<<>>)-> % when bit_size(Bin) < 8 -> - {more, 1}. +unpack(Bin) -> + unpack_(Bin). -spec unpack_all( binary() ) -> [msgpack_term()]. unpack_all(Data)-> @@ -82,56 +79,52 @@ unpack_all(Data)-> % ===== internal APIs ===== % % positive fixnum -pack_uint_(N) when is_integer( N ) , N < 128 -> +pack_uint_(N) when N < 128 -> << 2#0:1, N:7 >>; % uint 8 -pack_uint_( N ) when is_integer( N ) andalso N < 256 -> +pack_uint_(N) when N < 256 -> << 16#CC:8, N:8 >>; - % uint 16 -pack_uint_( N ) when is_integer( N ) andalso N < 65536 -> +pack_uint_(N) when N < 65536 -> << 16#CD:8, N:16/big-unsigned-integer-unit:1 >>; - % uint 32 -pack_uint_( N ) when is_integer( N ) andalso N < 16#FFFFFFFF-> +pack_uint_(N) when N < 16#FFFFFFFF-> << 16#CE:8, N:32/big-unsigned-integer-unit:1 >>; - % uint 64 -pack_uint_( N ) when is_integer( N )-> +pack_uint_(N) -> << 16#CF:8, N:64/big-unsigned-integer-unit:1 >>. % negative fixnum -pack_int_( N ) when is_integer( N ) , N >= -32-> +pack_int_(N) when N >= -32-> << 2#111:3, N:5 >>; % int 8 -pack_int_( N ) when is_integer( N ) , N >= -256 -> +pack_int_(N) when N >= -256 -> << 16#D0:8, N:8 >>; % int 16 -pack_int_( N ) when is_integer( N ), N >= -65536 -> +pack_int_(N) when N >= -65536 -> << 16#D1:8, N:16/big-signed-integer-unit:1 >>; % int 32 -pack_int_( N ) when is_integer( N ), N >= -16#FFFFFFFF -> +pack_int_(N) when N >= -16#FFFFFFFF -> << 16#D2:8, N:32/big-signed-integer-unit:1 >>; % int 64 -pack_int_( N ) when is_integer( N )-> +pack_int_(N) -> << 16#D3:8, N:64/big-signed-integer-unit:1 >>. -% nil -pack_nil()-> << 16#C0:8 >>. -% pack_true / pack_false -pack_bool(true)-> << 16#C3:8 >>; -pack_bool(false)-> << 16#C2:8 >>. +% nil/true/false +pack_nil() -> << 16#C0:8 >>. +pack_true()-> << 16#C3:8 >>. +pack_false()-> << 16#C2:8 >>. % float : erlang's float is always IEEE 754 64bit format. %pack_float(F) when is_float(F)-> % << 16#CA:8, F:32/big-float-unit:1 >>. % pack_double(F). % double -pack_double(F) when is_float(F)-> +pack_double(F) -> << 16#CB:8, F:64/big-float-unit:1 >>. % raw bytes -pack_raw(Bin) when is_binary(Bin)-> +pack_raw(Bin) -> case byte_size(Bin) of Len when Len < 6-> << 2#101:3, Len:5, Bin/binary >>; @@ -142,24 +135,22 @@ pack_raw(Bin) when is_binary(Bin)-> end. % list / tuple -pack_array(L) when is_list(L)-> +pack_array(L) -> case length(L) of Len when Len < 16 -> << 2#1001:4, Len:4/integer-unit:1, (pack_array_(L, <<>>))/binary >>; Len when Len < 16#10000 -> % 65536 - << 16#DC:8, Len:16/big-unsigned-integer-unit:1,(pack_array_(L, <<>>))/binary >>; + << 16#DC:8, Len:16/big-unsigned-integer-unit:1, (pack_array_(L, <<>>))/binary >>; Len -> - << 16#DD:8, Len:32/big-unsigned-integer-unit:1,(pack_array_(L, <<>>))/binary >> + << 16#DD:8, Len:32/big-unsigned-integer-unit:1, (pack_array_(L, <<>>))/binary >> end. pack_array_([], Acc) -> Acc; pack_array_([Head|Tail], Acc) -> pack_array_(Tail, <>). % FIXME! this should be tail-recursive and without lists:reverse/1 -unpack_array_(<<>>, 0, RetList) -> {lists:reverse(RetList), <<>>}; -unpack_array_(Remain, 0, RetList) when is_binary(Remain)-> {lists:reverse(RetList), Remain}; -unpack_array_(<<>>, RestLen, _RetList) when RestLen > 0 -> {more, RestLen}; -unpack_array_(Bin, RestLen, RetList) when is_binary(Bin)-> +unpack_array_(Remain, 0, RetList) -> {lists:reverse(RetList), Remain}; +unpack_array_(Bin, RestLen, RetList) -> case unpack(Bin) of {more, Len} -> {more, Len+RestLen-1}; {Term, Rest}-> unpack_array_(Rest, RestLen-1, [Term|RetList]) @@ -181,10 +172,10 @@ pack_map_([{Key,Value}|Tail], Acc) -> pack_map_(Tail, << Acc/binary, (pack(Key))/binary, (pack(Value))/binary>>). % FIXME: write test for unpack_map/1 --spec unpack_map_(binary(), non_neg_integer(), [{term(), msgpack_term()}])-> - {more, non_neg_integer()} | { any(), binary()}. -unpack_map_(Bin, 0, Acc) when is_binary(Bin) -> {{lists:reverse(Acc)}, Bin}; -unpack_map_(Bin, Len, Acc) when is_binary(Bin) and is_integer(Len) -> +-spec unpack_map_(binary(), non_neg_integer(), [{term(), msgpack_term()}]) -> + {more, non_neg_integer()} | {any(), binary()}. +unpack_map_(Bin, 0, Acc) -> {{lists:reverse(Acc)}, Bin}; +unpack_map_(Bin, Len, Acc) -> case unpack(Bin) of { more, MoreLen } -> { more, MoreLen+Len-1 }; { Key, Rest } -> @@ -195,142 +186,107 @@ unpack_map_(Bin, Len, Acc) when is_binary(Bin) and is_integer(Len) -> end end. -% {more, --spec unpack_(Flag::integer(), Payload::binary())-> - {more, pos_integer()} | {msgpack_term(), binary()} | {error, reason()}. -unpack_(Flag, Payload)-> - PayloadLen = byte_size(Payload), - case Flag of - 16#C0 -> - {nil, Payload}; - 16#C2 -> - {false, Payload}; - 16#C3 -> - {true, Payload}; - 16#CA when PayloadLen >= 4 -> % 32bit float - << Return:32/float-unit:1, Rest/binary >> = Payload, - {Return, Rest}; - 16#CA -> - {more, 4-PayloadLen}; % at least more +-spec unpack_(Payload::binary()) -> {more, pos_integer()} | {msgpack_term(), binary()} | {error, reason()}. +unpack_(<<16#C0, Rest/binary>>) -> + {nil, Rest}; +unpack_(<<16#C2, Rest/binary>>) -> + {false, Rest}; +unpack_(<<16#C3, Rest/binary>>) -> + {true, Rest}; - 16#CB when PayloadLen >= 8 -> % 64bit float - << Return:64/float-unit:1, Rest/binary >> = Payload, - {Return, Rest}; - 16#CB -> - {more, 8-PayloadLen}; +unpack_(<<16#CA, Return:32/float-unit:1, Rest/binary>>) -> % 32bit float + {Return, Rest}; +unpack_(<<16#CA, Rest/binary>>) -> + {more, 4-byte_size(Rest)}; +unpack_(<<16#CB, Return:64/float-unit:1, Rest/binary>>) -> % 64bit float + {Return, Rest}; +unpack_(<<16#CB, Rest/binary>>) -> + {more, 8-byte_size(Rest)}; - 16#CC when PayloadLen >= 1 -> % uint 8 - << Int:8/unsigned-integer, Rest/binary >> = Payload, - {Int, Rest}; - 16#CC -> - {more, 1}; +unpack_(<<16#CC, Int:8/unsigned-integer, Rest/binary>>) -> % uint 8 + {Int, Rest}; +unpack_(<<16#CC>>) -> + {more, 1}; +unpack_(<<16#CD, Int:16/big-unsigned-integer-unit:1, Rest/binary>>) -> % uint 16 + {Int, Rest}; +unpack_(<<16#CD, Rest/binary>>) -> + {more, 2-byte_size(Rest)}; +unpack_(<<16#CE, Int:32/big-unsigned-integer-unit:1, Rest/binary>>) -> % uint 32 + {Int, Rest}; +unpack_(<<16#CE, Rest/binary>>) -> + {more, 4-byte_size(Rest)}; +unpack_(<<16#CF, Int:64/big-unsigned-integer-unit:1, Rest/binary>>) -> % uint 64 + {Int, Rest}; +unpack_(<<16#CF, Rest/binary>>) -> + {more, 8-byte_size(Rest)}; - 16#CD when PayloadLen >= 2 -> % uint 16 - << Int:16/big-unsigned-integer-unit:1, Rest/binary >> = Payload, - {Int, Rest}; - 16#CD -> - {more, 2-PayloadLen}; +unpack_(<<16#D0, Int:8/signed-integer, Rest/binary>>) -> % int 8 + {Int, Rest}; +unpack_(<<16#D0>>) -> + {more, 1}; +unpack_(<<16#D1, Int:16/big-signed-integer-unit:1, Rest/binary>>) -> % int 16 + {Int, Rest}; +unpack_(<<16#D1, Rest/binary>>) -> + {more, 2-byte_size(Rest)}; +unpack_(<<16#D2, Int:32/big-signed-integer-unit:1, Rest/binary>>) -> % int 32 + {Int, Rest}; +unpack_(<<16#D2, Rest/binary>>) -> + {more, 4-byte_size(Rest)}; +unpack_(<<16#D3, Int:64/big-signed-integer-unit:1, Rest/binary>>) -> % int 64 + {Int, Rest}; +unpack_(<<16#D3, Rest/binary>>) -> + {more, 8-byte_size(Rest)}; - 16#CE when PayloadLen >= 4 -> - << Int:32/big-unsigned-integer-unit:1, Rest/binary >> = Payload, - {Int, Rest}; - 16#CE -> - {more, 4-PayloadLen}; % at least more +unpack_(<<16#DA, Len:16/unsigned-integer-unit:1, Val:Len/binary, Rest/binary>>) -> % raw 16 + {Val, Rest}; +unpack_(<<16#DA, Rest/binary>>) -> + {more, 16-byte_size(Rest)}; +unpack_(<<16#DB, Len:32/unsigned-integer-unit:1, Val:Len/binary, Rest/binary>>) -> % raw 32 + {Val, Rest}; +unpack_(<<16#DB, Rest/binary>>) -> + {more, 32-byte_size(Rest)}; - 16#CF when PayloadLen >= 8 -> - << Int:64/big-unsigned-integer-unit:1, Rest/binary >> = Payload, - {Int, Rest}; - 16#CF -> - {more, 8-PayloadLen}; +unpack_(<<16#DC, Len:16/big-unsigned-integer-unit:1, Rest/binary>>) -> % array 16 + unpack_array_(Rest, Len, []); +unpack_(<<16#DC, Rest/binary>>) -> + {more, 2-byte_size(Rest)}; +unpack_(<<16#DD, Len:32/big-unsigned-integer-unit:1, Rest/binary>>) -> % array 32 + unpack_array_(Rest, Len, []); +unpack_(<<16#DD, Rest/binary>>) -> + {more, 4-byte_size(Rest)}; - 16#D0 when PayloadLen >= 1 -> % int 8 - << Int:8/big-signed-integer-unit:1, Rest/binary >> = Payload, - {Int, Rest}; - 16#D0 -> - {more, 1}; +unpack_(<<16#DE, Len:16/big-unsigned-integer-unit:1, Rest/binary>>) -> % map 16 + unpack_map_(Rest, Len, []); +unpack_(<<16#DE, Rest/binary>>) -> + {more, 2-byte_size(Rest)}; +unpack_(<<16#DF, Len:32/big-unsigned-integer-unit:1, Rest/binary>>) -> % map 32 + unpack_map_(Rest, Len, []); +unpack_(<<16#DF, Rest/binary>>) -> + {more, 4-byte_size(Rest)}; - 16#D1 when PayloadLen >= 2 -> % int 16 - << Int:16/big-signed-integer-unit:1, Rest/binary >> = Payload, - {Int, Rest}; - 16#D1 -> - {more, 2-PayloadLen}; +unpack_(<<0:1, Value:7, Rest/binary>>) -> % positive fixnum + {Value, Rest}; +unpack_(<<2#111:3, Value:5, Rest/binary>>) -> % negative fixnum + {Value - 2#100000, Rest}; +unpack_(<<2#101:3, Len:5, Value:Len/binary, Rest/binary>>) -> % fixraw + {Value, Rest}; +unpack_(<<2#101:3, Len:5, Rest/binary>>) -> + {more, Len-byte_size(Rest)}; +unpack_(<<2#1001:4, Len:4, Rest/binary>>) -> % fixarray + unpack_array_(Rest, Len, []); +unpack_(<<2#1000:4, Len:4, Rest/binary>>) -> % fixmap + unpack_map_(Rest, Len, []); - 16#D2 when PayloadLen >= 4 -> % int 32 - << Int:32/big-signed-integer-unit:1, Rest/binary >> = Payload, - {Int, Rest}; - 16#D2 -> - {more, 4-PayloadLen}; +%unpack_(<>) when F==16#C1; F==16#C4; F==16#C5; F==16#C6; F==16#C7; F==16#C8; F==16#C9; F==16#D5; F==16#D6; F==16#D7; F==16#D8; F==16#D9-> +% {error, {badarg, <>}}. +%unpack_(Other) when is_binary(Bin) -> +% {more, 1}. +unpack_(<<>>) -> + {more, 1}. +unpack_(Other) -> + {error, {badarg, Other}}. - 16#D3 when PayloadLen >= 8 -> % int 64 - << Int:64/big-signed-integer-unit:1, Rest/binary >> = Payload, - {Int, Rest}; - 16#D3 -> - {more, 8-PayloadLen}; - - 16#DA when PayloadLen >= 2 -> % raw 16 - << Len:16/unsigned-integer-unit:1, Rest/binary >> = Payload, - << Return:Len/binary, Remain/binary >> = Rest, - {Return, Remain}; - 16#DA -> - {more, 16-PayloadLen}; - - 16#DB when PayloadLen >= 4 -> % raw 32 - << Len:32/big-unsigned-integer-unit:1, Rest/binary >> = Payload, - << Return:Len/binary, Remain/binary >> = Rest, - {Return, Remain}; - 16#DB -> - {more, 4-PayloadLen}; - - 16#DC when PayloadLen >= 2 -> % array 16 - << Len:16/big-unsigned-integer-unit:1, Rest/binary >> = Payload, - unpack_array_(Rest, Len, []); - 16#DC -> - {more, 2-PayloadLen}; - - 16#DD when PayloadLen >= 4 -> % array 32 - << Len:32/big-unsigned-integer-unit:1, Rest/binary >> = Payload, - unpack_array_(Rest, Len, []); - 16#DD -> - {more, 4-PayloadLen}; - - 16#DE when PayloadLen >= 2 -> % map 16 - << Len:16/big-unsigned-integer-unit:1, Rest/binary >> = Payload, - unpack_map_(Rest, Len, []); - 16#DE -> - {more, 2-PayloadLen}; - - 16#DF when PayloadLen >= 4 -> % map 32 - << Len:32/big-unsigned-integer-unit:1, Rest/binary >> = Payload, - unpack_map_(Rest, Len, []); - - % positive fixnum - Code when Code >= 2#00000000, Code < 2#10000000-> - {Code, Payload}; - - % negative fixnum - Code when Code >= 2#11100000 -> - {(Code - 16#100), Payload}; - - Code when Code >= 2#10100000 , Code < 2#11000000 -> -% 101XXXXX for FixRaw - Len = Code rem 2#10100000, - << Return:Len/binary, Remain/binary >> = Payload, - {Return, Remain}; - - Code when Code >= 2#10010000 , Code < 2#10100000 -> -% 1001XXXX for FixArray - Len = Code rem 2#10010000, - unpack_array_(Payload, Len, []); - - Code when Code >= 2#10000000 , Code < 2#10010000 -> -% 1000XXXX for FixMap - Len = Code rem 2#10000000, - unpack_map_(Payload, Len, []); - - _Other -> - {error, no_code_matches} - end. % ===== test codes ===== % -include_lib("eunit/include/eunit.hrl"). @@ -419,7 +375,7 @@ unknown_test()-> test_([]) -> 0; test_([S|Rest])-> Pack = msgpack:pack(S), - {S, <<>>} = msgpack:unpack( Pack ), + ?assertEqual({S, <<>>}, msgpack:unpack(Pack)), 1+test_(Rest). other_test()-> From 33a7d56042539282e3b02d75d365dc3dfa57266c Mon Sep 17 00:00:00 2001 From: Vincent de Phily Date: Tue, 29 Jun 2010 11:59:56 +0200 Subject: [PATCH 02/12] * Return {more,undefined} instead of {more,integer()}, as we can only know the "minimum bytes needed to continue" instead of the actually usefull "total packet size". * Merge all {more,...} clauses of unpack_/1 into one. * Reformat unpack_/1 for readability. * Fix some specs, error values, and documentation. --- erlang/msgpack.erl | 178 ++++++++++++++++----------------------------- 1 file changed, 62 insertions(+), 116 deletions(-) diff --git a/erlang/msgpack.erl b/erlang/msgpack.erl index e94262d1..dc4907d8 100644 --- a/erlang/msgpack.erl +++ b/erlang/msgpack.erl @@ -29,11 +29,13 @@ % erl> c(msgpack). % erl> S = . % erl> {S, <<>>} = msgpack:unpack( msgpack:pack(S) ). --type reason() :: enomem | badarg | no_code_matches. --type msgpack_term() :: [msgpack_term()] | {[{msgpack_term(),msgpack_term()}]} | integer() | float(). +-type reason() :: enomem | {badarg, term()}. +-type msgpack_term() :: [msgpack_term()] | {[{msgpack_term(),msgpack_term()}]} | integer() | float() | binary(). % ===== external APIs ===== % --spec pack(Term::msgpack_term()) -> binary(). + +% @doc Pack one erlang term into an msgpack message. +-spec pack(Term::msgpack_term()) -> binary() | reason(). pack(O) when is_integer(O) andalso O < 0 -> pack_int_(O); pack(O) when is_integer(O) -> @@ -54,17 +56,17 @@ pack({Map}) when is_list(Map) -> pack_map(Map); pack(Map) when is_tuple(Map), element(1,Map)=:=dict -> pack_map(dict:from_list(Map)); -pack(_O) -> - {error, undefined}. +pack(Other) -> + {error, {badarg, Other}}. -% unpacking. -% if failed in decoding and not end, get more data -% and feed more Bin into this function. -% TODO: error case for imcomplete format when short for any type formats. --spec unpack( binary() )-> - {msgpack_term(), binary()} | {more, non_neg_integer()} | {error, reason()}. -unpack(Bin) -> - unpack_(Bin). +% @doc Unpack one (possibly deeply nested) msgpack message into an erlang term. +% If failed in decoding and not end, get more data +% and feed more Bin into this function. +-spec unpack( binary() ) -> {Decoded::msgpack_term(), Rest::binary()} | {more, undefined} | {error, reason()}. +unpack(Bin) when is_binary(Bin) -> + unpack_(Bin); +unpack(Other) -> + {error, {badarg, Other}}. -spec unpack_all( binary() ) -> [msgpack_term()]. unpack_all(Data)-> @@ -134,7 +136,7 @@ pack_raw(Bin) -> << 16#DB:8, Len:32/big-unsigned-integer-unit:1, Bin/binary >> end. -% list / tuple +% list pack_array(L) -> case length(L) of Len when Len < 16 -> @@ -152,7 +154,7 @@ pack_array_([Head|Tail], Acc) -> unpack_array_(Remain, 0, RetList) -> {lists:reverse(RetList), Remain}; unpack_array_(Bin, RestLen, RetList) -> case unpack(Bin) of - {more, Len} -> {more, Len+RestLen-1}; + {more, undefined} -> {more, undefined}; {Term, Rest}-> unpack_array_(Rest, RestLen-1, [Term|RetList]) end. @@ -172,120 +174,66 @@ pack_map_([{Key,Value}|Tail], Acc) -> pack_map_(Tail, << Acc/binary, (pack(Key))/binary, (pack(Value))/binary>>). % FIXME: write test for unpack_map/1 --spec unpack_map_(binary(), non_neg_integer(), [{term(), msgpack_term()}]) -> - {more, non_neg_integer()} | {any(), binary()}. +-spec unpack_map_(binary(), non_neg_integer(), [{msgpack_term(), msgpack_term()}]) -> {more, undefined} | {any(), binary()} | {error, reason()}. unpack_map_(Bin, 0, Acc) -> {{lists:reverse(Acc)}, Bin}; unpack_map_(Bin, Len, Acc) -> case unpack(Bin) of - { more, MoreLen } -> { more, MoreLen+Len-1 }; - { Key, Rest } -> + {more, undefined} -> {more, undefined}; + {Key, Rest} -> case unpack(Rest) of - {more, MoreLen} -> { more, MoreLen+Len-1 }; - { Value, Rest2 } -> + {more, undefined} -> {more, undefined}; + {Value, Rest2} -> unpack_map_(Rest2,Len-1,[{Key,Value}|Acc]) end end. --spec unpack_(Payload::binary()) -> {more, pos_integer()} | {msgpack_term(), binary()} | {error, reason()}. -unpack_(<<16#C0, Rest/binary>>) -> - {nil, Rest}; -unpack_(<<16#C2, Rest/binary>>) -> - {false, Rest}; -unpack_(<<16#C3, Rest/binary>>) -> - {true, Rest}; +-spec unpack_(Payload::binary()) -> {more, undefined} | {msgpack_term(), binary()} | {error, reason()}. +% Atoms +unpack_(<<16#C0, Rest/binary>>) -> {nil, Rest}; +unpack_(<<16#C2, Rest/binary>>) -> {false, Rest}; +unpack_(<<16#C3, Rest/binary>>) -> {true, Rest}; -unpack_(<<16#CA, Return:32/float-unit:1, Rest/binary>>) -> % 32bit float - {Return, Rest}; -unpack_(<<16#CA, Rest/binary>>) -> - {more, 4-byte_size(Rest)}; -unpack_(<<16#CB, Return:64/float-unit:1, Rest/binary>>) -> % 64bit float - {Return, Rest}; -unpack_(<<16#CB, Rest/binary>>) -> - {more, 8-byte_size(Rest)}; +% Floats +unpack_(<<16#CA, Val:32/float-unit:1, Rest/binary>>) -> {Val, Rest}; +unpack_(<<16#CB, Val:64/float-unit:1, Rest/binary>>) -> {Val, Rest}; -unpack_(<<16#CC, Int:8/unsigned-integer, Rest/binary>>) -> % uint 8 - {Int, Rest}; -unpack_(<<16#CC>>) -> - {more, 1}; -unpack_(<<16#CD, Int:16/big-unsigned-integer-unit:1, Rest/binary>>) -> % uint 16 - {Int, Rest}; -unpack_(<<16#CD, Rest/binary>>) -> - {more, 2-byte_size(Rest)}; -unpack_(<<16#CE, Int:32/big-unsigned-integer-unit:1, Rest/binary>>) -> % uint 32 - {Int, Rest}; -unpack_(<<16#CE, Rest/binary>>) -> - {more, 4-byte_size(Rest)}; -unpack_(<<16#CF, Int:64/big-unsigned-integer-unit:1, Rest/binary>>) -> % uint 64 - {Int, Rest}; -unpack_(<<16#CF, Rest/binary>>) -> - {more, 8-byte_size(Rest)}; +% Unsigned integers +unpack_(<<16#CC, Val:8/unsigned-integer, Rest/binary>>) -> {Val, Rest}; +unpack_(<<16#CD, Val:16/big-unsigned-integer-unit:1, Rest/binary>>) -> {Val, Rest}; +unpack_(<<16#CE, Val:32/big-unsigned-integer-unit:1, Rest/binary>>) -> {Val, Rest}; +unpack_(<<16#CF, Val:64/big-unsigned-integer-unit:1, Rest/binary>>) -> {Val, Rest}; -unpack_(<<16#D0, Int:8/signed-integer, Rest/binary>>) -> % int 8 - {Int, Rest}; -unpack_(<<16#D0>>) -> - {more, 1}; -unpack_(<<16#D1, Int:16/big-signed-integer-unit:1, Rest/binary>>) -> % int 16 - {Int, Rest}; -unpack_(<<16#D1, Rest/binary>>) -> - {more, 2-byte_size(Rest)}; -unpack_(<<16#D2, Int:32/big-signed-integer-unit:1, Rest/binary>>) -> % int 32 - {Int, Rest}; -unpack_(<<16#D2, Rest/binary>>) -> - {more, 4-byte_size(Rest)}; -unpack_(<<16#D3, Int:64/big-signed-integer-unit:1, Rest/binary>>) -> % int 64 - {Int, Rest}; -unpack_(<<16#D3, Rest/binary>>) -> - {more, 8-byte_size(Rest)}; +% Signed integers +unpack_(<<16#D0, Val:8/signed-integer, Rest/binary>>) -> {Val, Rest}; +unpack_(<<16#D1, Val:16/big-signed-integer-unit:1, Rest/binary>>) -> {Val, Rest}; +unpack_(<<16#D2, Val:32/big-signed-integer-unit:1, Rest/binary>>) -> {Val, Rest}; +unpack_(<<16#D3, Val:64/big-signed-integer-unit:1, Rest/binary>>) -> {Val, Rest}; -unpack_(<<16#DA, Len:16/unsigned-integer-unit:1, Val:Len/binary, Rest/binary>>) -> % raw 16 - {Val, Rest}; -unpack_(<<16#DA, Rest/binary>>) -> - {more, 16-byte_size(Rest)}; -unpack_(<<16#DB, Len:32/unsigned-integer-unit:1, Val:Len/binary, Rest/binary>>) -> % raw 32 - {Val, Rest}; -unpack_(<<16#DB, Rest/binary>>) -> - {more, 32-byte_size(Rest)}; +% Raw bytes +unpack_(<<16#DA, Len:16/unsigned-integer-unit:1, Val:Len/binary, Rest/binary>>) -> {Val, Rest}; +unpack_(<<16#DB, Len:32/unsigned-integer-unit:1, Val:Len/binary, Rest/binary>>) -> {Val, Rest}; -unpack_(<<16#DC, Len:16/big-unsigned-integer-unit:1, Rest/binary>>) -> % array 16 - unpack_array_(Rest, Len, []); -unpack_(<<16#DC, Rest/binary>>) -> - {more, 2-byte_size(Rest)}; -unpack_(<<16#DD, Len:32/big-unsigned-integer-unit:1, Rest/binary>>) -> % array 32 - unpack_array_(Rest, Len, []); -unpack_(<<16#DD, Rest/binary>>) -> - {more, 4-byte_size(Rest)}; +% Arrays +unpack_(<<16#DC, Len:16/big-unsigned-integer-unit:1, Rest/binary>>) -> unpack_array_(Rest, Len, []); +unpack_(<<16#DD, Len:32/big-unsigned-integer-unit:1, Rest/binary>>) -> unpack_array_(Rest, Len, []); -unpack_(<<16#DE, Len:16/big-unsigned-integer-unit:1, Rest/binary>>) -> % map 16 - unpack_map_(Rest, Len, []); -unpack_(<<16#DE, Rest/binary>>) -> - {more, 2-byte_size(Rest)}; -unpack_(<<16#DF, Len:32/big-unsigned-integer-unit:1, Rest/binary>>) -> % map 32 - unpack_map_(Rest, Len, []); -unpack_(<<16#DF, Rest/binary>>) -> - {more, 4-byte_size(Rest)}; +% Maps +unpack_(<<16#DE, Len:16/big-unsigned-integer-unit:1, Rest/binary>>) -> unpack_map_(Rest, Len, []); +unpack_(<<16#DF, Len:32/big-unsigned-integer-unit:1, Rest/binary>>) -> unpack_map_(Rest, Len, []); -unpack_(<<0:1, Value:7, Rest/binary>>) -> % positive fixnum - {Value, Rest}; -unpack_(<<2#111:3, Value:5, Rest/binary>>) -> % negative fixnum - {Value - 2#100000, Rest}; -unpack_(<<2#101:3, Len:5, Value:Len/binary, Rest/binary>>) -> % fixraw - {Value, Rest}; -unpack_(<<2#101:3, Len:5, Rest/binary>>) -> - {more, Len-byte_size(Rest)}; -unpack_(<<2#1001:4, Len:4, Rest/binary>>) -> % fixarray - unpack_array_(Rest, Len, []); -unpack_(<<2#1000:4, Len:4, Rest/binary>>) -> % fixmap - unpack_map_(Rest, Len, []); +% Tag-encoded lengths (kept last, for speed) +unpack_(<<0:1, Val:7, Rest/binary>>) -> {Val, Rest}; % pos fixnum +unpack_(<<2#111:3, Val:5, Rest/binary>>) -> {Val - 2#100000, Rest}; % neg fixnum +unpack_(<<2#101:3, Len:5, Val:Len/binary, Rest/binary>>) -> {Val, Rest}; % fixraw +unpack_(<<2#1001:4, Len:4, Rest/binary>>) -> unpack_array_(Rest, Len, []); % fixarray +unpack_(<<2#1000:4, Len:4, Rest/binary>>) -> unpack_map_(Rest, Len, []); % fixmap -%unpack_(<>) when F==16#C1; F==16#C4; F==16#C5; F==16#C6; F==16#C7; F==16#C8; F==16#C9; F==16#D5; F==16#D6; F==16#D7; F==16#D8; F==16#D9-> -% {error, {badarg, <>}}. -%unpack_(Other) when is_binary(Bin) -> -% {more, 1}. -unpack_(<<>>) -> - {more, 1}. -unpack_(Other) -> - {error, {badarg, Other}}. +% Incomplete / invalid data +unpack_(<>) when F==16#C1; F==16#C4; F==16#C5; F==16#C6; F==16#C7; F==16#C8; F==16#C9; F==16#D5; F==16#D6; F==16#D7; F==16#D8; F==16#D9-> + {error, {badarg, <>}}; +unpack_(_Bin) -> + {more, undefined}. % ===== test codes ===== % @@ -330,9 +278,7 @@ test_p(Len,Term,OrigBin,Len) -> {Term, <<>>}=msgpack:unpack(OrigBin); test_p(I,_,OrigBin,Len) when I < Len-> <> = OrigBin, - {more, N}=msgpack:unpack(Bin), - ?assert(0 < N), - ?assert(N < Len). + ?assertEqual({more, undefined}, msgpack:unpack(Bin)). partial_test()-> % error handling test. Term = lists:seq(0, 45), @@ -379,6 +325,6 @@ test_([S|Rest])-> 1+test_(Rest). other_test()-> - {more,1}=msgpack:unpack(<<>>). + ?assertEqual({more,undefined}, msgpack:unpack(<<>>)). -endif. From 45fb482ab42c7f47bf65313ade6808d0cfe3bca5 Mon Sep 17 00:00:00 2001 From: UENISHI Kota Date: Thu, 8 Jul 2010 23:36:18 +0900 Subject: [PATCH 03/12] erlang: added simple performance test. --- erlang/msgpack.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/erlang/msgpack.erl b/erlang/msgpack.erl index aab07b72..ee993115 100644 --- a/erlang/msgpack.erl +++ b/erlang/msgpack.erl @@ -359,4 +359,9 @@ test_([Before|Rest])-> other_test()-> {more,1}=msgpack:unpack(<<>>). +benchmark_test()-> + Data=[test_data() || _ <- lists:seq(0, 10000)], + S=?debugTime(" serialize", msgpack:pack(Data)), + {Data,<<>>}=?debugTime("deserialize", msgpack:unpack(S)). + -endif. From 485915c27a3ddf12e4ba4c9c0e27769869bb945c Mon Sep 17 00:00:00 2001 From: UENISHI Kota Date: Thu, 8 Jul 2010 23:39:47 +0900 Subject: [PATCH 04/12] erlang: added simple performance test description. --- erlang/msgpack.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/erlang/msgpack.erl b/erlang/msgpack.erl index ee993115..94fed86f 100644 --- a/erlang/msgpack.erl +++ b/erlang/msgpack.erl @@ -362,6 +362,7 @@ other_test()-> benchmark_test()-> Data=[test_data() || _ <- lists:seq(0, 10000)], S=?debugTime(" serialize", msgpack:pack(Data)), - {Data,<<>>}=?debugTime("deserialize", msgpack:unpack(S)). + {Data,<<>>}=?debugTime("deserialize", msgpack:unpack(S)), + ?debugFmt("for ~p KB test data.", [byte_size(S) div 1024]). -endif. From eab66a022e5b5fd9c4731ae8ba970b2146e27599 Mon Sep 17 00:00:00 2001 From: UENISHI Kota Date: Fri, 9 Jul 2010 01:04:09 +0900 Subject: [PATCH 05/12] erlang: added try-catch clause for easy error handling --- erlang/msgpack.erl | 220 +++++++++++++++++++++++---------------------- 1 file changed, 112 insertions(+), 108 deletions(-) diff --git a/erlang/msgpack.erl b/erlang/msgpack.erl index 94fed86f..d4fd0ba2 100644 --- a/erlang/msgpack.erl +++ b/erlang/msgpack.erl @@ -19,9 +19,8 @@ -author('kuenishi+msgpack@gmail.com'). %% tuples, atoms are not supported. lists, integers, double, and so on. -%% see http://msgpack.sourceforge.jp/spec for -%% supported formats. APIs are almost compatible -%% for C API (http://msgpack.sourceforge.jp/c:doc) +%% see http://msgpack.sourceforge.jp/spec for supported formats. +%% APIs are almost compatible with C API (http://msgpack.sourceforge.jp/c:doc) %% except buffering functions (both copying and zero-copying). -export([pack/1, unpack/1, unpack_all/1]). -export([pack_map/1]). @@ -30,51 +29,38 @@ % erl> c(msgpack). % erl> S = . % erl> {S, <<>>} = msgpack:unpack( msgpack:pack(S) ). --type reason() :: enomem | badarg | no_code_matches. +-type reason() :: enomem | badarg | no_code_matches | undefined. -type msgpack_term() :: [msgpack_term()] | {[{msgpack_term(),msgpack_term()}]} | integer() | float() | binary(). % ===== external APIs ===== % --spec pack(Term::msgpack_term()) -> binary(). -pack(I) when is_integer(I) andalso I < 0 -> - pack_int_(I); -pack(I) when is_integer(I) -> - pack_uint_(I); -pack(F) when is_float(F) -> - pack_double(F); -pack(nil) -> - << 16#C0:8 >>; -pack(true) -> - << 16#C3:8 >>; -pack(false) -> - << 16#C2:8 >>; -pack(Bin) when is_binary(Bin) -> - pack_raw(Bin); -pack(List) when is_list(List) -> - pack_array(List); -pack({Map}) when is_list(Map) -> - pack_map(Map); -pack(Map) when is_tuple(Map), element(1,Map)=:=dict -> - pack_map(dict:to_list(Map)); -pack(_Other) -> - {error, undefined}. +-spec pack(Term::msgpack_term()) -> binary() | {error, reason()}. +pack(Term)-> + try + pack_(Term) + catch + error:Error when is_tuple(Error), element(1, Error) =:= error -> + Error; + throw:Exception -> + erlang:display(Exception), + {error, Exception} + end. % unpacking. % if failed in decoding and not end, get more data % and feed more Bin into this function. % TODO: error case for imcomplete format when short for any type formats. --spec unpack( Bin::binary() )-> {msgpack_term(), binary()} | - {more, non_neg_integer()} | {more, undefined} | - {error, reason()}. -unpack(Bin) when not is_binary(Bin)-> - {error, badarg}; -unpack(Bin) when bit_size(Bin) >= 8 -> - unpack_(Bin); -unpack(<<>>)-> - {more, 1}; -unpack(_) -> - {more, undefined}. +-spec unpack( Bin::binary() )-> {msgpack_term(), binary()} | {error, reason()}. +unpack(Bin)-> + try + unpack_(Bin) + catch + error:Error when is_tuple(Error), element(1, Error) =:= error -> + Error; + throw:Exception -> + {error, Exception} + end. -spec unpack_all( binary() ) -> [msgpack_term()]. unpack_all(Data)-> @@ -85,7 +71,7 @@ unpack_all(Data)-> [Term|unpack_all(Binary)] end. --spec pack_map(M::[{msgpack_term(),msgpack_term()}])-> binary(). +-spec pack_map(M::[{msgpack_term(),msgpack_term()}])-> binary() | {error, badarg}. pack_map(M)-> case length(M) of Len when Len < 16 -> @@ -98,6 +84,31 @@ pack_map(M)-> % ===== internal APIs ===== % +% pack them all +-spec pack_(msgpack_term()) -> binary() | no_return(). +pack_(I) when is_integer(I) andalso I < 0 -> + pack_int_(I); +pack_(I) when is_integer(I) -> + pack_uint_(I); +pack_(F) when is_float(F) -> + pack_double(F); +pack_(nil) -> + << 16#C0:8 >>; +pack_(true) -> + << 16#C3:8 >>; +pack_(false) -> + << 16#C2:8 >>; +pack_(Bin) when is_binary(Bin) -> + pack_raw(Bin); +pack_(List) when is_list(List) -> + pack_array(List); +pack_({Map}) when is_list(Map) -> + pack_map(Map); +pack_(Map) when is_tuple(Map), element(1,Map)=:=dict -> + pack_map(dict:to_list(Map)); +pack_(_Other) -> + throw({error, undefined}). + % positive fixnum pack_uint_(N) when N < 128 -> << 2#0:1, N:7 >>; @@ -149,7 +160,7 @@ pack_raw(Bin) -> << 16#DB:8, Len:32/big-unsigned-integer-unit:1, Bin/binary >> end. -% list / tuple +% list pack_array(L) -> case length(L) of Len when Len < 16 -> @@ -159,43 +170,40 @@ pack_array(L) -> Len -> << 16#DD:8, Len:32/big-unsigned-integer-unit:1,(pack_array_(L, <<>>))/binary >> end. + pack_array_([], Acc) -> Acc; pack_array_([Head|Tail], Acc) -> - pack_array_(Tail, <>). + pack_array_(Tail, <>). -% FIXME! this should be without lists:reverse/1 -unpack_array_(<<>>, 0, RetList) -> {lists:reverse(RetList), <<>>}; -unpack_array_(Remain, 0, RetList) when is_binary(Remain)-> {lists:reverse(RetList), Remain}; -unpack_array_(<<>>, RestLen, _RetList) when RestLen > 0 -> {more, undefined}; -unpack_array_(Bin, RestLen, RetList) when is_binary(Bin)-> - case unpack(Bin) of - {more, _} -> {more, undefined}; - {Term, Rest}-> unpack_array_(Rest, RestLen-1, [Term|RetList]) - end. +% Users SHOULD NOT send too long list: this uses lists:reverse/1 +unpack_array_(Remain, 0, Acc) when is_binary(Remain)-> {lists:reverse(Acc), Remain}; +unpack_array_(<<>>, RestLen, _) when RestLen > 0 -> throw(short); +unpack_array_(Bin, RestLen, Acc) when is_binary(Bin)-> + {Term, Rest}=unpack_(Bin), + unpack_array_(Rest, RestLen-1, [Term|Acc]). pack_map_([], Acc) -> Acc; pack_map_([{Key,Value}|Tail], Acc) -> - pack_map_(Tail, << Acc/binary, (pack(Key))/binary, (pack(Value))/binary>>). + pack_map_(Tail, << Acc/binary, (pack_(Key))/binary, (pack_(Value))/binary>>). -% FIXME! this should be without lists:reverse/1 --spec unpack_map_(binary(), non_neg_integer(), [{term(), msgpack_term()}])-> - {more, non_neg_integer()} | { any(), binary()}. +% Users SHOULD NOT send too long list: this uses lists:reverse/1 +-spec unpack_map_(binary(), non_neg_integer(), [{msgpack_term(), msgpack_term()}])-> + {[{msgpack_term(), msgpack_term()}], binary()} | no_return(). unpack_map_(Bin, 0, Acc) -> {{lists:reverse(Acc)}, Bin}; +unpack_map_(<<>>, _, _ ) -> throw(short); unpack_map_(Bin, Len, Acc) -> - case unpack(Bin) of - {more, _} -> {more, undefined}; - {Key, Rest} -> - case unpack(Rest) of - {more, _} -> {more, undefined}; - {Value, Rest2} -> - unpack_map_(Rest2,Len-1,[{Key,Value}|Acc]) - end - end. + {Key, Rest} = unpack_(Bin), + {Value, Rest2} = unpack_(Rest), + unpack_map_(Rest2,Len-1,[{Key,Value}|Acc]). --spec unpack_(Payload::binary()) -> - {more, pos_integer()} | {msgpack_term(), binary()} | {error, reason()}. -unpack_(Binary)-> - case Binary of +% unpack then all +-spec unpack_(Bin::binary()) -> {msgpack_term(), binary()} | {error, reason()} | no_return(). +unpack_(Bin) when not is_binary(Bin)-> + throw(badarg); +unpack_(<<>>)-> + throw(short); +unpack_(Bin) when bit_size(Bin) >= 8 -> + case Bin of % ATOMS <<16#C0, Rest/binary>> -> {nil, Rest}; <<16#C2, Rest/binary>> -> {false, Rest}; @@ -231,35 +239,36 @@ unpack_(Binary)-> <<2#1000:4, L:4, Rest/binary>> -> unpack_map_(Rest, L, []); % map % Incomplete / invalid data - <<16#CA, Rest/binary>> -> {more, 4-byte_size(Rest)}; - <<16#CB, Rest/binary>> -> {more, 8-byte_size(Rest)}; - <<16#CC>> -> {more, 1}; - <<16#CD, Rest/binary>> -> {more, 2-byte_size(Rest)}; - <<16#CE, Rest/binary>> -> {more, 4-byte_size(Rest)}; - <<16#CF, Rest/binary>> -> {more, 8-byte_size(Rest)}; - <<16#D0>> -> {more, 1}; - <<16#D1, Rest/binary>> -> {more, 2-byte_size(Rest)}; - <<16#D2, Rest/binary>> -> {more, 4-byte_size(Rest)}; - <<16#D3, Rest/binary>> -> {more, 8-byte_size(Rest)}; - <<16#DA, Rest/binary>> -> {more, 16-byte_size(Rest)}; - <<16#DB, Rest/binary>> -> {more, 32-byte_size(Rest)}; - <<16#DC, Rest/binary>> -> {more, 2-byte_size(Rest)}; - <<16#DD, Rest/binary>> -> {more, 4-byte_size(Rest)}; - <<16#DE, Rest/binary>> -> {more, 2-byte_size(Rest)}; - <<16#DF, Rest/binary>> -> {more, 4-byte_size(Rest)}; - <<2#101:3, L:5, Rest/binary>> -> {more, L-byte_size(Rest)}; +% <<_:16/integer, _/binary>> + _ -> throw(short) +%% <<16#CA, _/binary>> -> {more, 4-byte_size(Rest)}; +%% <<16#CB, Rest/binary>> -> {more, 8-byte_size(Rest)}; +%% <<16#CC>> -> {more, 1}; +%% <<16#CD, Rest/binary>> -> {more, 2-byte_size(Rest)}; +%% <<16#CE, Rest/binary>> -> {more, 4-byte_size(Rest)}; +%% <<16#CF, Rest/binary>> -> {more, 8-byte_size(Rest)}; +%% <<16#D0>> -> {more, 1}; +%% <<16#D1, Rest/binary>> -> {more, 2-byte_size(Rest)}; +%% <<16#D2, Rest/binary>> -> {more, 4-byte_size(Rest)}; +%% <<16#D3, Rest/binary>> -> {more, 8-byte_size(Rest)}; +%% <<16#DA, Rest/binary>> -> {more, 16-byte_size(Rest)}; +%% <<16#DB, Rest/binary>> -> {more, 32-byte_size(Rest)}; +%% <<16#DC, Rest/binary>> -> {more, 2-byte_size(Rest)}; +%% <<16#DD, Rest/binary>> -> {more, 4-byte_size(Rest)}; +%% <<16#DE, Rest/binary>> -> {more, 2-byte_size(Rest)}; +%% <<16#DF, Rest/binary>> -> {more, 4-byte_size(Rest)}; +%% <<2#101:3, L:5, Rest/binary>> -> throw(short); % {more, L-byte_size(Rest)}; - <<>> -> {more, 1}; - <<2#101:3, _/binary>> -> {more, undefined}; - <> when F==16#C1; - F==16#C7; F==16#C8; F==16#C9; F==16#D5; - F==16#D6; F==16#D7; F==16#D8; F==16#D9-> - {error, {badarg, <>}}; - Other -> - {error, {badarg, Other}} +%% <<>> -> throw(short); % {more, 1}; +%% <<2#101:3, _/binary>> -> {more, undefined}; +%% <> when F==16#C1; +%% F==16#C7; F==16#C8; F==16#C9; F==16#D5; +%% F==16#D6; F==16#D7; F==16#D8; F==16#D9-> +%% throw({badarg, <>}); +% Other -> +% throw({unknown, Other}) end. - % ===== test codes ===== % -include_lib("eunit/include/eunit.hrl"). -ifdef(EUNIT). @@ -268,9 +277,15 @@ compare_all([], [])-> ok; compare_all([], R)-> {toomuchrhs, R}; compare_all(L, [])-> {toomuchlhs, L}; compare_all([LH|LTL], [RH|RTL]) -> - LH=RH, + ?assertEqual(LH, RH), compare_all(LTL, RTL). +test_([]) -> 0; +test_([Term|Rest])-> + Pack = msgpack:pack(Term), + ?assertEqual({Term, <<>>}, msgpack:unpack( Pack )), + 1+test_(Rest). + test_data()-> [true, false, nil, 0, 1, 2, 123, 512, 1230, 678908, 16#FFFFFFFFFF, @@ -303,12 +318,7 @@ test_p(Len,Term,OrigBin,Len) -> {Term, <<>>}=msgpack:unpack(OrigBin); test_p(I,_,OrigBin,Len) when I < Len-> <> = OrigBin, - case msgpack:unpack(Bin) of - {more, N} when not is_integer(N) -> - ?assertEqual(undefined, N); - {more, N} -> - ?assert( N < Len ) - end. + ?assertEqual({error,short}, msgpack:unpack(Bin)). partial_test()-> % error handling test. Term = lists:seq(0, 45), @@ -343,21 +353,15 @@ unknown_test()-> 42 ], Port = open_port({spawn, "ruby testcase_generator.rb"}, [binary]), + timer:sleep(1), receive {Port, {data, Data}}-> compare_all(Tests, msgpack:unpack_all(Data)) after 1024-> ?assert(false) end, port_close(Port). -test_([]) -> 0; -test_([Before|Rest])-> - Pack = msgpack:pack(Before), - {After, <<>>} = msgpack:unpack( Pack ), - ?assertEqual(Before, After), - 1+test_(Rest). - other_test()-> - {more,1}=msgpack:unpack(<<>>). + ?assertEqual({error,short},msgpack:unpack(<<>>)). benchmark_test()-> Data=[test_data() || _ <- lists:seq(0, 10000)], From e799082e5c4d39094c666da0f1c52ab2d6eb088c Mon Sep 17 00:00:00 2001 From: UENISHI Kota Date: Fri, 9 Jul 2010 01:21:35 +0900 Subject: [PATCH 06/12] erlang: better test cases, except 'Broken pipe' --- erlang/msgpack.erl | 74 +++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 47 deletions(-) diff --git a/erlang/msgpack.erl b/erlang/msgpack.erl index d4fd0ba2..96ea407c 100644 --- a/erlang/msgpack.erl +++ b/erlang/msgpack.erl @@ -142,7 +142,7 @@ pack_int_(N) -> << 16#D3:8, N:64/big-signed-integer-unit:1 >>. % float : erlang's float is always IEEE 754 64bit format. -%pack_float(F) when is_float(F)-> +% pack_float(F) when is_float(F)-> % << 16#CA:8, F:32/big-float-unit:1 >>. % pack_double(F). % double @@ -198,10 +198,7 @@ unpack_map_(Bin, Len, Acc) -> % unpack then all -spec unpack_(Bin::binary()) -> {msgpack_term(), binary()} | {error, reason()} | no_return(). -unpack_(Bin) when not is_binary(Bin)-> - throw(badarg); -unpack_(<<>>)-> - throw(short); +unpack_(Bin) when not is_binary(Bin)-> throw(badarg); unpack_(Bin) when bit_size(Bin) >= 8 -> case Bin of % ATOMS @@ -239,43 +236,31 @@ unpack_(Bin) when bit_size(Bin) >= 8 -> <<2#1000:4, L:4, Rest/binary>> -> unpack_map_(Rest, L, []); % map % Incomplete / invalid data -% <<_:16/integer, _/binary>> - _ -> throw(short) -%% <<16#CA, _/binary>> -> {more, 4-byte_size(Rest)}; -%% <<16#CB, Rest/binary>> -> {more, 8-byte_size(Rest)}; -%% <<16#CC>> -> {more, 1}; -%% <<16#CD, Rest/binary>> -> {more, 2-byte_size(Rest)}; -%% <<16#CE, Rest/binary>> -> {more, 4-byte_size(Rest)}; -%% <<16#CF, Rest/binary>> -> {more, 8-byte_size(Rest)}; -%% <<16#D0>> -> {more, 1}; -%% <<16#D1, Rest/binary>> -> {more, 2-byte_size(Rest)}; -%% <<16#D2, Rest/binary>> -> {more, 4-byte_size(Rest)}; -%% <<16#D3, Rest/binary>> -> {more, 8-byte_size(Rest)}; -%% <<16#DA, Rest/binary>> -> {more, 16-byte_size(Rest)}; -%% <<16#DB, Rest/binary>> -> {more, 32-byte_size(Rest)}; -%% <<16#DC, Rest/binary>> -> {more, 2-byte_size(Rest)}; -%% <<16#DD, Rest/binary>> -> {more, 4-byte_size(Rest)}; -%% <<16#DE, Rest/binary>> -> {more, 2-byte_size(Rest)}; -%% <<16#DF, Rest/binary>> -> {more, 4-byte_size(Rest)}; -%% <<2#101:3, L:5, Rest/binary>> -> throw(short); % {more, L-byte_size(Rest)}; - -%% <<>> -> throw(short); % {more, 1}; -%% <<2#101:3, _/binary>> -> {more, undefined}; -%% <> when F==16#C1; -%% F==16#C7; F==16#C8; F==16#C9; F==16#D5; -%% F==16#D6; F==16#D7; F==16#D8; F==16#D9-> -%% throw({badarg, <>}); -% Other -> -% throw({unknown, Other}) - end. + <> when F==16#CA; F==16#CB; F==16#CC; + F==16#CD; F==16#CE; F==16#CF; + F==16#D0; F==16#D1; F==16#D2; + F==16#D3; F==16#DA; F==16#DB; + F==16#DC; F==16#DD; F==16#DE; + F==16#DF -> + throw(short); + <> when F==16#C1; + F==16#C7; F==16#C8; F==16#C9; + F==16#D5; F==16#D6; F==16#D7; + F==16#D8; F==16#D9 -> + throw(badarg); + _ -> + throw(short) % or unknown/badarg? + end; +unpack_(<<>>)-> throw(short); +unpack_(<<2#101:3, _/binary>>) -> throw(short). % ===== test codes ===== % -include_lib("eunit/include/eunit.hrl"). -ifdef(EUNIT). compare_all([], [])-> ok; -compare_all([], R)-> {toomuchrhs, R}; -compare_all(L, [])-> {toomuchlhs, L}; +compare_all([], R)-> {toomuchrhs, R}; +compare_all(L, [])-> {toomuchlhs, L}; compare_all([LH|LTL], [RH|RTL]) -> ?assertEqual(LH, RH), compare_all(LTL, RTL). @@ -305,9 +290,9 @@ basic_test()-> Passed = length(Tests). port_test()-> + Port = open_port({spawn, "ruby ../test/crosslang.rb"}, [binary]), Tests = test_data(), {[Tests],<<>>} = msgpack:unpack(msgpack:pack([Tests])), - Port = open_port({spawn, "ruby ../test/crosslang.rb"}, [binary]), true = port_command(Port, msgpack:pack(Tests) ), receive {Port, {data, Data}}-> {Tests, <<>>}=msgpack:unpack(Data) @@ -328,10 +313,7 @@ partial_test()-> % error handling test. long_test()-> Longer = lists:seq(0, 655), -% Longest = lists:seq(0,12345), - {Longer, <<>>} = msgpack:unpack(msgpack:pack(Longer)), -% {Longest, <<>>} = msgpack:unpack(msgpack:pack(Longest)). - ok. + {Longer, <<>>} = msgpack:unpack(msgpack:pack(Longer)). map_test()-> Ints = lists:seq(0, 65), @@ -341,6 +323,7 @@ map_test()-> ok. unknown_test()-> + Port = open_port({spawn, "ruby testcase_generator.rb"}, [binary]), Tests = [0, 1, 2, 123, 512, 1230, 678908, -1, -23, -512, -1230, -567898, <<"hogehoge">>, <<"243546rf7g68h798j">>, @@ -348,16 +331,13 @@ unknown_test()-> -234.4355, 1.0e-34, 1.0e64, [23, 234, 0.23], [0,42,<<"sum">>, [1,2]], [1,42, nil, [3]], - {[{1,2},{<<"hoge">>,nil}]}, + {[{1,2},{<<"hoge">>,nil}]}, % map -234, -50000, 42 ], - Port = open_port({spawn, "ruby testcase_generator.rb"}, [binary]), - timer:sleep(1), receive - {Port, {data, Data}}-> - compare_all(Tests, msgpack:unpack_all(Data)) - after 1024-> ?assert(false) end, + {Port, {data, Data}}-> compare_all(Tests, msgpack:unpack_all(Data)) + after 1024-> ?assert(false) end, port_close(Port). other_test()-> From 64c36b7a8faac55d8dc80342f27929b5538c4307 Mon Sep 17 00:00:00 2001 From: Vincent de Phily Date: Fri, 9 Jul 2010 13:06:57 +0200 Subject: [PATCH 07/12] Remove a couple of superfluous 'when' clauses. The when clause for unpack_/1 has been moved to unpack/1 so that it is performed only once. --- erlang/msgpack.erl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/erlang/msgpack.erl b/erlang/msgpack.erl index 1291d542..d1ba9cd0 100644 --- a/erlang/msgpack.erl +++ b/erlang/msgpack.erl @@ -52,7 +52,9 @@ pack(Term)-> % and feed more Bin into this function. % TODO: error case for imcomplete format when short for any type formats. -spec unpack( Bin::binary() )-> {msgpack_term(), binary()} | {error, reason()}. -unpack(Bin)-> +unpack(Bin) when not is_binary(Bin) -> + {error, badarg}; +unpack(Bin) -> try unpack_(Bin) catch @@ -126,7 +128,7 @@ pack_uint_(N) -> << 16#CF:8, N:64/big-unsigned-integer-unit:1 >>. % negative fixnum -pack_int_(N) when is_integer(N) , N >= -32-> +pack_int_(N) when N >= -32-> << 2#111:3, N:5 >>; % int 8 pack_int_(N) when N > -128 -> @@ -176,9 +178,9 @@ pack_array_([Head|Tail], Acc) -> pack_array_(Tail, <>). % Users SHOULD NOT send too long list: this uses lists:reverse/1 -unpack_array_(Remain, 0, Acc) when is_binary(Remain)-> {lists:reverse(Acc), Remain}; -unpack_array_(<<>>, RestLen, _) when RestLen > 0 -> throw(short); -unpack_array_(Bin, RestLen, Acc) when is_binary(Bin)-> +unpack_array_(Remain, 0, Acc) -> {lists:reverse(Acc), Remain}; +unpack_array_(<<>>, RestLen, _) -> throw(short); +unpack_array_(Bin, RestLen, Acc) -> {Term, Rest}=unpack_(Bin), unpack_array_(Rest, RestLen-1, [Term|Acc]). @@ -198,8 +200,7 @@ unpack_map_(Bin, Len, Acc) -> % unpack then all -spec unpack_(Bin::binary()) -> {msgpack_term(), binary()} | {error, reason()} | no_return(). -unpack_(Bin) when not is_binary(Bin)-> throw(badarg); -unpack_(Bin) when bit_size(Bin) >= 8 -> +unpack_(Bin) -> case Bin of % ATOMS <<16#C0, Rest/binary>> -> {nil, Rest}; @@ -234,7 +235,7 @@ unpack_(Bin) when bit_size(Bin) >= 8 -> <<2#101:3, L:5, V:L/binary, Rest/binary>> -> {V, Rest}; % raw bytes <<2#1001:4, L:4, Rest/binary>> -> unpack_array_(Rest, L, []); % array <<2#1000:4, L:4, Rest/binary>> -> unpack_map_(Rest, L, []); % map - + % Incomplete / invalid data <> when F==16#CA; F==16#CB; F==16#CC; F==16#CD; F==16#CE; F==16#CF; From 6abc120279ced47d899cd8596e4d48fc2171e2cf Mon Sep 17 00:00:00 2001 From: Vincent de Phily Date: Fri, 9 Jul 2010 13:29:47 +0200 Subject: [PATCH 08/12] erlang: Fix incomplete/invalid cases of unpack_/1 * fix list of invalid bytes was missing 3 possibilities (see type chart section of msgpack format spec) * fix matching of invalid bytes to look at 1 byte instead of 2 * simplify 'incomplete' case : anything that's not complete or invalid is by definition incomplete --- erlang/msgpack.erl | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/erlang/msgpack.erl b/erlang/msgpack.erl index d1ba9cd0..1c1eae91 100644 --- a/erlang/msgpack.erl +++ b/erlang/msgpack.erl @@ -236,24 +236,15 @@ unpack_(Bin) -> <<2#1001:4, L:4, Rest/binary>> -> unpack_array_(Rest, L, []); % array <<2#1000:4, L:4, Rest/binary>> -> unpack_map_(Rest, L, []); % map -% Incomplete / invalid data - <> when F==16#CA; F==16#CB; F==16#CC; - F==16#CD; F==16#CE; F==16#CF; - F==16#D0; F==16#D1; F==16#D2; - F==16#D3; F==16#DA; F==16#DB; - F==16#DC; F==16#DD; F==16#DE; - F==16#DF -> - throw(short); - <> when F==16#C1; - F==16#C7; F==16#C8; F==16#C9; - F==16#D5; F==16#D6; F==16#D7; - F==16#D8; F==16#D9 -> +% Invalid data + <> when F==16#C1; + F==16#C4; F==16#C5; F==16#C6; F==16#C7; F==16#C8; F==16#C9; + F==16#D4; F==16#D5; F==16#D6; F==16#D7; F==16#D8; F==16#D9 -> throw(badarg); +% Incomplete data (we've covered every complete/invalid case; anything left is incomplete) _ -> - throw(short) % or unknown/badarg? - end; -unpack_(<<>>)-> throw(short); -unpack_(<<2#101:3, _/binary>>) -> throw(short). + throw(short) + end. % ===== test codes ===== % -include_lib("eunit/include/eunit.hrl"). From ba4a971bfaabe7da2159a634cd07977e06c61e3a Mon Sep 17 00:00:00 2001 From: Vincent de Phily Date: Fri, 9 Jul 2010 13:44:02 +0200 Subject: [PATCH 09/12] erlang: Remove unecessary 'throw(short)' clause for unpack_{array,map}_/1 Unecessary because unpack_/1 will throw it anyway. This does mean that we go a tiny bit deeper to find that we don't have enough data, but that should be a rare code path. Keep the main code path fast and the code clean. While at it, rename vars to match its sibling function and to avoid thinking that RestLen is a byte count (it's an item count). --- erlang/msgpack.erl | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/erlang/msgpack.erl b/erlang/msgpack.erl index 1c1eae91..ff3eac74 100644 --- a/erlang/msgpack.erl +++ b/erlang/msgpack.erl @@ -178,11 +178,10 @@ pack_array_([Head|Tail], Acc) -> pack_array_(Tail, <>). % Users SHOULD NOT send too long list: this uses lists:reverse/1 -unpack_array_(Remain, 0, Acc) -> {lists:reverse(Acc), Remain}; -unpack_array_(<<>>, RestLen, _) -> throw(short); -unpack_array_(Bin, RestLen, Acc) -> - {Term, Rest}=unpack_(Bin), - unpack_array_(Rest, RestLen-1, [Term|Acc]). +unpack_array_(Bin, 0, Acc) -> {lists:reverse(Acc), Bin}; +unpack_array_(Bin, Len, Acc) -> + {Term, Rest} = unpack_(Bin), + unpack_array_(Rest, Len-1, [Term|Acc]). pack_map_([], Acc) -> Acc; pack_map_([{Key,Value}|Tail], Acc) -> @@ -191,14 +190,13 @@ pack_map_([{Key,Value}|Tail], Acc) -> % Users SHOULD NOT send too long list: this uses lists:reverse/1 -spec unpack_map_(binary(), non_neg_integer(), [{msgpack_term(), msgpack_term()}])-> {[{msgpack_term(), msgpack_term()}], binary()} | no_return(). -unpack_map_(Bin, 0, Acc) -> {{lists:reverse(Acc)}, Bin}; -unpack_map_(<<>>, _, _ ) -> throw(short); +unpack_map_(Bin, 0, Acc) -> {{lists:reverse(Acc)}, Bin}; unpack_map_(Bin, Len, Acc) -> {Key, Rest} = unpack_(Bin), {Value, Rest2} = unpack_(Rest), - unpack_map_(Rest2,Len-1,[{Key,Value}|Acc]). + unpack_map_(Rest2, Len-1, [{Key,Value}|Acc]). -% unpack then all +% unpack them all -spec unpack_(Bin::binary()) -> {msgpack_term(), binary()} | {error, reason()} | no_return(). unpack_(Bin) -> case Bin of From a4258505a99087df2397cf8d6cfbf194311a995c Mon Sep 17 00:00:00 2001 From: UENISHI Kota Date: Fri, 9 Jul 2010 23:23:00 +0900 Subject: [PATCH 10/12] erlang: modified wrong testcase. --- erlang/msgpack.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/erlang/msgpack.erl b/erlang/msgpack.erl index ff3eac74..e862cad1 100644 --- a/erlang/msgpack.erl +++ b/erlang/msgpack.erl @@ -282,10 +282,11 @@ basic_test()-> port_test()-> Port = open_port({spawn, "ruby ../test/crosslang.rb"}, [binary]), Tests = test_data(), - {[Tests],<<>>} = msgpack:unpack(msgpack:pack([Tests])), - true = port_command(Port, msgpack:pack(Tests) ), + S=msgpack:pack([Tests]), + true = port_command(Port, S), + {[Tests],<<>>} = msgpack:unpack(S), receive - {Port, {data, Data}}-> {Tests, <<>>}=msgpack:unpack(Data) + {Port, {data, Data}}-> {[Tests], <<>>}=msgpack:unpack(Data) after 1024-> ?assert(false) end, port_close(Port). From ca0c844f32038329b21f92ce42c62618057ecc02 Mon Sep 17 00:00:00 2001 From: tokuhirom Date: Wed, 14 Jul 2010 09:58:05 +0900 Subject: [PATCH 11/12] clearly specified this distribution requires requires C99. --- perl/Makefile.PL | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/perl/Makefile.PL b/perl/Makefile.PL index 58ab7c79..e9f9618a 100644 --- a/perl/Makefile.PL +++ b/perl/Makefile.PL @@ -1,4 +1,6 @@ use inc::Module::Install; +use Config; + name 'Data-MessagePack'; all_from 'lib/Data/MessagePack.pm'; readme_from('lib/Data/MessagePack.pm'); @@ -11,6 +13,8 @@ tests 't/*.t'; recursive_author_tests('xt'); use_ppport 3.19; +requires_c99(); # msgpack C library requires C99. + clean_files qw{ *.stackdump *.gcov *.gcda *.gcno From 9ac69337e89305a36ae3a3d3a9d89272a7453452 Mon Sep 17 00:00:00 2001 From: tokuhirom Date: Wed, 14 Jul 2010 09:58:28 +0900 Subject: [PATCH 12/12] perl: bump up version to 0.13! --- perl/Changes | 4 ++++ perl/lib/Data/MessagePack.pm | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/perl/Changes b/perl/Changes index b03a9d0c..f32d5f49 100644 --- a/perl/Changes +++ b/perl/Changes @@ -1,3 +1,7 @@ +0.13 + + - clearly specify requires_c99(), because msgpack C header requires C99. + 0.12 - PERL_NO_GET_CONTEXT makes horrible dTHXs. remove it. diff --git a/perl/lib/Data/MessagePack.pm b/perl/lib/Data/MessagePack.pm index dcc713d5..ee07d35f 100644 --- a/perl/lib/Data/MessagePack.pm +++ b/perl/lib/Data/MessagePack.pm @@ -4,7 +4,7 @@ use warnings; use XSLoader; use 5.008001; -our $VERSION = '0.12'; +our $VERSION = '0.13'; our $PreferInteger = 0; our $true = do { bless \(my $dummy = 1), "Data::MessagePack::Boolean" };