mirror of
https://github.com/msgpack/msgpack-c.git
synced 2025-05-27 23:10:20 +02:00
ruby: converts encodings into UTF-8 on Ruby 1.9
This commit is contained in:
parent
a1bd14e516
commit
b5c78de2dd
33
ruby/encoding.h
Normal file
33
ruby/encoding.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* MessagePack for Ruby
|
||||
*
|
||||
* Copyright (C) 2008-2010 FURUHASHI Sadayuki
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
 * Shared declarations for the string-encoding support of the Ruby binding.
 *
 * The declarations below are only active when HAVE_RUBY_ENCODING_H is
 * defined. In that case MSGPACK_RUBY_ENCODING is defined as well, so that
 * files including this header can test for encoding support with a single
 * macro.
 */
#ifndef ENCODING_H__
|
||||
#define ENCODING_H__
|
||||
|
||||
|
||||
#ifdef HAVE_RUBY_ENCODING_H
|
||||
#include "ruby/encoding.h"
|
||||
/* Feature macro: encoding-aware code paths are compiled only when set. */
#define MSGPACK_RUBY_ENCODING
|
||||
/* Encoding index of UTF-8; assigned from rb_utf8_encindex() in Init_msgpack(). */
extern int s_enc_utf8;
|
||||
/* Encoding index of ASCII-8BIT; assigned from rb_ascii8bit_encindex() in Init_msgpack(). */
extern int s_enc_ascii8bit;
|
||||
/* Encoding index of US-ASCII; assigned from rb_usascii_encindex() in Init_msgpack(). */
extern int s_enc_usascii;
|
||||
/* UTF-8 encoding as a Ruby VALUE; passed to rb_str_encode() as the target encoding. */
extern VALUE s_enc_utf8_value;
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* encoding.h */
|
||||
|
21
ruby/pack.c
21
ruby/pack.c
@ -16,6 +16,8 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "ruby.h"
|
||||
#include "encoding.h"
|
||||
|
||||
#include "msgpack/pack_define.h"
|
||||
|
||||
static ID s_to_msgpack;
|
||||
@ -131,7 +133,6 @@ static VALUE MessagePack_Fixnum_to_msgpack(int argc, VALUE *argv, VALUE self)
|
||||
static VALUE MessagePack_Bignum_to_msgpack(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
ARG_BUFFER(out, argc, argv);
|
||||
// FIXME bignum
|
||||
if(RBIGNUM_SIGN(self)) { // positive
|
||||
msgpack_pack_uint64(out, rb_big2ull(self));
|
||||
} else { // negative
|
||||
@ -168,6 +169,14 @@ static VALUE MessagePack_Float_to_msgpack(int argc, VALUE *argv, VALUE self)
|
||||
static VALUE MessagePack_String_to_msgpack(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
ARG_BUFFER(out, argc, argv);
|
||||
#ifdef MSGPACK_RUBY_ENCODING
|
||||
int enc = ENCODING_GET(self);
|
||||
if(enc != s_enc_utf8 && enc != s_enc_ascii8bit && enc != s_enc_usascii) {
|
||||
if(!ENC_CODERANGE_ASCIIONLY(self)) {
|
||||
self = rb_str_encode(self, s_enc_utf8_value, 0, Qnil);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
msgpack_pack_raw(out, RSTRING_LEN(self));
|
||||
msgpack_pack_raw_body(out, RSTRING_PTR(self), RSTRING_LEN(self));
|
||||
return out;
|
||||
@ -184,12 +193,16 @@ static VALUE MessagePack_String_to_msgpack(int argc, VALUE *argv, VALUE self)
|
||||
*/
|
||||
static VALUE MessagePack_Symbol_to_msgpack(int argc, VALUE *argv, VALUE self)
{
#if defined(MSGPACK_RUBY_ENCODING)
    /*
     * With encoding support, hand the symbol's String form to the String
     * packer so that symbols go through the same encoding handling as
     * ordinary strings.
     */
    VALUE sym_str = rb_id2str(SYM2ID(self));
    return MessagePack_String_to_msgpack(argc, argv, sym_str);
#else
    /*
     * Without encoding support, pack the symbol name as a raw value:
     * header with the byte length first, then the bytes themselves.
     */
    ARG_BUFFER(out, argc, argv);
    const char* const sym_name = rb_id2name(SYM2ID(self));
    const size_t name_len = strlen(sym_name);
    msgpack_pack_raw(out, name_len);
    msgpack_pack_raw_body(out, sym_name, name_len);
    return out;
#endif
}
|
||||
|
||||
|
||||
@ -205,7 +218,8 @@ static VALUE MessagePack_Symbol_to_msgpack(int argc, VALUE *argv, VALUE self)
|
||||
static VALUE MessagePack_Array_to_msgpack(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
ARG_BUFFER(out, argc, argv);
|
||||
msgpack_pack_array(out, RARRAY_LEN(self));
|
||||
// FIXME check sizeof(long) > sizeof(unsigned int) && RARRAY_LEN(self) > UINT_MAX
|
||||
msgpack_pack_array(out, (unsigned int)RARRAY_LEN(self));
|
||||
VALUE* p = RARRAY_PTR(self);
|
||||
VALUE* const pend = p + RARRAY_LEN(self);
|
||||
for(;p != pend; ++p) {
|
||||
@ -239,7 +253,8 @@ static int MessagePack_Hash_to_msgpack_foreach(VALUE key, VALUE value, VALUE out
|
||||
static VALUE MessagePack_Hash_to_msgpack(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
ARG_BUFFER(out, argc, argv);
|
||||
msgpack_pack_map(out, RHASH_SIZE(self));
|
||||
// FIXME check sizeof(st_index_t) > sizeof(unsigned int) && RHASH_SIZE(self) > UINT_MAX
|
||||
msgpack_pack_map(out, (unsigned int)RHASH_SIZE(self));
|
||||
rb_hash_foreach(self, MessagePack_Hash_to_msgpack_foreach, out);
|
||||
return out;
|
||||
}
|
||||
|
@ -17,9 +17,17 @@
|
||||
*/
|
||||
#include "pack.h"
|
||||
#include "unpack.h"
|
||||
#include "encoding.h"
|
||||
|
||||
static VALUE mMessagePack;
|
||||
|
||||
/*
 * Definitions of the encoding globals declared in encoding.h.
 * They are only compiled when MSGPACK_RUBY_ENCODING is defined and are
 * assigned their values in Init_msgpack().
 */
#ifdef MSGPACK_RUBY_ENCODING
|
||||
/* Encoding index of UTF-8. */
int s_enc_utf8;
|
||||
/* Encoding index of ASCII-8BIT. */
int s_enc_ascii8bit;
|
||||
/* Encoding index of US-ASCII. */
int s_enc_usascii;
|
||||
/* UTF-8 encoding object as a Ruby VALUE. */
VALUE s_enc_utf8_value;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Document-module: MessagePack
|
||||
*
|
||||
@ -46,6 +54,13 @@ void Init_msgpack(void)
|
||||
|
||||
rb_define_const(mMessagePack, "VERSION", rb_str_new2(MESSAGEPACK_VERSION));
|
||||
|
||||
#ifdef MSGPACK_RUBY_ENCODING
|
||||
s_enc_ascii8bit = rb_ascii8bit_encindex();
|
||||
s_enc_utf8 = rb_utf8_encindex();
|
||||
s_enc_usascii = rb_usascii_encindex();
|
||||
s_enc_utf8_value = rb_enc_from_encoding(rb_utf8_encoding());
|
||||
#endif
|
||||
|
||||
Init_msgpack_unpack(mMessagePack);
|
||||
Init_msgpack_pack(mMessagePack);
|
||||
}
|
||||
|
68
ruby/test/test_encoding.rb
Normal file
68
ruby/test/test_encoding.rb
Normal file
@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env ruby
|
||||
require File.dirname(__FILE__)+'/test_helper'
|
||||
|
||||
if RUBY_VERSION < "1.9"
|
||||
exit
|
||||
end
|
||||
|
||||
# Round-trip tests for the encoding handling of String#to_msgpack,
# Symbol#to_msgpack and MessagePack.unpack.
class MessagePackTestEncoding < Test::Unit::TestCase
  # Defines a test method named "test_<name>" whose body is the given block.
  def self.it(name, &block)
    define_method("test_#{name}", &block)
  end

  it "US-ASCII" do
    check_unpack "abc".force_encoding("US-ASCII")
  end

  it "UTF-8 ascii" do
    check_unpack "abc".force_encoding("UTF-8")
  end

  it "UTF-8 mbstr" do
    check_unpack "\xE3\x81\x82".force_encoding("UTF-8")
  end

  it "UTF-8 invalid" do
    check_unpack "\xD0".force_encoding("UTF-8")
  end

  it "ASCII-8BIT" do
    check_unpack "\xD0".force_encoding("ASCII-8BIT")
  end

  it "EUC-JP" do
    check_unpack "\xA4\xA2".force_encoding("EUC-JP")
  end

  it "EUC-JP invalid" do
    # Packing must fail: the bytes cannot be transcoded from EUC-JP to UTF-8.
    assert_raise(Encoding::InvalidByteSequenceError) do
      "\xD0".force_encoding("EUC-JP").to_msgpack
    end
  end

  private

  # Packs +str+, unpacks the result and asserts that the unpacked string is
  # tagged UTF-8 and equals the UTF-8 form of +str+. When +str+ is validly
  # encoded, the same check is repeated for the symbol made from it.
  def check_unpack(str)
    # ASCII-8BIT data is passed through untouched and merely re-tagged as
    # UTF-8; every other encoding is expected to be transcoded.
    expected =
      if str.encoding.to_s == "ASCII-8BIT"
        str.dup.force_encoding("UTF-8")
      else
        str.encode("UTF-8")
      end

    packed = str.to_msgpack
    unpacked = MessagePack.unpack(packed)
    assert_equal("UTF-8", unpacked.encoding.to_s)
    assert_equal(expected, unpacked)

    # Only validly encoded strings are also checked in symbol form.
    if str.valid_encoding?
      unpacked = MessagePack.unpack(str.to_sym.to_msgpack)
      assert_equal("UTF-8", unpacked.encoding.to_s)
      assert_equal(expected, unpacked)
    end
  end
end
|
||||
|
@ -5,4 +5,6 @@ rescue LoadError
|
||||
require File.dirname(__FILE__) + '/../lib/msgpack'
|
||||
end
|
||||
|
||||
#GC.stress = true
|
||||
if ENV["GC_STRESS"]
|
||||
GC.stress = true
|
||||
end
|
||||
|
@ -16,17 +16,13 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "ruby.h"
|
||||
#include "encoding.h"
|
||||
|
||||
#include "msgpack/unpack_define.h"
|
||||
|
||||
static ID s_sysread;
|
||||
static ID s_readpartial;
|
||||
|
||||
#ifdef HAVE_RUBY_ENCODING_H
|
||||
#include "ruby/encoding.h"
|
||||
int s_ascii_8bit;
|
||||
#endif
|
||||
|
||||
struct unpack_buffer {
|
||||
size_t size;
|
||||
size_t free;
|
||||
@ -136,6 +132,9 @@ static inline int template_callback_raw(unpack_user* u, const char* b, const cha
|
||||
} else {
|
||||
*o = rb_str_substr(u->source, p - b, l);
|
||||
}
|
||||
#ifdef MSGPACK_RUBY_ENCODING
|
||||
ENCODING_SET(*o, s_enc_utf8);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -163,16 +162,6 @@ static inline int template_callback_raw(unpack_user* u, const char* b, const cha
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_RUBY_ENCODING_H
|
||||
/*
 * Rescue handler passed to rb_rescue() by template_execute_wrap() when
 * HAVE_RUBY_ENCODING_H is defined.
 *
 * data: pointer to a two-element VALUE array, cast to VALUE. Element 0 is
 *       the source string whose encoding index was temporarily changed,
 *       element 1 is the original encoding index to put back.
 *
 * Re-enables the GC, restores the encoding index, then invokes the RERAISE
 * macro (defined elsewhere; presumably it does not return -- confirm).
 */
static VALUE template_execute_rescue_enc(VALUE data)
|
||||
{
|
||||
rb_gc_enable();
|
||||
VALUE* resc = (VALUE*)data;
|
||||
rb_enc_set_index(resc[0], (int)resc[1]);
|
||||
RERAISE;
|
||||
}
|
||||
#endif
|
||||
|
||||
static VALUE template_execute_rescue(VALUE nouse)
|
||||
{
|
||||
rb_gc_enable();
|
||||
@ -203,31 +192,16 @@ static int template_execute_wrap(msgpack_unpack_t* mp,
|
||||
(VALUE)from,
|
||||
};
|
||||
|
||||
#ifdef HAVE_RUBY_ENCODING_H
|
||||
int enc_orig = rb_enc_get_index(str);
|
||||
rb_enc_set_index(str, s_ascii_8bit);
|
||||
#endif
|
||||
|
||||
// FIXME mp->top is not updated while execute is running, so GC mark does not work
|
||||
rb_gc_disable();
|
||||
|
||||
mp->user.source = str;
|
||||
|
||||
#ifdef HAVE_RUBY_ENCODING_H
|
||||
VALUE resc[2] = {str, enc_orig};
|
||||
int ret = (int)rb_rescue(template_execute_do, (VALUE)args,
|
||||
template_execute_rescue_enc, (VALUE)resc);
|
||||
#else
|
||||
int ret = (int)rb_rescue(template_execute_do, (VALUE)args,
|
||||
template_execute_rescue, Qnil);
|
||||
#endif
|
||||
|
||||
rb_gc_enable();
|
||||
|
||||
#ifdef HAVE_RUBY_ENCODING_H
|
||||
rb_enc_set_index(str, enc_orig);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -746,10 +720,6 @@ void Init_msgpack_unpack(VALUE mMessagePack)
|
||||
s_sysread = rb_intern("sysread");
|
||||
s_readpartial = rb_intern("readpartial");
|
||||
|
||||
#ifdef HAVE_RUBY_ENCODING_H
|
||||
s_ascii_8bit = rb_enc_find_index("ASCII-8BIT");
|
||||
#endif
|
||||
|
||||
eUnpackError = rb_define_class_under(mMessagePack, "UnpackError", rb_eStandardError);
|
||||
cUnpacker = rb_define_class_under(mMessagePack, "Unpacker", rb_cObject);
|
||||
rb_define_alloc_func(cUnpacker, MessagePack_Unpacker_alloc);
|
||||
|
Loading…
x
Reference in New Issue
Block a user