c++ binding: improve msgpack::zone, zero-copy stream deserializer

git-svn-id: file:///Users/frsyuki/project/msgpack-git/svn/x@92 5a5092ae-2292-43ba-b2d5-dcab9c1a2731
This commit is contained in:
frsyuki 2009-02-15 09:10:00 +00:00
parent c93d45371b
commit e582fa34c7
6 changed files with 336 additions and 189 deletions

View File

@ -26,14 +26,21 @@ noinst_HEADERS = \
# FIXME
object.lo: msgpack/type/tuple.hpp
unpack.lo: msgpack/type/tuple.hpp
zone.lo: msgpack/type/tuple.hpp
unpack.lo: msgpack/type/tuple.hpp msgpack/zone.hpp
zone.lo: msgpack/type/tuple.hpp msgpack/zone.hpp
msgpack/type/tuple.hpp: msgpack/type/tuple.hpp.erb
$(ERB) $< > $@.tmp
mv $@.tmp $@
MOSTLYCLEANFILES = msgpack/type/tuple.hpp
msgpack/zone.hpp: msgpack/zone.hpp.erb
$(ERB) $< > $@.tmp
mv $@.tmp $@
MOSTLYCLEANFILES = \
msgpack/type/tuple.hpp \
msgpack/zone.hpp
# FIXME
libmsgpack_la_LDFLAGS = -version-info 0:0:0

View File

@ -22,6 +22,19 @@
namespace msgpack {
//namespace {
struct allocator {
zone* z;
bool referenced;
inline object* malloc_object(size_t n)
{
return (object*)z->malloc(sizeof(object)*n);
}
};
//} // noname namespace
#define msgpack_unpack_struct(name) \
struct msgpack_unpacker_##name
@ -33,7 +46,7 @@ namespace msgpack {
#define msgpack_unpack_object object
#define msgpack_unpack_user zone*
#define msgpack_unpack_user allocator
struct msgpack_unpacker_context;
@ -46,92 +59,100 @@ static int msgpack_unpacker_execute(struct msgpack_unpacker_context* ctx,
const char* data, size_t len, size_t* off);
static inline object msgpack_unpack_init(zone** z)
static inline object msgpack_unpack_init(allocator* a)
{ return object(); }
static inline object msgpack_unpack_uint8(zone** z, uint8_t d)
static inline object msgpack_unpack_uint8(allocator* a, uint8_t d)
{ object o; o.type = type::POSITIVE_INTEGER; o.via.u64 = d; return o; }
static inline object msgpack_unpack_uint16(zone** z, uint16_t d)
static inline object msgpack_unpack_uint16(allocator* a, uint16_t d)
{ object o; o.type = type::POSITIVE_INTEGER; o.via.u64 = d; return o; }
static inline object msgpack_unpack_uint32(zone** z, uint32_t d)
static inline object msgpack_unpack_uint32(allocator* a, uint32_t d)
{ object o; o.type = type::POSITIVE_INTEGER; o.via.u64 = d; return o; }
static inline object msgpack_unpack_uint64(zone** z, uint64_t d)
static inline object msgpack_unpack_uint64(allocator* a, uint64_t d)
{ object o; o.type = type::POSITIVE_INTEGER; o.via.u64 = d; return o; }
static inline object msgpack_unpack_int8(zone** z, int8_t d)
static inline object msgpack_unpack_int8(allocator* a, int8_t d)
{ if(d >= 0) { object o; o.type = type::POSITIVE_INTEGER; o.via.u64 = d; return o; }
else { object o; o.type = type::NEGATIVE_INTEGER; o.via.i64 = d; return o; } }
static inline object msgpack_unpack_int16(zone** z, int16_t d)
static inline object msgpack_unpack_int16(allocator* a, int16_t d)
{ if(d >= 0) { object o; o.type = type::POSITIVE_INTEGER; o.via.u64 = d; return o; }
else { object o; o.type = type::NEGATIVE_INTEGER; o.via.i64 = d; return o; } }
static inline object msgpack_unpack_int32(zone** z, int32_t d)
static inline object msgpack_unpack_int32(allocator* a, int32_t d)
{ if(d >= 0) { object o; o.type = type::POSITIVE_INTEGER; o.via.u64 = d; return o; }
else { object o; o.type = type::NEGATIVE_INTEGER; o.via.i64 = d; return o; } }
static inline object msgpack_unpack_int64(zone** z, int64_t d)
static inline object msgpack_unpack_int64(allocator* a, int64_t d)
{ if(d >= 0) { object o; o.type = type::POSITIVE_INTEGER; o.via.u64 = d; return o; }
else { object o; o.type = type::NEGATIVE_INTEGER; o.via.i64 = d; return o; } }
static inline object msgpack_unpack_float(zone** z, float d)
static inline object msgpack_unpack_float(allocator* a, float d)
{ object o; o.type = type::DOUBLE; o.via.dec = d; return o; }
static inline object msgpack_unpack_double(zone** z, double d)
static inline object msgpack_unpack_double(allocator* a, double d)
{ object o; o.type = type::DOUBLE; o.via.dec = d; return o; }
static inline object msgpack_unpack_nil(zone** z)
static inline object msgpack_unpack_nil(allocator* a)
{ object o; o.type = type::NIL; return o; }
static inline object msgpack_unpack_true(zone** z)
static inline object msgpack_unpack_true(allocator* a)
{ object o; o.type = type::BOOLEAN; o.via.boolean = true; return o; }
static inline object msgpack_unpack_false(zone** z)
static inline object msgpack_unpack_false(allocator* a)
{ object o; o.type = type::BOOLEAN; o.via.boolean = false; return o; }
static inline object msgpack_unpack_array(zone** z, unsigned int n)
static inline object msgpack_unpack_array(allocator* a, unsigned int n)
{
object o;
o.type = type::ARRAY;
o.via.container.size = 0;
o.via.container.ptr = (*z)->malloc_container(n);
o.via.container.ptr = a->malloc_object(n);
return o;
}
static inline void msgpack_unpack_array_item(zone** z, object* c, object o)
static inline void msgpack_unpack_array_item(allocator* a, object* c, object o)
{ c->via.container.ptr[ c->via.container.size++ ] = o; }
static inline object msgpack_unpack_map(zone** z, unsigned int n)
static inline object msgpack_unpack_map(allocator* a, unsigned int n)
{
object o;
o.type = type::MAP;
o.via.container.size = 0;
o.via.container.ptr = (*z)->malloc_container(n*2);
o.via.container.ptr = a->malloc_object(n*2);
return o;
}
static inline void msgpack_unpack_map_item(zone** z, object* c, object k, object v)
static inline void msgpack_unpack_map_item(allocator* a, object* c, object k, object v)
{
c->via.container.ptr[ c->via.container.size ] = k;
c->via.container.ptr[ c->via.container.size+1 ] = v;
++c->via.container.size;
}
static inline object msgpack_unpack_raw(zone** z, const char* b, const char* p, unsigned int l)
{ object o; o.type = type::RAW; o.via.ref.ptr = p; o.via.ref.size = l; return o; }
static inline object msgpack_unpack_raw(allocator* a, const char* b, const char* p, unsigned int l)
{
object o;
o.type = type::RAW;
o.via.ref.ptr = p;
o.via.ref.size = l;
a->referenced = true;
return o;
}
#include "msgpack/unpack_template.h"
struct unpacker::context {
context(zone* z)
namespace {
struct context {
context()
{
msgpack_unpacker_init(&m_ctx);
m_ctx.user = z;
allocator a = {NULL, false};
m_ctx.user = a;
}
~context() { }
@ -148,35 +169,64 @@ struct unpacker::context {
void reset()
{
zone* z = m_ctx.user;
zone* z = m_ctx.user.z;
msgpack_unpacker_init(&m_ctx);
m_ctx.user = z;
allocator a = {z, false};
m_ctx.user = a;
}
void reset(zone* z)
void set_zone(zone* z)
{
msgpack_unpacker_init(&m_ctx);
m_ctx.user = z;
m_ctx.user.z = z;
}
zone* user()
bool is_referenced() const
{
return m_ctx.user;
}
void user(zone* z)
{
m_ctx.user = z;
return m_ctx.user.referenced;
}
private:
msgpack_unpacker_context m_ctx;
zone* m_zone;
private:
context();
context(const context&);
};
context* as_ctx(void* m)
{
return reinterpret_cast<context*>(m);
}
static const size_t COUNTER_SIZE = sizeof(unsigned int);
static inline void init_count(void* buffer)
{
*(volatile unsigned int*)buffer = 1;
}
static inline void decl_count(void* buffer)
{
//if(--*(unsigned int*)buffer == 0) {
if(__sync_sub_and_fetch((unsigned int*)buffer, 1) == 0) {
free(buffer);
}
}
static inline void incr_count(void* buffer)
{
//++*(unsigned int*)buffer;
__sync_add_and_fetch((unsigned int*)buffer, 1);
}
static inline unsigned int get_count(void* buffer)
{
return *(volatile unsigned int*)buffer;
}
} // noname namespace
unpacker::unpacker(size_t initial_buffer_size) :
m_buffer(NULL),
@ -184,45 +234,77 @@ unpacker::unpacker(size_t initial_buffer_size) :
m_free(0),
m_off(0),
m_zone(new zone()),
m_ctx(new context(&*m_zone)),
m_ctx(new context()),
m_initial_buffer_size(initial_buffer_size)
{ }
{
if(m_initial_buffer_size < COUNTER_SIZE) {
m_initial_buffer_size = COUNTER_SIZE;
}
as_ctx(m_ctx)->set_zone(m_zone.get());
m_buffer = (char*)::malloc(m_initial_buffer_size);
if(!m_buffer) { throw std::bad_alloc(); }
init_count(m_buffer);
m_used = COUNTER_SIZE;
m_free = m_initial_buffer_size - m_used;
m_off = COUNTER_SIZE;
}
unpacker::~unpacker()
{
delete m_ctx;
delete as_ctx(m_ctx);
decl_count(m_buffer);
}
void unpacker::expand_buffer(size_t len)
{
if(m_off == 0) {
size_t next_size;
if(m_used != 0) { next_size = (m_used + m_free) * 2; }
else { next_size = m_initial_buffer_size; }
if(m_used == m_off && get_count(m_buffer) == 1 &&
!as_ctx(m_ctx)->is_referenced()) {
// rewind buffer
m_free += m_used - COUNTER_SIZE;
m_used = COUNTER_SIZE;
m_off = COUNTER_SIZE;
if(m_free >= len) { return; }
}
if(m_off == COUNTER_SIZE) {
size_t next_size = (m_used + m_free) * 2;
while(next_size < len + m_used) { next_size *= 2; }
m_buffer = m_zone->realloc(m_buffer, next_size);
char* tmp = (char*)::realloc(m_buffer, next_size);
if(!tmp) { throw std::bad_alloc(); }
m_buffer = tmp;
m_free = next_size - m_used;
} else {
size_t next_size = m_initial_buffer_size;
while(next_size < len + m_used - m_off) { next_size *= 2; }
size_t next_size = m_initial_buffer_size; // include COUNTER_SIZE
size_t not_parsed = m_used - m_off;
while(next_size < len + not_parsed + COUNTER_SIZE) { next_size *= 2; }
char* tmp = m_zone->malloc(next_size);
memcpy(tmp, m_buffer+m_off, m_used-m_off);
char* tmp = (char*)::malloc(next_size);
if(!tmp) { throw std::bad_alloc(); }
init_count(tmp);
try {
m_zone->push_finalizer(decl_count, m_buffer);
} catch (...) { free(tmp); throw; }
memcpy(tmp+COUNTER_SIZE, m_buffer+m_off, not_parsed);
m_buffer = tmp;
m_used = m_used - m_off;
m_used = not_parsed + COUNTER_SIZE;
m_free = next_size - m_used;
m_off = 0;
m_off = COUNTER_SIZE;
}
}
bool unpacker::execute()
{
int ret = m_ctx->execute(m_buffer, m_used, &m_off);
int ret = as_ctx(m_ctx)->execute(m_buffer, m_used, &m_off);
if(ret < 0) {
throw unpack_error("parse error");
} else if(ret == 0) {
@ -234,56 +316,45 @@ bool unpacker::execute()
zone* unpacker::release_zone()
{
m_zone->push_finalizer(decl_count, m_buffer);
incr_count(m_buffer);
//std::auto_ptr<zone> old(new zone());
//m_zone.swap(old);
zone* n = new zone();
std::auto_ptr<zone> old(m_zone.release());
m_zone.reset(n);
//std::auto_ptr<zone> old(new zone());
//m_zone.swap(old);
as_ctx(m_ctx)->set_zone(m_zone.get());
// move all bytes in m_buffer to new buffer from the new zone
if(m_used <= m_off) {
m_buffer = NULL;
m_used = 0;
m_free = 0;
m_off = 0;
} else {
try {
expand_buffer(0);
} catch (...) {
// m_zone.swap(old);
zone* tmp = old.release();
old.reset(m_zone.release());
m_zone.reset(tmp);
throw;
}
}
m_ctx->user(m_zone.get());
return old.release();
}
object unpacker::data()
{
return m_ctx->data();
return as_ctx(m_ctx)->data();
}
void unpacker::reset()
{
if(m_off != 0) { delete release_zone(); }
m_ctx->reset();
if(!m_zone->empty()) { delete release_zone(); }
as_ctx(m_ctx)->reset();
}
object unpacker::unpack(const char* data, size_t len, zone& z, size_t* off)
{
context ctx(&z);
context ctx;
ctx.set_zone(&z);
if(off) {
int ret = ctx.execute(data, len, off);
size_t noff = *off;
int ret = ctx.execute(data, len, &noff);
if(ret < 0) {
throw unpack_error("parse error");
} else if(ret == 0) {
throw unpack_error("insufficient bytes");
}
*off = noff;
} else {
size_t noff = 0;
int ret = ctx.execute(data, len, &noff);

View File

@ -24,7 +24,8 @@
#include <stdexcept>
#ifndef MSGPACK_UNPACKER_DEFAULT_INITIAL_BUFFER_SIZE
#define MSGPACK_UNPACKER_DEFAULT_INITIAL_BUFFER_SIZE 8*1024
#define MSGPACK_UNPACKER_DEFAULT_INITIAL_BUFFER_SIZE 16
//#define MSGPACK_UNPACKER_DEFAULT_INITIAL_BUFFER_SIZE 8*1024
#endif
namespace msgpack {
@ -133,10 +134,9 @@ private:
std::auto_ptr<zone> m_zone;
struct context;
context* m_ctx;
void* m_ctx;
const size_t m_initial_buffer_size;
size_t m_initial_buffer_size;
private:
void expand_buffer(size_t len);

View File

@ -16,31 +16,83 @@
// limitations under the License.
//
#include "msgpack/zone.hpp"
#include <algorithm>
namespace msgpack {
void zone::clear()
zone::zone(size_t chunk_size) :
m_chunk_size(chunk_size)
{
for(std::vector<char*>::iterator it(m_ptrs.begin()), it_end(m_ptrs.end());
it != it_end; ++it) {
free(*it);
}
m_ptrs.clear();
chunk dummy = {0, NULL, NULL};
m_chunk_array.push_back(dummy);
}
char* zone::realloc_real(char* ptr, size_t count)
zone::~zone()
{
for(std::vector<char*>::reverse_iterator it(m_ptrs.rbegin()), it_end(m_ptrs.rend());
it != it_end; ++it) {
if(*it == ptr) {
char* tmp = (char*)::realloc(ptr, count);
if(!tmp) { throw std::bad_alloc(); }
*it = tmp;
return tmp;
clear();
}
namespace {
template <typename Private>
struct zone_finalize {
void operator() (Private& f) {
(*f.func)(f.obj);
}
};
template <typename Private>
struct zone_free {
void operator() (Private& c) {
::free(c.alloc);
}
};
}
void zone::clear()
{
std::for_each(m_finalizers.rbegin(), m_finalizers.rend(),
zone_finalize<finalizer>());
m_finalizers.clear();
std::for_each(m_chunk_array.begin(), m_chunk_array.end(),
zone_free<chunk>());
m_chunk_array.resize(1);
m_chunk_array[0].ptr = NULL;
m_chunk_array[0].free = 0;
}
bool zone::empty() const
{
return m_chunk_array.back().alloc == NULL &&
m_finalizers.empty();
}
void* zone::malloc(size_t size)
{
if(m_chunk_array.back().free > size) {
char* p = (char*)m_chunk_array.back().ptr;
m_chunk_array.back().ptr = p + size;
m_chunk_array.back().free -= size;
return p;
}
throw std::bad_alloc();
size_t sz = m_chunk_size;
while(sz < size) { sz *= 2; }
chunk dummy = {0, NULL, NULL};
m_chunk_array.push_back(dummy);
char* p = (char*)::malloc(sz);
if(!p) {
m_chunk_array.pop_back();
throw std::bad_alloc();
}
m_chunk_array.back().free = sz - size;
m_chunk_array.back().ptr = p + size;
m_chunk_array.back().alloc = p;
return p;
}

View File

@ -1,86 +0,0 @@
//
// MessagePack for C++ memory pool
//
// Copyright (C) 2008 FURUHASHI Sadayuki
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef MSGPACK_ZONE_HPP__
#define MSGPACK_ZONE_HPP__
#include "msgpack/object.hpp"
#include <cstdlib>
#include <vector>
namespace msgpack {
class zone {
public:
zone();
~zone();
public:
char* malloc(size_t count);
char* realloc(char* ptr, size_t count);
object* malloc_container(size_t count);
void clear();
private:
std::vector<char*> m_ptrs;
private:
char* realloc_real(char* ptr, size_t count);
private:
zone(const zone&);
};
inline zone::zone() { }
inline zone::~zone() { clear(); }
inline char* zone::malloc(size_t count)
{
char* ptr = (char*)::malloc(count);
if(!ptr) { throw std::bad_alloc(); }
try {
m_ptrs.push_back(ptr);
} catch (...) {
free(ptr);
throw;
}
return ptr;
}
inline char* zone::realloc(char* ptr, size_t count)
{
if(ptr == NULL) {
return zone::malloc(count);
} else {
return realloc_real(ptr, count);
}
}
inline object* zone::malloc_container(size_t count)
{
return (object*)zone::malloc(sizeof(object)*count);
}
} // namespace msgpack
#endif /* msgpack/zone.hpp */

103
cpp/zone.hpp.erb Normal file
View File

@ -0,0 +1,103 @@
//
// MessagePack for C++ memory pool
//
// Copyright (C) 2008 FURUHASHI Sadayuki
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef MSGPACK_ZONE_HPP__
#define MSGPACK_ZONE_HPP__
#include "msgpack/object.hpp"
#include <cstdlib>
#include <vector>
#ifndef MSGPACK_ZONE_CHUNK_SIZE
#define MSGPACK_ZONE_CHUNK_SIZE 2048
#endif
<% GENERATION_LIMIT = 15 %>
namespace msgpack {
class zone {
public:
zone(size_t chunk_size = MSGPACK_ZONE_CHUNK_SIZE);
~zone();
public:
void* malloc(size_t size);
void push_finalizer(void (*func)(void*), void* obj);
void clear();
bool empty() const;
<%0.upto(GENERATION_LIMIT) {|i|%>
template <typename T<%1.upto(i) {|j|%>, typename A<%=j%><%}%>>
T* allocate(<%=(1..i).map{|j|"A#{j} a#{j}"}.join(', ')%>);
<%}%>
private:
struct chunk {
size_t free;
void* ptr;
void* alloc;
};
std::vector<chunk> m_chunk_array;
struct finalizer {
void (*func)(void*);
void* obj;
};
std::vector<finalizer> m_finalizers;
template <typename T>
static void object_destructor(void* obj);
size_t m_chunk_size;
private:
zone(const zone&);
};
inline void zone::push_finalizer(void (*func)(void*), void* obj)
{
finalizer f = {func, obj};
m_finalizers.push_back(f);
}
template <typename T>
void zone::object_destructor(void* obj)
{
reinterpret_cast<T*>(obj)->~T();
}
<%0.upto(GENERATION_LIMIT) {|i|%>
template <typename T<%1.upto(i) {|j|%>, typename A<%=j%><%}%>>
T* zone::allocate(<%=(1..i).map{|j|"A#{j} a#{j}"}.join(', ')%>)
{
void* x = malloc(sizeof(T));
push_finalizer(&zone::object_destructor<T>, x);
try { return new (x) T(<%=(1..i).map{|j|"a#{j}"}.join(', ')%>); }
catch (...) { m_finalizers.pop_back(); throw; }
}
<%}%>
} // namespace msgpack
#endif /* msgpack/zone.hpp */