/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #define CV_USE_SYSTEM_MALLOC 1 namespace cv { static void* OutOfMemoryError(size_t size) { CV_Error_(CV_StsNoMem, ("Failed to allocate %lu bytes", (unsigned long)size)); return 0; } #if CV_USE_SYSTEM_MALLOC void deleteThreadAllocData() {} void* fastMalloc( size_t size ) { uchar* udata = (uchar*)malloc(size + sizeof(void*) + CV_MALLOC_ALIGN); if(!udata) return OutOfMemoryError(size); uchar** adata = alignPtr((uchar**)udata + 1, CV_MALLOC_ALIGN); adata[-1] = udata; return adata; } void fastFree(void* ptr) { if(ptr) { uchar* udata = ((uchar**)ptr)[-1]; CV_DbgAssert(udata < (uchar*)ptr && ((uchar*)ptr - udata) <= (ptrdiff_t)(sizeof(void*)+CV_MALLOC_ALIGN)); free(udata); } } #else #if 0 #define SANITY_CHECK(block) \ CV_Assert(((size_t)(block) & (MEM_BLOCK_SIZE-1)) == 0 && \ (unsigned)(block)->binIdx <= (unsigned)MAX_BIN && \ (block)->signature == MEM_BLOCK_SIGNATURE) #else #define SANITY_CHECK(block) #endif #define STAT(stmt) #ifdef WIN32 struct CriticalSection { CriticalSection() { InitializeCriticalSection(&cs); } ~CriticalSection() { DeleteCriticalSection(&cs); } void lock() { EnterCriticalSection(&cs); } void unlock() { LeaveCriticalSection(&cs); } bool trylock() { return TryEnterCriticalSection(&cs) != 0; } CRITICAL_SECTION cs; }; void* SystemAlloc(size_t size) { void* ptr = malloc(size); return ptr ? ptr : OutOfMemoryError(size); } void SystemFree(void* ptr, size_t) { free(ptr); } #else struct CriticalSection { CriticalSection() { pthread_mutex_init(&mutex, 0); } ~CriticalSection() { pthread_mutex_destroy(&mutex); } void lock() { pthread_mutex_lock(&mutex); } void unlock() { pthread_mutex_unlock(&mutex); } bool trylock() { return pthread_mutex_trylock(&mutex) == 0; } pthread_mutex_t mutex; }; void* SystemAlloc(size_t size) { #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif void* ptr = 0; ptr = mmap(ptr, size, (PROT_READ | PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); return ptr != MAP_FAILED ? ptr : OutOfMemoryError(size); } void SystemFree(void* ptr, size_t size) { munmap(ptr, size); } #endif struct AutoLock { AutoLock(CriticalSection& _cs) : cs(&_cs) { cs->lock(); } ~AutoLock() { cs->unlock(); } CriticalSection* cs; }; const size_t MEM_BLOCK_SIGNATURE = 0x01234567; const int MEM_BLOCK_SHIFT = 14; const size_t MEM_BLOCK_SIZE = 1 << MEM_BLOCK_SHIFT; const size_t HDR_SIZE = 128; const size_t MAX_BLOCK_SIZE = MEM_BLOCK_SIZE - HDR_SIZE; const int MAX_BIN = 28; static const int binSizeTab[MAX_BIN+1] = { 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 128, 160, 192, 256, 320, 384, 480, 544, 672, 768, 896, 1056, 1328, 1600, 2688, 4048, 5408, 8128, 16256 }; struct MallocTables { void initBinTab() { int i, j = 0, n; for( i = 0; i <= MAX_BIN; i++ ) { n = binSizeTab[i]>>3; for( ; j <= n; j++ ) binIdx[j] = (uchar)i; } } int bin(size_t size) { assert( size <= MAX_BLOCK_SIZE ); return binIdx[(size + 7)>>3]; } MallocTables() { initBinTab(); } uchar binIdx[MAX_BLOCK_SIZE/8+1]; }; MallocTables mallocTables; struct Node { Node* next; }; struct ThreadData; struct Block { Block(Block* _next) { signature = MEM_BLOCK_SIGNATURE; prev = 0; next = _next; privateFreeList = publicFreeList = 0; bumpPtr = endPtr = 0; objSize = 0; threadData = 0; data = (uchar*)this + HDR_SIZE; } ~Block() {} void init(Block* _prev, Block* _next, int _objSize, ThreadData* _threadData) { prev = _prev; if(prev) prev->next = this; next = _next; if(next) next->prev = this; objSize = _objSize; binIdx = mallocTables.bin(objSize); threadData = _threadData; privateFreeList = publicFreeList = 0; bumpPtr = data; int nobjects = MAX_BLOCK_SIZE/objSize; endPtr = bumpPtr + nobjects*objSize; almostEmptyThreshold = (nobjects + 1)/2; allocated = 0; } bool isFilled() const { return allocated > almostEmptyThreshold; } size_t signature; Block* prev; Block* next; Node* privateFreeList; Node* publicFreeList; uchar* bumpPtr; uchar* endPtr; uchar* data; ThreadData* threadData; int objSize; int binIdx; int allocated; int almostEmptyThreshold; CriticalSection cs; }; struct BigBlock { BigBlock(int bigBlockSize, BigBlock* _next) { first = alignPtr((Block*)(this+1), MEM_BLOCK_SIZE); next = _next; nblocks = (int)(((char*)this + bigBlockSize - (char*)first)/MEM_BLOCK_SIZE); Block* p = 0; for( int i = nblocks-1; i >= 0; i-- ) p = ::new((uchar*)first + i*MEM_BLOCK_SIZE) Block(p); } ~BigBlock() { for( int i = nblocks-1; i >= 0; i-- ) ((Block*)((uchar*)first+i*MEM_BLOCK_SIZE))->~Block(); } BigBlock* next; Block* first; int nblocks; }; struct BlockPool { BlockPool(int _bigBlockSize=1<<20) : pool(0), bigBlockSize(_bigBlockSize) { } ~BlockPool() { AutoLock lock(cs); while( pool ) { BigBlock* nextBlock = pool->next; pool->~BigBlock(); SystemFree(pool, bigBlockSize); pool = nextBlock; } } Block* alloc() { AutoLock lock(cs); Block* block; if( !freeBlocks ) { BigBlock* bblock = ::new(SystemAlloc(bigBlockSize)) BigBlock(bigBlockSize, pool); assert( bblock != 0 ); freeBlocks = bblock->first; pool = bblock; } block = freeBlocks; freeBlocks = freeBlocks->next; if( freeBlocks ) freeBlocks->prev = 0; STAT(stat.bruttoBytes += MEM_BLOCK_SIZE); return block; } void free(Block* block) { AutoLock lock(cs); block->prev = 0; block->next = freeBlocks; freeBlocks = block; STAT(stat.bruttoBytes -= MEM_BLOCK_SIZE); } CriticalSection cs; Block* freeBlocks; BigBlock* pool; int bigBlockSize; int blocksPerBigBlock; }; BlockPool mallocPool; enum { START=0, FREE=1, GC=2 }; struct ThreadData { ThreadData() { for(int i = 0; i <= MAX_BIN; i++) bins[i][START] = bins[i][FREE] = bins[i][GC] = 0; } ~ThreadData() { // mark all the thread blocks as abandoned or even release them for( int i = 0; i <= MAX_BIN; i++ ) { Block *bin = bins[i][START], *block = bin; bins[i][START] = bins[i][FREE] = bins[i][GC] = 0; if( block ) { do { Block* next = block->next; int allocated = block->allocated; { AutoLock lock(block->cs); block->next = block->prev = 0; block->threadData = 0; Node *node = block->publicFreeList; for( ; node != 0; node = node->next ) allocated--; } if( allocated == 0 ) mallocPool.free(block); block = next; } while( block != bin ); } } } void moveBlockToFreeList( Block* block ) { int i = block->binIdx; Block*& freePtr = bins[i][FREE]; CV_DbgAssert( block->next->prev == block && block->prev->next == block ); if( block != freePtr ) { Block*& gcPtr = bins[i][GC]; if( gcPtr == block ) gcPtr = block->next; if( block->next != block ) { block->prev->next = block->next; block->next->prev = block->prev; } block->next = freePtr->next; block->prev = freePtr; freePtr = block->next->prev = block->prev->next = block; } } Block* bins[MAX_BIN+1][3]; #ifdef WIN32 #ifdef WINCE # define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF) #endif static DWORD tlsKey; static ThreadData* get() { ThreadData* data; if( tlsKey == TLS_OUT_OF_INDEXES ) tlsKey = TlsAlloc(); data = (ThreadData*)TlsGetValue(tlsKey); if( !data ) { data = new ThreadData; TlsSetValue(tlsKey, data); } return data; } #else static void deleteData(void* data) { delete (ThreadData*)data; } static pthread_key_t tlsKey; static ThreadData* get() { ThreadData* data; if( !tlsKey ) pthread_key_create(&tlsKey, deleteData); data = (ThreadData*)pthread_getspecific(tlsKey); if( !data ) { data = new ThreadData; pthread_setspecific(tlsKey, data); } return data; } #endif }; #ifdef WIN32 DWORD ThreadData::tlsKey = TLS_OUT_OF_INDEXES; void deleteThreadAllocData() { if( ThreadData::tlsKey != TLS_OUT_OF_INDEXES ) delete (ThreadData*)TlsGetValue( ThreadData::tlsKey ); } #else pthread_key_t ThreadData::tlsKey = 0; #endif #if 0 static void checkList(ThreadData* tls, int idx) { Block* block = tls->bins[idx][START]; if( !block ) { CV_DbgAssert( tls->bins[idx][FREE] == 0 && tls->bins[idx][GC] == 0 ); } else { bool gcInside = false; bool freeInside = false; do { if( tls->bins[idx][FREE] == block ) freeInside = true; if( tls->bins[idx][GC] == block ) gcInside = true; block = block->next; } while( block != tls->bins[idx][START] ); CV_DbgAssert( gcInside && freeInside ); } } #else #define checkList(tls, idx) #endif void* fastMalloc( size_t size ) { if( size > MAX_BLOCK_SIZE ) { size_t size1 = size + sizeof(uchar*)*2 + MEM_BLOCK_SIZE; uchar* udata = (uchar*)SystemAlloc(size1); uchar** adata = alignPtr((uchar**)udata + 2, MEM_BLOCK_SIZE); adata[-1] = udata; adata[-2] = (uchar*)size1; return adata; } { ThreadData* tls = ThreadData::get(); int idx = mallocTables.bin(size); Block*& startPtr = tls->bins[idx][START]; Block*& gcPtr = tls->bins[idx][GC]; Block*& freePtr = tls->bins[idx][FREE], *block = freePtr; checkList(tls, idx); size = binSizeTab[idx]; STAT( stat.nettoBytes += size; stat.mallocCalls++; ); uchar* data = 0; for(;;) { if( block ) { // try to find non-full block for(;;) { CV_DbgAssert( block->next->prev == block && block->prev->next == block ); if( block->bumpPtr ) { data = block->bumpPtr; if( (block->bumpPtr += size) >= block->endPtr ) block->bumpPtr = 0; break; } if( block->privateFreeList ) { data = (uchar*)block->privateFreeList; block->privateFreeList = block->privateFreeList->next; break; } if( block == startPtr ) break; block = block->next; } #if 0 avg_k += _k; avg_nk++; if( avg_nk == 1000 ) { printf("avg search iters per 1e3 allocs = %g\n", (double)avg_k/avg_nk ); avg_k = avg_nk = 0; } #endif freePtr = block; if( !data ) { block = gcPtr; for( int k = 0; k < 2; k++ ) { SANITY_CHECK(block); CV_DbgAssert( block->next->prev == block && block->prev->next == block ); if( block->publicFreeList ) { { AutoLock lock(block->cs); block->privateFreeList = block->publicFreeList; block->publicFreeList = 0; } Node* node = block->privateFreeList; for(;node != 0; node = node->next) --block->allocated; data = (uchar*)block->privateFreeList; block->privateFreeList = block->privateFreeList->next; gcPtr = block->next; if( block->allocated+1 <= block->almostEmptyThreshold ) tls->moveBlockToFreeList(block); break; } block = block->next; } if( !data ) gcPtr = block; } } if( data ) break; block = mallocPool.alloc(); block->init(startPtr ? startPtr->prev : block, startPtr ? startPtr : block, (int)size, tls); if( !startPtr ) startPtr = gcPtr = freePtr = block; checkList(tls, block->binIdx); SANITY_CHECK(block); } ++block->allocated; return data; } } void fastFree( void* ptr ) { if( ((size_t)ptr & (MEM_BLOCK_SIZE-1)) == 0 ) { if( ptr != 0 ) { void* origPtr = ((void**)ptr)[-1]; size_t sz = (size_t)((void**)ptr)[-2]; SystemFree( origPtr, sz ); } return; } { ThreadData* tls = ThreadData::get(); Node* node = (Node*)ptr; Block* block = (Block*)((size_t)ptr & -(int)MEM_BLOCK_SIZE); assert( block->signature == MEM_BLOCK_SIGNATURE ); if( block->threadData == tls ) { STAT( stat.nettoBytes -= block->objSize; stat.freeCalls++; float ratio = (float)stat.nettoBytes/stat.bruttoBytes; if( stat.minUsageRatio > ratio ) stat.minUsageRatio = ratio; ); SANITY_CHECK(block); bool prevFilled = block->isFilled(); --block->allocated; if( !block->isFilled() && (block->allocated == 0 || prevFilled) ) { if( block->allocated == 0 ) { int idx = block->binIdx; Block*& startPtr = tls->bins[idx][START]; Block*& freePtr = tls->bins[idx][FREE]; Block*& gcPtr = tls->bins[idx][GC]; if( block == block->next ) { CV_DbgAssert( startPtr == block && freePtr == block && gcPtr == block ); startPtr = freePtr = gcPtr = 0; } else { if( freePtr == block ) freePtr = block->next; if( gcPtr == block ) gcPtr = block->next; if( startPtr == block ) startPtr = block->next; block->prev->next = block->next; block->next->prev = block->prev; } mallocPool.free(block); checkList(tls, idx); return; } tls->moveBlockToFreeList(block); } node->next = block->privateFreeList; block->privateFreeList = node; } else { AutoLock lock(block->cs); SANITY_CHECK(block); node->next = block->publicFreeList; block->publicFreeList = node; if( block->threadData == 0 ) { // take ownership of the abandoned block. // note that it can happen at the same time as // ThreadData::deleteData() marks the blocks as abandoned, // so this part of the algorithm needs to be checked for data races int idx = block->binIdx; block->threadData = tls; Block*& startPtr = tls->bins[idx][START]; if( startPtr ) { block->next = startPtr; block->prev = startPtr->prev; block->next->prev = block->prev->next = block; } else startPtr = tls->bins[idx][FREE] = tls->bins[idx][GC] = block; } } } } #endif } CV_IMPL void cvSetMemoryManager( CvAllocFunc, CvFreeFunc, void * ) { CV_Error( -1, "Custom memory allocator is not supported" ); } CV_IMPL void* cvAlloc( size_t size ) { return cv::fastMalloc( size ); } CV_IMPL void cvFree_( void* ptr ) { cv::fastFree( ptr ); } /* End of file. */