/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" using namespace cv; using namespace cv::cuda; #ifdef HAVE_CUDA namespace { size_t alignUpStep(size_t what, size_t alignment) { size_t alignMask = alignment - 1; size_t inverseAlignMask = ~alignMask; size_t res = (what + alignMask) & inverseAlignMask; return res; } } #endif void cv::cuda::CudaMem::create(int rows_, int cols_, int type_) { #ifndef HAVE_CUDA (void) rows_; (void) cols_; (void) type_; throw_no_cuda(); #else if (alloc_type == SHARED) { DeviceInfo devInfo; CV_Assert( devInfo.canMapHostMemory() ); } type_ &= Mat::TYPE_MASK; if (rows == rows_ && cols == cols_ && type() == type_ && data) return; if (data) release(); CV_DbgAssert( rows_ >= 0 && cols_ >= 0 ); if (rows_ > 0 && cols_ > 0) { flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + type_; rows = rows_; cols = cols_; step = elemSize() * cols; if (alloc_type == SHARED) { DeviceInfo devInfo; step = alignUpStep(step, devInfo.textureAlignment()); } int64 _nettosize = (int64)step*rows; size_t nettosize = (size_t)_nettosize; if (_nettosize != (int64)nettosize) CV_Error(cv::Error::StsNoMem, "Too big buffer is allocated"); size_t datasize = alignSize(nettosize, (int)sizeof(*refcount)); void* ptr = 0; switch (alloc_type) { case PAGE_LOCKED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocDefault) ); break; case SHARED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocMapped) ); break; case WRITE_COMBINED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocWriteCombined) ); break; default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type"); } datastart = data = (uchar*)ptr; dataend = data + nettosize; refcount = (int*)cv::fastMalloc(sizeof(*refcount)); *refcount = 1; } #endif } CudaMem cv::cuda::CudaMem::reshape(int new_cn, int new_rows) const { CudaMem hdr = *this; int cn = channels(); if (new_cn == 0) new_cn = cn; int total_width = cols * cn; if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0) new_rows = rows * total_width / new_cn; if (new_rows != 0 && new_rows != rows) { int total_size = total_width * rows; if (!isContinuous()) CV_Error(cv::Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed"); if ((unsigned)new_rows > (unsigned)total_size) CV_Error(cv::Error::StsOutOfRange, "Bad new number of rows"); total_width = total_size / new_rows; if (total_width * new_rows != total_size) CV_Error(cv::Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows"); hdr.rows = new_rows; hdr.step = total_width * elemSize1(); } int new_width = total_width / new_cn; if (new_width * new_cn != total_width) CV_Error(cv::Error::BadNumChannels, "The total width is not divisible by the new number of channels"); hdr.cols = new_width; hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT); return hdr; } void cv::cuda::CudaMem::release() { #ifdef HAVE_CUDA if (refcount && CV_XADD(refcount, -1) == 1) { cudaFreeHost(datastart); fastFree(refcount); } dataend = data = datastart = 0; step = rows = cols = 0; refcount = 0; #endif } GpuMat cv::cuda::CudaMem::createGpuMatHeader() const { #ifndef HAVE_CUDA throw_no_cuda(); return GpuMat(); #else CV_Assert( alloc_type == SHARED ); void *pdev; cudaSafeCall( cudaHostGetDevicePointer(&pdev, data, 0) ); return GpuMat(rows, cols, type(), pdev, step); #endif } void cv::cuda::registerPageLocked(Mat& m) { #ifndef HAVE_CUDA (void) m; throw_no_cuda(); #else CV_Assert( m.isContinuous() ); cudaSafeCall( cudaHostRegister(m.data, m.step * m.rows, cudaHostRegisterPortable) ); #endif } void cv::cuda::unregisterPageLocked(Mat& m) { #ifndef HAVE_CUDA (void) m; #else cudaSafeCall( cudaHostUnregister(m.data) ); #endif }