meanShiftFiltering added (by masha)
get free memory function
This commit is contained in:
parent
37f47c9fa3
commit
9ee159462d
@ -64,6 +64,8 @@ namespace cv
|
||||
CV_EXPORTS void getComputeCapability(int device, int* major, int* minor);
|
||||
CV_EXPORTS int getNumberOfSMs(int device);
|
||||
|
||||
CV_EXPORTS void getGpuMemInfo(size_t *free, size_t* total);
|
||||
|
||||
//////////////////////////////// GpuMat ////////////////////////////////
|
||||
class CudaStream;
|
||||
class MatPL;
|
||||
@ -328,7 +330,10 @@ namespace cv
|
||||
|
||||
////////////////////////////// Image processing //////////////////////////////
|
||||
|
||||
void CV_EXPORTS remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, GpuMat& dst);
|
||||
CV_EXPORTS void remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, GpuMat& dst);
|
||||
|
||||
|
||||
CV_EXPORTS void meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, float sp, float sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
|
||||
|
||||
//////////////////////////////// StereoBM_GPU ////////////////////////////////
|
||||
|
||||
|
@ -46,7 +46,7 @@ using namespace cv::gpu;
|
||||
|
||||
namespace imgproc
|
||||
{
|
||||
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex;
|
||||
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex1;
|
||||
|
||||
__global__ void kernel_remap(const float *mapx, const float *mapy, size_t map_step, unsigned char* out, size_t out_step, int width, int height)
|
||||
{
|
||||
@ -59,7 +59,78 @@ namespace imgproc
|
||||
float xcoo = mapx[idx];
|
||||
float ycoo = mapy[idx];
|
||||
|
||||
out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex, xcoo, ycoo));
|
||||
out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex1, xcoo, ycoo));
|
||||
}
|
||||
}
|
||||
|
||||
texture< uchar4, 2, cudaReadModeElementType > tex_meanshift;
|
||||
|
||||
/*
 * Mean-shift filtering kernel: one thread processes one output pixel.
 *
 * Launch layout: a 2D grid of 2D blocks that covers at least cols x rows
 * threads; threads that fall outside the image return immediately.
 * No shared memory and no block-level synchronization are used.
 * The source image is read through the tex_meanshift texture reference,
 * which the host must bind before launching.
 *
 * out       - destination buffer; three bytes are written per pixel
 * out_step  - distance in bytes between consecutive rows of out
 * cols,rows - image size in pixels
 * sp        - spatial radius; the search window is (2*sp+1) x (2*sp+1)
 * sr        - colour radius; a texel contributes when its squared colour
 *             distance to the current colour is <= sr*sr
 * maxIter   - upper bound on the number of mean-shift iterations
 * eps       - iteration stops once |dx| + |dy| + squared colour shift <= eps
 *
 * Window coordinates may leave the image; how such fetches resolve depends on
 * the addressing mode of tex_meanshift (not configured in this function).
 */
extern "C" __global__ void meanshift_kernel( unsigned char* out, int out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
{
    // Pixel owned by this thread. Kept apart from (x0, y0), which drift
    // towards the mode while iterating.
    const int px = blockIdx.x * blockDim.x + threadIdx.x;
    const int py = blockIdx.y * blockDim.y + threadIdx.y;

    if( px >= cols || py >= rows )
        return;

    int x0 = px;
    int y0 = py;

    const int isr2 = sr * sr;
    uchar4 c = tex2D( tex_meanshift, x0, y0 );

    // iterate meanshift procedure
    for( int iter = 0; iter < maxIter; iter++ )
    {
        int count = 0;
        int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;

        // mean shift: process texels in the window [p - sp, p + sp]
        const int minx = x0 - sp;
        const int miny = y0 - sp;
        const int maxx = x0 + sp;
        const int maxy = y0 + sp;

        for( int y = miny; y <= maxy; y++ )
        {
            int rowCount = 0;
            for( int x = minx; x <= maxx; x++ )
            {
                const uchar4 t = tex2D( tex_meanshift, x, y );

                const int d0 = t.x - c.x;
                const int d1 = t.y - c.y;
                const int d2 = t.z - c.z;
                const int norm2 = d0 * d0 + d1 * d1 + d2 * d2;

                if( norm2 <= isr2 )
                {
                    s0 += t.x; s1 += t.y; s2 += t.z;
                    sx += x; rowCount++;
                }
            }
            count += rowCount;
            // every accepted texel of this row shares the same y
            sy += y * rowCount;
        }

        // nothing inside the colour radius: keep the current colour
        if( count == 0 )
            break;

        // single-precision reciprocal; a bare 1. would force a double division
        const float icount = 1.f / count;
        const int x1 = (int)floorf( sx * icount );
        const int y1 = (int)floorf( sy * icount );
        s0 = (int)floorf( s0 * icount );
        s1 = (int)floorf( s1 * icount );
        s2 = (int)floorf( s2 * icount );

        const int e0 = s0 - c.x;
        const int e1 = s1 - c.y;
        const int e2 = s2 - c.z;
        const int shift2 = e0 * e0 + e1 * e1 + e2 * e2;

        const bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + shift2 <= eps);

        x0 = x1; y0 = y1;
        c.x = (unsigned char)s0; c.y = (unsigned char)s1; c.z = (unsigned char)s2;

        if( stopFlag )
            break;
    }

    // byte offset of this thread's pixel in the 3-byte-per-pixel output;
    // computed in size_t so large images cannot overflow int arithmetic
    const size_t base = (size_t)py * out_step + (size_t)px * 3;
    out[base + 0] = c.x;
    out[base + 1] = c.y;
    out[base + 2] = c.z;
}
|
||||
}
|
||||
@ -75,14 +146,31 @@ namespace cv { namespace gpu { namespace impl
|
||||
grid.x = divUp(dst.cols, block.x);
|
||||
grid.y = divUp(dst.rows, block.y);
|
||||
|
||||
tex.filterMode = cudaFilterModeLinear;
|
||||
tex.addressMode[0] = tex.addressMode[1] = cudaAddressModeWrap;
|
||||
tex1.filterMode = cudaFilterModeLinear;
|
||||
tex1.addressMode[0] = tex1.addressMode[1] = cudaAddressModeWrap;
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, tex, src.ptr, desc, dst.cols, dst.rows, src.step) );
|
||||
cudaSafeCall( cudaBindTexture2D(0, tex1, src.ptr, desc, dst.cols, dst.rows, src.step) );
|
||||
|
||||
kernel_remap<<<grid, block>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
cudaSafeCall( cudaUnbindTexture(tex) );
|
||||
cudaSafeCall( cudaUnbindTexture(tex1) );
|
||||
}
|
||||
|
||||
/*
 * Host-side launcher for meanshift_kernel.
 *
 * Binds src to the tex_meanshift texture reference as a 2D uchar4 texture,
 * launches one thread per pixel in 32x16 blocks, waits for completion and
 * unbinds the texture again.
 *
 * src      - source image, read as one uchar4 texel per pixel
 * dst      - destination image; the kernel writes 3 bytes per pixel
 * sp, sr   - spatial and colour radii (converted to int for the kernel)
 * maxIter  - maximum number of mean-shift iterations per pixel
 * eps      - convergence threshold forwarded to the kernel
 */
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, float sp, float sr, int maxIter, float eps)
{
    dim3 grid(1, 1, 1);
    dim3 threads(32, 16, 1);
    grid.x = divUp(src.cols, threads.x);
    grid.y = divUp(src.rows, threads.y);

    cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
    // Texture width is expressed in texels, not bytes: the kernel fetches one
    // uchar4 per pixel for x in [0, cols), so the width is src.cols. The row
    // pitch (src.step) stays in bytes.
    cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );

    meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, static_cast<int>(sp), static_cast<int>(sr), maxIter, eps );
    // a bad launch configuration is only reported through cudaGetLastError
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaThreadSynchronize() );
    cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
}
|
||||
}}}
|
||||
|
||||
|
||||
|
@ -48,6 +48,7 @@ using namespace cv::gpu;
|
||||
#if !defined (HAVE_CUDA)
|
||||
|
||||
void cv::gpu::remap(const GpuMat& /*src*/, const GpuMat& /*xmap*/, const GpuMat& /*ymap*/, GpuMat& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::meanShiftFiltering_GPU(const GpuMat&, GpuMat&, float, float, TermCriteria ) { throw_nogpu(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
@ -56,6 +57,8 @@ namespace cv { namespace gpu
|
||||
namespace impl
|
||||
{
|
||||
extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst);
|
||||
|
||||
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, float sp, float sr, int maxIter, float eps);
|
||||
}
|
||||
}}
|
||||
|
||||
@ -70,4 +73,30 @@ void cv::gpu::remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, G
|
||||
impl::remap_gpu(src, xmap, ymap, dst);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void cv::gpu::meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, float sp, float sr, TermCriteria criteria)
|
||||
{
|
||||
if( src.empty() )
|
||||
CV_Error( CV_StsBadArg, "The input image is empty" );
|
||||
|
||||
if( src.depth() != CV_8U || src.channels() != 4 )
|
||||
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
|
||||
|
||||
dst.create( src.size(), CV_8UC3 );
|
||||
|
||||
float eps;
|
||||
if( !(criteria.type & TermCriteria::MAX_ITER) )
|
||||
criteria.maxCount = 5;
|
||||
|
||||
int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
|
||||
|
||||
if( !(criteria.type & TermCriteria::EPS) )
|
||||
eps = 1.f;
|
||||
|
||||
eps = std::max(criteria.epsilon, 0.0);
|
||||
impl::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);
|
||||
}
|
||||
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
@ -54,6 +54,7 @@ CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { throw_nogpu(); }
|
||||
// Variants of the device-query functions from the branch that is replaced by
// the real implementations in the "#else /* !defined (HAVE_CUDA) */" section.
// Each one calls throw_nogpu(); the non-void ones then return 0.
CV_EXPORTS int cv::gpu::getDevice()
{
    throw_nogpu();
    return 0;
}

CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/)
{
    throw_nogpu();
}

CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/)
{
    throw_nogpu();
    return 0;
}

CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t* /*free*/, size_t* /*total*/)
{
    throw_nogpu();
}
|
||||
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
@ -99,5 +100,11 @@ CV_EXPORTS int cv::gpu::getNumberOfSMs(int device)
|
||||
return prop.multiProcessorCount;
|
||||
}
|
||||
|
||||
|
||||
// Reports device memory usage by forwarding both output pointers to
// cudaMemGetInfo; a failing call is handled by cudaSafeCall.
//
// free  - receives the amount of free memory
// total - receives the total amount of memory
CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t *free, size_t* total)
{
    cudaSafeCall( cudaMemGetInfo( free, total ) );
}
|
||||
|
||||
#endif
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user