Added a downsample function to the gpu module, refactored it slightly, and added a guard for the CUDA-related include in cascadeclassifier_nvidia_api.cpp

This commit is contained in:
Alexey Spizhevoy
2011-04-08 08:04:56 +00:00
parent 6cec5ff552
commit 97282d8ff8
8 changed files with 127 additions and 14 deletions

View File

@@ -73,7 +73,7 @@ namespace cv { namespace gpu
dim3 threads(16, 16);
dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));
blendLinearKernel<T><<<grid, threads>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
blendLinearKernel<<<grid, threads>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
cudaSafeCall(cudaThreadSynchronize());
}

View File

@@ -883,5 +883,32 @@ namespace cv { namespace gpu { namespace imgproc
cudaSafeCall(cudaThreadSynchronize());
}
/////////////////////////////////////////////////////////////////////////
// downsample
// Copies every k-th element of src into dst: dst(y, x) = src(y * k, x * k).
//   src        - source image, read at (row * k, col * k)
//   rows, cols - extent of the region written in dst
//   k          - sampling step applied along both axes
//   dst        - destination image
// Each thread derives one (x, y) destination coordinate from its 2D block and
// thread indices; threads that land outside cols x rows exit without writing.
template <typename T>
__global__ void downsampleKernel(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst)
{
    const int dstX = threadIdx.x + blockDim.x * blockIdx.x;
    const int dstY = threadIdx.y + blockDim.y * blockIdx.y;

    // The grid is rounded up to whole blocks, so trailing threads may be out of range.
    if (dstX >= cols || dstY >= rows)
        return;

    const int srcX = dstX * k;
    const int srcY = dstY * k;

    dst.ptr(dstY)[dstX] = src.ptr(srcY)[srcX];
}
// Host-side launcher for downsampleKernel.
//   src        - source image, sampled at every k-th row and column
//   rows, cols - extent of the region written in dst
//   k          - sampling step applied along both axes
//   dst        - destination image
// Launches one thread per destination element in 16x16 blocks and waits for the
// kernel to finish. Launch and execution errors are reported via cudaSafeCall.
// Does nothing when rows or cols is not positive.
template <typename T>
void downsampleCaller(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst)
{
    // An empty destination would produce a zero-sized grid, which is an invalid
    // launch configuration; there is nothing to copy in that case anyway.
    if (rows <= 0 || cols <= 0)
        return;

    dim3 threads(16, 16);
    dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

    downsampleKernel<<<grid, threads>>>(src, rows, cols, k, dst);

    // A kernel launch returns no status itself; configuration failures are only
    // visible through the runtime's last-error state, so query it explicitly.
    cudaSafeCall(cudaGetLastError());

    // Wait for completion so that errors raised while the kernel runs surface here.
    cudaSafeCall(cudaThreadSynchronize());
}
// Explicit instantiations of downsampleCaller for the PtrStep and PtrStepf
// argument types. Presumably these are the only element types requested by the
// host-side code that links against this file - TODO confirm against callers.
template void downsampleCaller(const PtrStep src, int rows, int cols, int k, PtrStep dst);
template void downsampleCaller(const PtrStepf src, int rows, int cols, int k, PtrStepf dst);
}}}