Added downsample function to the gpu module, refactored it a little, and added a guard for the CUDA-related include in cascadeclassifier_nvidia_api.cpp
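The guard mentioned for cascadeclassifier_nvidia_api.cpp is not part of the hunks below; a minimal sketch of the usual OpenCV pattern, assuming the HAVE_CUDA macro from OpenCV's build configuration and a placeholder header name:

// Hypothetical sketch (not taken from this commit): compile the CUDA-only
// include only when OpenCV was configured with CUDA support, so the file
// still builds without it. The header name below is a placeholder.
#ifdef HAVE_CUDA
#include "NCVHaarObjectDetection.hpp"
#endif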
@@ -73,7 +73,7 @@ namespace cv { namespace gpu
         dim3 threads(16, 16);
         dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));
 
-        blendLinearKernel<T><<<grid, threads>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
+        blendLinearKernel<<<grid, threads>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
         cudaSafeCall(cudaThreadSynchronize());
     }
 
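The first hunk is the small refactor from the commit message: the explicit template argument on the blendLinearKernel launch is dropped, presumably because T can be deduced from the kernel's arguments. A minimal standalone illustration of that deduction (the kernel and types below are invented for the example, not taken from this commit):

// Hypothetical example: T appears in the parameter list, so the launch site
// can omit the explicit template argument.
template <typename T>
__global__ void scaleKernel(const T* src, T* dst, int n, T alpha)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = alpha * src[i];
}

void scaleCaller(const float* src, float* dst, int n)
{
    dim3 threads(256);
    dim3 grid((n + threads.x - 1) / threads.x);

    scaleKernel<float><<<grid, threads>>>(src, dst, n, 2.0f); // explicit T
    scaleKernel<<<grid, threads>>>(src, dst, n, 2.0f);        // T deduced
}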
@@ -883,5 +883,32 @@ namespace cv { namespace gpu { namespace imgproc
         cudaSafeCall(cudaThreadSynchronize());
     }
 
+    /////////////////////////////////////////////////////////////////////////
+    // downsample
+
+    template <typename T>
+    __global__ void downsampleKernel(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst)
+    {
+        int x = blockIdx.x * blockDim.x + threadIdx.x;
+        int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x < cols && y < rows)
+            dst.ptr(y)[x] = src.ptr(y * k)[x * k];
+    }
+
+
+    template <typename T>
+    void downsampleCaller(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst)
+    {
+        dim3 threads(16, 16);
+        dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
+
+        downsampleKernel<<<grid, threads>>>(src, rows, cols, k, dst);
+        cudaSafeCall(cudaThreadSynchronize());
+    }
+
+    template void downsampleCaller(const PtrStep src, int rows, int cols, int k, PtrStep dst);
+    template void downsampleCaller(const PtrStepf src, int rows, int cols, int k, PtrStepf dst);
+
 }}}
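A minimal host-side sketch of how the new caller might be used from a GpuMat-based wrapper. The wrapper name, the size formula, and the forward declarations are assumptions for illustration, not part of this commit; the sketch relies on GpuMat's implicit conversion to PtrStep_<T> and on the explicit instantiations added above for uchar and float:

#include <opencv2/gpu/gpu.hpp>

// Declarations mirroring the .cu file; the definitions come from the explicit
// instantiations added in this commit.
namespace cv { namespace gpu { namespace imgproc
{
    template <typename T>
    void downsampleCaller(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst);
}}}

// Hypothetical wrapper: keep every k-th pixel of a single-channel 8U or 32F image.
void downsampleSketch(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, int k)
{
    CV_Assert(k >= 1 && (src.type() == CV_8UC1 || src.type() == CV_32FC1));

    dst.create((src.rows + k - 1) / k, (src.cols + k - 1) / k, src.type());

    // The rows/cols passed to the caller are the destination dimensions,
    // matching the bounds check inside downsampleKernel.
    if (src.type() == CV_8UC1)
        cv::gpu::imgproc::downsampleCaller<uchar>(src, dst.rows, dst.cols, k, dst);
    else
        cv::gpu::imgproc::downsampleCaller<float>(src, dst.rows, dst.cols, k, dst);
}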