added vecmath to gpu module.
This commit is contained in:
		| @@ -44,6 +44,7 @@ | |||||||
| #include "saturate_cast.hpp" | #include "saturate_cast.hpp" | ||||||
| #include "safe_call.hpp" | #include "safe_call.hpp" | ||||||
| #include "cuda_shared.hpp" | #include "cuda_shared.hpp" | ||||||
|  | #include "vecmath.hpp" | ||||||
|  |  | ||||||
| using namespace cv::gpu; | using namespace cv::gpu; | ||||||
|  |  | ||||||
| @@ -71,7 +72,7 @@ namespace cv { namespace gpu { namespace filters | |||||||
|  |  | ||||||
| namespace filter_krnls | namespace filter_krnls | ||||||
| { | { | ||||||
|     template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D> |     template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, int CN, typename T, typename D> | ||||||
|     __global__ void linearRowFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height) |     __global__ void linearRowFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height) | ||||||
|     { |     { | ||||||
|         __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3]; |         __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3]; | ||||||
| @@ -91,23 +92,24 @@ namespace filter_krnls | |||||||
|         { |         { | ||||||
|             const T* rowSrc = src + threadY * src_step; |             const T* rowSrc = src + threadY * src_step; | ||||||
|  |  | ||||||
|             sDataRow[threadIdx.x + blockDim.x] = threadX < width ? rowSrc[threadX] : 0; |             sDataRow[threadIdx.x + blockDim.x] = threadX < width ? rowSrc[threadX] : VecTraits<T>::all(0); | ||||||
|  |  | ||||||
|             sDataRow[threadIdx.x] = prevThreadX >= 0 ? rowSrc[prevThreadX] : 0; |             sDataRow[threadIdx.x] = prevThreadX >= 0 ? rowSrc[prevThreadX] : VecTraits<T>::all(0); | ||||||
|  |  | ||||||
|             sDataRow[(blockDim.x << 1) + threadIdx.x] = nextThreadX < width ? rowSrc[nextThreadX] : 0; |             sDataRow[(blockDim.x << 1) + threadIdx.x] = nextThreadX < width ? rowSrc[nextThreadX] : VecTraits<T>::all(0); | ||||||
|  |  | ||||||
|             __syncthreads(); |             __syncthreads(); | ||||||
|  |  | ||||||
|             if (threadX < width) |             if (threadX < width) | ||||||
|             { |             { | ||||||
|                 float sum = 0; |                 typedef typename TypeVec<float, CN>::vec_t sum_t; | ||||||
|  |                 sum_t sum = VecTraits<sum_t>::all(0); | ||||||
|  |  | ||||||
|                 sDataRow += threadIdx.x + blockDim.x - anchor; |                 sDataRow += threadIdx.x + blockDim.x - anchor; | ||||||
|  |  | ||||||
|                 #pragma unroll |                 #pragma unroll | ||||||
|                 for(int i = 0; i < KERNEL_SIZE; ++i) |                 for(int i = 0; i < KERNEL_SIZE; ++i) | ||||||
|                     sum += cLinearKernel[i] * sDataRow[i]; |                     sum = sum + sDataRow[i] * cLinearKernel[i]; | ||||||
|  |  | ||||||
|                 dst[threadY * dst_step + threadX] = saturate_cast<D>(sum); |                 dst[threadY * dst_step + threadX] = saturate_cast<D>(sum); | ||||||
|             } |             } | ||||||
| @@ -117,7 +119,7 @@ namespace filter_krnls | |||||||
|  |  | ||||||
| namespace cv { namespace gpu { namespace filters | namespace cv { namespace gpu { namespace filters | ||||||
| { | { | ||||||
|     template <int KERNEL_SIZE, typename T, typename D> |     template <int KERNEL_SIZE, int CN, typename T, typename D> | ||||||
|     void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor) |     void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor) | ||||||
|     { |     { | ||||||
|         const int BLOCK_DIM_X = 16; |         const int BLOCK_DIM_X = 16; | ||||||
| @@ -126,51 +128,83 @@ namespace cv { namespace gpu { namespace filters | |||||||
|         dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); |         dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); | ||||||
|         dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y)); |         dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y)); | ||||||
|  |  | ||||||
|         filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step,  |         filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE, CN><<<blocks, threads>>>(src.ptr, src.elem_step,  | ||||||
|             dst.ptr, dst.elem_step, anchor, src.cols, src.rows); |             dst.ptr, dst.elem_step, anchor, src.cols, src.rows); | ||||||
|  |  | ||||||
|         cudaSafeCall( cudaThreadSynchronize() ); |         cudaSafeCall( cudaThreadSynchronize() ); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     template <typename T, typename D> |     template <int CN, typename T, typename D> | ||||||
|     inline void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) |     inline void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|     { |     { | ||||||
|         typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor); |         typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor); | ||||||
|         static const caller_t callers[] =  |         static const caller_t callers[] =  | ||||||
|         {linearRowFilter_caller<0 , T, D>, linearRowFilter_caller<1 , T, D>,  |         {linearRowFilter_caller<0 , CN, T, D>, linearRowFilter_caller<1 , CN, T, D>,  | ||||||
|          linearRowFilter_caller<2 , T, D>, linearRowFilter_caller<3 , T, D>,  |          linearRowFilter_caller<2 , CN, T, D>, linearRowFilter_caller<3 , CN, T, D>,  | ||||||
|          linearRowFilter_caller<4 , T, D>, linearRowFilter_caller<5 , T, D>,  |          linearRowFilter_caller<4 , CN, T, D>, linearRowFilter_caller<5 , CN, T, D>,  | ||||||
|          linearRowFilter_caller<6 , T, D>, linearRowFilter_caller<7 , T, D>,  |          linearRowFilter_caller<6 , CN, T, D>, linearRowFilter_caller<7 , CN, T, D>,  | ||||||
|          linearRowFilter_caller<8 , T, D>, linearRowFilter_caller<9 , T, D>,  |          linearRowFilter_caller<8 , CN, T, D>, linearRowFilter_caller<9 , CN, T, D>,  | ||||||
|          linearRowFilter_caller<10, T, D>, linearRowFilter_caller<11, T, D>,  |          linearRowFilter_caller<10, CN, T, D>, linearRowFilter_caller<11, CN, T, D>,  | ||||||
|          linearRowFilter_caller<12, T, D>, linearRowFilter_caller<13, T, D>,  |          linearRowFilter_caller<12, CN, T, D>, linearRowFilter_caller<13, CN, T, D>,  | ||||||
|          linearRowFilter_caller<14, T, D>, linearRowFilter_caller<15, T, D>}; |          linearRowFilter_caller<14, CN, T, D>, linearRowFilter_caller<15, CN, T, D>}; | ||||||
|  |  | ||||||
|         loadLinearKernel(kernel, ksize); |         loadLinearKernel(kernel, ksize); | ||||||
|         callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor); |         callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void linearRowFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) |     void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|     { |     { | ||||||
|         linearRowFilter_gpu<int, int>(src, dst, kernel, ksize, anchor); |         linearRowFilter_gpu<4, uchar4, uchar4>(src, dst, kernel, ksize, anchor); | ||||||
|     } |     } | ||||||
|     void linearRowFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) |     void linearRowFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|     { |     { | ||||||
|         linearRowFilter_gpu<int, float>(src, dst, kernel, ksize, anchor); |         linearRowFilter_gpu<4, uchar4, char4>(src, dst, kernel, ksize, anchor); | ||||||
|     } |     } | ||||||
|     void linearRowFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) |     void linearRowFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|     { |     { | ||||||
|         linearRowFilter_gpu<float, int>(src, dst, kernel, ksize, anchor); |         linearRowFilter_gpu<4, char4, uchar4>(src, dst, kernel, ksize, anchor); | ||||||
|     } |     } | ||||||
|     void linearRowFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) |     void linearRowFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|     { |     { | ||||||
|         linearRowFilter_gpu<float, float>(src, dst, kernel, ksize, anchor); |         linearRowFilter_gpu<4, char4, char4>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearRowFilter_gpu_16u_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearRowFilter_gpu<2, ushort2, ushort2>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearRowFilter_gpu_16u_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearRowFilter_gpu<2, ushort2, short2>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearRowFilter_gpu_16s_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearRowFilter_gpu<2, short2, ushort2>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearRowFilter_gpu_16s_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearRowFilter_gpu<2, short2, short2>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearRowFilter_gpu_32s_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearRowFilter_gpu<1, int, int>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearRowFilter_gpu_32s_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearRowFilter_gpu<1, int, float>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearRowFilter_gpu_32f_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearRowFilter_gpu<1, float, int>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearRowFilter_gpu_32f_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearRowFilter_gpu<1 ,float, float>(src, dst, kernel, ksize, anchor); | ||||||
|     } |     } | ||||||
| }}} | }}} | ||||||
|  |  | ||||||
| namespace filter_krnls | namespace filter_krnls | ||||||
| { | { | ||||||
|     template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D> |     template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, int CN, typename T, typename D> | ||||||
|     __global__ void linearColumnFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height) |     __global__ void linearColumnFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height) | ||||||
|     { |     { | ||||||
|         __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3]; |         __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3]; | ||||||
| @@ -192,23 +226,24 @@ namespace filter_krnls | |||||||
|         { |         { | ||||||
|             const T* colSrc = src + threadX; |             const T* colSrc = src + threadX; | ||||||
|  |  | ||||||
|             sDataColumn[(threadIdx.y + blockDim.y) * smem_step] = threadY < height ? colSrc[threadY * src_step] : 0; |             sDataColumn[(threadIdx.y + blockDim.y) * smem_step] = threadY < height ? colSrc[threadY * src_step] : VecTraits<T>::all(0); | ||||||
|  |  | ||||||
|             sDataColumn[threadIdx.y * smem_step] = prevThreadY >= 0 ? colSrc[prevThreadY * src_step] : 0; |             sDataColumn[threadIdx.y * smem_step] = prevThreadY >= 0 ? colSrc[prevThreadY * src_step] : VecTraits<T>::all(0); | ||||||
|  |  | ||||||
|             sDataColumn[(threadIdx.y + (blockDim.y << 1)) * smem_step] = nextThreadY < height ? colSrc[nextThreadY * src_step] : 0; |             sDataColumn[(threadIdx.y + (blockDim.y << 1)) * smem_step] = nextThreadY < height ? colSrc[nextThreadY * src_step] : VecTraits<T>::all(0); | ||||||
|  |  | ||||||
|             __syncthreads(); |             __syncthreads(); | ||||||
|  |  | ||||||
|             if (threadY < height) |             if (threadY < height) | ||||||
|             { |             { | ||||||
|                 float sum = 0; |                 typedef typename TypeVec<float, CN>::vec_t sum_t; | ||||||
|  |                 sum_t sum = VecTraits<sum_t>::all(0); | ||||||
|  |  | ||||||
|                 sDataColumn += (threadIdx.y + blockDim.y - anchor)* smem_step; |                 sDataColumn += (threadIdx.y + blockDim.y - anchor)* smem_step; | ||||||
|  |  | ||||||
|                 #pragma unroll |                 #pragma unroll | ||||||
|                 for(int i = 0; i < KERNEL_SIZE; ++i) |                 for(int i = 0; i < KERNEL_SIZE; ++i) | ||||||
|                     sum += cLinearKernel[i] * sDataColumn[i * smem_step]; |                     sum = sum + sDataColumn[i * smem_step] * cLinearKernel[i]; | ||||||
|  |  | ||||||
|                 dst[threadY * dst_step + threadX] = saturate_cast<D>(sum); |                 dst[threadY * dst_step + threadX] = saturate_cast<D>(sum); | ||||||
|             } |             } | ||||||
| @@ -218,7 +253,7 @@ namespace filter_krnls | |||||||
|  |  | ||||||
| namespace cv { namespace gpu { namespace filters | namespace cv { namespace gpu { namespace filters | ||||||
| { | { | ||||||
|     template <int KERNEL_SIZE, typename T, typename D> |     template <int KERNEL_SIZE, int CN, typename T, typename D> | ||||||
|     void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor) |     void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor) | ||||||
|     { |     { | ||||||
|         const int BLOCK_DIM_X = 16; |         const int BLOCK_DIM_X = 16; | ||||||
| @@ -227,45 +262,77 @@ namespace cv { namespace gpu { namespace filters | |||||||
|         dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); |         dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); | ||||||
|         dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y)); |         dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y)); | ||||||
|  |  | ||||||
|         filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step,  |         filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE, CN><<<blocks, threads>>>(src.ptr, src.elem_step,  | ||||||
|             dst.ptr, dst.elem_step, anchor, src.cols, src.rows); |             dst.ptr, dst.elem_step, anchor, src.cols, src.rows); | ||||||
|  |  | ||||||
|         cudaSafeCall( cudaThreadSynchronize() ); |         cudaSafeCall( cudaThreadSynchronize() ); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     template <typename T, typename D> |     template <int CN, typename T, typename D> | ||||||
|     inline void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) |     inline void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|     { |     { | ||||||
|         typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor); |         typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor); | ||||||
|         static const caller_t callers[] =  |         static const caller_t callers[] =  | ||||||
|         {linearColumnFilter_caller<0 , T, D>, linearColumnFilter_caller<1 , T, D>,  |         {linearColumnFilter_caller<0 , CN, T, D>, linearColumnFilter_caller<1 , CN, T, D>,  | ||||||
|          linearColumnFilter_caller<2 , T, D>, linearColumnFilter_caller<3 , T, D>,  |          linearColumnFilter_caller<2 , CN, T, D>, linearColumnFilter_caller<3 , CN, T, D>,  | ||||||
|          linearColumnFilter_caller<4 , T, D>, linearColumnFilter_caller<5 , T, D>,  |          linearColumnFilter_caller<4 , CN, T, D>, linearColumnFilter_caller<5 , CN, T, D>,  | ||||||
|          linearColumnFilter_caller<6 , T, D>, linearColumnFilter_caller<7 , T, D>,  |          linearColumnFilter_caller<6 , CN, T, D>, linearColumnFilter_caller<7 , CN, T, D>,  | ||||||
|          linearColumnFilter_caller<8 , T, D>, linearColumnFilter_caller<9 , T, D>,  |          linearColumnFilter_caller<8 , CN, T, D>, linearColumnFilter_caller<9 , CN, T, D>,  | ||||||
|          linearColumnFilter_caller<10, T, D>, linearColumnFilter_caller<11, T, D>,  |          linearColumnFilter_caller<10, CN, T, D>, linearColumnFilter_caller<11, CN, T, D>,  | ||||||
|          linearColumnFilter_caller<12, T, D>, linearColumnFilter_caller<13, T, D>,  |          linearColumnFilter_caller<12, CN, T, D>, linearColumnFilter_caller<13, CN, T, D>,  | ||||||
|          linearColumnFilter_caller<14, T, D>, linearColumnFilter_caller<15, T, D>}; |          linearColumnFilter_caller<14, CN, T, D>, linearColumnFilter_caller<15, CN, T, D>}; | ||||||
|  |  | ||||||
|         loadLinearKernel(kernel, ksize); |         loadLinearKernel(kernel, ksize); | ||||||
|         callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor); |         callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void linearColumnFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) |     void linearColumnFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|     { |     { | ||||||
|         linearColumnFilter_gpu<int, int>(src, dst, kernel, ksize, anchor); |         linearColumnFilter_gpu<4, uchar4, uchar4>(src, dst, kernel, ksize, anchor); | ||||||
|     } |     } | ||||||
|     void linearColumnFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) |     void linearColumnFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|     { |     { | ||||||
|         linearColumnFilter_gpu<int, float>(src, dst, kernel, ksize, anchor); |         linearColumnFilter_gpu<4, uchar4, char4>(src, dst, kernel, ksize, anchor); | ||||||
|     } |     } | ||||||
|     void linearColumnFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) |     void linearColumnFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|     { |     { | ||||||
|         linearColumnFilter_gpu<float, int>(src, dst, kernel, ksize, anchor); |         linearColumnFilter_gpu<4, char4, uchar4>(src, dst, kernel, ksize, anchor); | ||||||
|     } |     } | ||||||
|     void linearColumnFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) |     void linearColumnFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|     { |     { | ||||||
|         linearColumnFilter_gpu<float, float>(src, dst, kernel, ksize, anchor); |         linearColumnFilter_gpu<4, char4, char4>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearColumnFilter_gpu_16u_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearColumnFilter_gpu<2, ushort2, ushort2>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearColumnFilter_gpu_16u_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearColumnFilter_gpu<2, ushort2, short2>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearColumnFilter_gpu_16s_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearColumnFilter_gpu<2, short2, ushort2>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearColumnFilter_gpu_16s_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearColumnFilter_gpu<2, short2, short2>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearColumnFilter_gpu_32s_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearColumnFilter_gpu<1, int, int>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearColumnFilter_gpu_32s_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearColumnFilter_gpu<1, int, float>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearColumnFilter_gpu_32f_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearColumnFilter_gpu<1, float, int>(src, dst, kernel, ksize, anchor); | ||||||
|  |     } | ||||||
|  |     void linearColumnFilter_gpu_32f_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) | ||||||
|  |     { | ||||||
|  |         linearColumnFilter_gpu<1, float, float>(src, dst, kernel, ksize, anchor); | ||||||
|     } |     } | ||||||
| }}} | }}} | ||||||
|  |  | ||||||
|   | |||||||
| @@ -163,92 +163,6 @@ namespace cv | |||||||
|             return saturate_cast<uint>((float)v); |             return saturate_cast<uint>((float)v); | ||||||
|         #endif |         #endif | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         template<typename _Tp> static __device__ _Tp saturate_cast(uchar4 v) { return _Tp(v); } |  | ||||||
|         template<typename _Tp> static __device__ _Tp saturate_cast(char4 v) { return _Tp(v); } |  | ||||||
|         template<typename _Tp> static __device__ _Tp saturate_cast(ushort4 v) { return _Tp(v); } |  | ||||||
|         template<typename _Tp> static __device__ _Tp saturate_cast(short4 v) { return _Tp(v); } |  | ||||||
|         template<typename _Tp> static __device__ _Tp saturate_cast(uint4 v) { return _Tp(v); } |  | ||||||
|         template<typename _Tp> static __device__ _Tp saturate_cast(int4 v) { return _Tp(v); } |  | ||||||
|         template<typename _Tp> static __device__ _Tp saturate_cast(float4 v) { return _Tp(v); } |  | ||||||
|  |  | ||||||
|         template<> static __device__ uchar4 saturate_cast<uchar4>(char4 v) |  | ||||||
|         { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); } |  | ||||||
|         template<> static __device__ uchar4 saturate_cast<uchar4>(ushort4 v) |  | ||||||
|         { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); } |  | ||||||
|         template<> static __device__ uchar4 saturate_cast<uchar4>(short4 v) |  | ||||||
|         { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); } |  | ||||||
|         template<> static __device__ uchar4 saturate_cast<uchar4>(uint4 v) |  | ||||||
|         { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); } |  | ||||||
|         template<> static __device__ uchar4 saturate_cast<uchar4>(int4 v) |  | ||||||
|         { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); } |  | ||||||
|         template<> static __device__ uchar4 saturate_cast<uchar4>(float4 v) |  | ||||||
|         { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); } |  | ||||||
|  |  | ||||||
|         template<> static __device__ char4 saturate_cast<char4>(uchar4 v) |  | ||||||
|         { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); } |  | ||||||
|         template<> static __device__ char4 saturate_cast<char4>(ushort4 v) |  | ||||||
|         { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); } |  | ||||||
|         template<> static __device__ char4 saturate_cast<char4>(short4 v) |  | ||||||
|         { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); } |  | ||||||
|         template<> static __device__ char4 saturate_cast<char4>(uint4 v) |  | ||||||
|         { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); } |  | ||||||
|         template<> static __device__ char4 saturate_cast<char4>(int4 v) |  | ||||||
|         { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); } |  | ||||||
|         template<> static __device__ char4 saturate_cast<char4>(float4 v) |  | ||||||
|         { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); } |  | ||||||
|  |  | ||||||
|         template<> static __device__ ushort4 saturate_cast<ushort4>(uchar4 v) |  | ||||||
|         { return make_ushort4(v.x, v.y, v.z, v.w); } |  | ||||||
|         template<> static __device__ ushort4 saturate_cast<ushort4>(char4 v) |  | ||||||
|         { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); } |  | ||||||
|         template<> static __device__ ushort4 saturate_cast<ushort4>(short4 v) |  | ||||||
|         { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); } |  | ||||||
|         template<> static __device__ ushort4 saturate_cast<ushort4>(uint4 v) |  | ||||||
|         { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); } |  | ||||||
|         template<> static __device__ ushort4 saturate_cast<ushort4>(int4 v) |  | ||||||
|         { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); } |  | ||||||
|         template<> static __device__ ushort4 saturate_cast<ushort4>(float4 v) |  | ||||||
|         { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); } |  | ||||||
|  |  | ||||||
|         template<> static __device__ short4 saturate_cast<short4>(uchar4 v) |  | ||||||
|         { return make_short4(v.x, v.y, v.z, v.w); } |  | ||||||
|         template<> static __device__ short4 saturate_cast<short4>(char4 v) |  | ||||||
|         { return make_short4(v.x, v.y, v.z, v.w); } |  | ||||||
|         template<> static __device__ short4 saturate_cast<short4>(ushort4 v) |  | ||||||
|         { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); } |  | ||||||
|         template<> static __device__ short4 saturate_cast<short4>(uint4 v) |  | ||||||
|         { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); } |  | ||||||
|         template<> static __device__ short4 saturate_cast<short4>(int4 v) |  | ||||||
|         { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); } |  | ||||||
|         template<> static __device__ short4 saturate_cast<short4>(float4 v) |  | ||||||
|         { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); } |  | ||||||
|          |  | ||||||
|         template<> static __device__ uint4 saturate_cast<uint4>(uchar4 v) |  | ||||||
|         { return make_uint4(v.x, v.y, v.z, v.w); } |  | ||||||
|         template<> static __device__ uint4 saturate_cast<uint4>(char4 v) |  | ||||||
|         { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); } |  | ||||||
|         template<> static __device__ uint4 saturate_cast<uint4>(ushort4 v) |  | ||||||
|         { return make_uint4(v.x, v.y, v.z, v.w); } |  | ||||||
|         template<> static __device__ uint4 saturate_cast<uint4>(short4 v) |  | ||||||
|         { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); } |  | ||||||
|         template<> static __device__ uint4 saturate_cast<uint4>(int4 v) |  | ||||||
|         { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); } |  | ||||||
|         template<> static __device__ uint4 saturate_cast<uint4>(float4 v) |  | ||||||
|         { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); } |  | ||||||
|          |  | ||||||
|         template<> static __device__ int4 saturate_cast<int4>(uchar4 v) |  | ||||||
|         { return make_int4(v.x, v.y, v.z, v.w); } |  | ||||||
|         template<> static __device__ int4 saturate_cast<int4>(char4 v) |  | ||||||
|         { return make_int4(v.x, v.y, v.z, v.w); } |  | ||||||
|         template<> static __device__ int4 saturate_cast<int4>(ushort4 v) |  | ||||||
|         { return make_int4(v.x, v.y, v.z, v.w); } |  | ||||||
|         template<> static __device__ int4 saturate_cast<int4>(short4 v) |  | ||||||
|         { return make_int4(v.x, v.y, v.z, v.w); } |  | ||||||
|         template<> static __device__ int4 saturate_cast<int4>(uint4 v) |  | ||||||
|         { return make_int4(saturate_cast<int>(v.x), saturate_cast<int>(v.y), saturate_cast<int>(v.z), saturate_cast<int>(v.w)); } |  | ||||||
|         template<> static __device__ int4 saturate_cast<int4>(float4 v) |  | ||||||
|         { return make_int4(saturate_cast<int>(v.x), saturate_cast<int>(v.y), saturate_cast<int>(v.z), saturate_cast<int>(v.w)); } |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -44,49 +44,70 @@ | |||||||
| #define __OPENCV_GPU_VECMATH_HPP__ | #define __OPENCV_GPU_VECMATH_HPP__ | ||||||
|  |  | ||||||
| #include "cuda_shared.hpp" | #include "cuda_shared.hpp" | ||||||
|  | #include "saturate_cast.hpp" | ||||||
|  |  | ||||||
| namespace cv | namespace cv | ||||||
| { | { | ||||||
|     namespace gpu |     namespace gpu | ||||||
|     { |     { | ||||||
|         template<typename T, int N> struct TypeVec; |         template<typename T, int N> struct TypeVec; | ||||||
|         template<typename T> struct TypeVec<T, 1> { typedef T vec_t; }; |  | ||||||
|         template<> struct TypeVec<unsigned char, 2> { typedef uchar2 vec_t; }; |         template<> struct TypeVec<uchar, 1> { typedef uchar vec_t; }; | ||||||
|  |         template<> struct TypeVec<uchar1, 1> { typedef uchar1 vec_t; }; | ||||||
|  |         template<> struct TypeVec<uchar, 2> { typedef uchar2 vec_t; }; | ||||||
|         template<> struct TypeVec<uchar2, 2> { typedef uchar2 vec_t; }; |         template<> struct TypeVec<uchar2, 2> { typedef uchar2 vec_t; }; | ||||||
|         template<> struct TypeVec<unsigned char, 3> { typedef uchar3 vec_t; };; |         template<> struct TypeVec<uchar, 3> { typedef uchar3 vec_t; }; | ||||||
|         template<> struct TypeVec<uchar3, 3> { typedef uchar3 vec_t; }; |         template<> struct TypeVec<uchar3, 3> { typedef uchar3 vec_t; }; | ||||||
|         template<> struct TypeVec<unsigned char, 4> { typedef uchar4 vec_t; };; |         template<> struct TypeVec<uchar, 4> { typedef uchar4 vec_t; }; | ||||||
|         template<> struct TypeVec<uchar4, 4> { typedef uchar4 vec_t; }; |         template<> struct TypeVec<uchar4, 4> { typedef uchar4 vec_t; }; | ||||||
|  |  | ||||||
|  |         template<> struct TypeVec<char, 1> { typedef char vec_t; }; | ||||||
|  |         template<> struct TypeVec<char1, 1> { typedef char1 vec_t; }; | ||||||
|         template<> struct TypeVec<char, 2> { typedef char2 vec_t; }; |         template<> struct TypeVec<char, 2> { typedef char2 vec_t; }; | ||||||
|         template<> struct TypeVec<char2, 2> { typedef char2 vec_t; }; |         template<> struct TypeVec<char2, 2> { typedef char2 vec_t; }; | ||||||
|         template<> struct TypeVec<char, 3> { typedef char3 vec_t; }; |         template<> struct TypeVec<char, 3> { typedef char3 vec_t; }; | ||||||
|         template<> struct TypeVec<char3, 3> { typedef char3 vec_t; }; |         template<> struct TypeVec<char3, 3> { typedef char3 vec_t; }; | ||||||
|         template<> struct TypeVec<char, 4> { typedef char4 vec_t; }; |         template<> struct TypeVec<char, 4> { typedef char4 vec_t; }; | ||||||
|         template<> struct TypeVec<char4, 4> { typedef char4 vec_t; }; |         template<> struct TypeVec<char4, 4> { typedef char4 vec_t; }; | ||||||
|         template<> struct TypeVec<unsigned short, 2> { typedef ushort2 vec_t; }; |  | ||||||
|  |         template<> struct TypeVec<ushort, 1> { typedef ushort vec_t; }; | ||||||
|  |         template<> struct TypeVec<ushort1, 1> { typedef ushort1 vec_t; }; | ||||||
|  |         template<> struct TypeVec<ushort, 2> { typedef ushort2 vec_t; }; | ||||||
|         template<> struct TypeVec<ushort2, 2> { typedef ushort2 vec_t; }; |         template<> struct TypeVec<ushort2, 2> { typedef ushort2 vec_t; }; | ||||||
|         template<> struct TypeVec<unsigned short, 3> { typedef ushort3 vec_t; }; |         template<> struct TypeVec<ushort, 3> { typedef ushort3 vec_t; }; | ||||||
|         template<> struct TypeVec<ushort3, 3> { typedef ushort3 vec_t; }; |         template<> struct TypeVec<ushort3, 3> { typedef ushort3 vec_t; }; | ||||||
|         template<> struct TypeVec<unsigned short, 4> { typedef ushort4 vec_t; }; |         template<> struct TypeVec<ushort, 4> { typedef ushort4 vec_t; }; | ||||||
|         template<> struct TypeVec<ushort4, 4> { typedef ushort4 vec_t; }; |         template<> struct TypeVec<ushort4, 4> { typedef ushort4 vec_t; }; | ||||||
|  |  | ||||||
|  |         template<> struct TypeVec<short, 1> { typedef short vec_t; }; | ||||||
|  |         template<> struct TypeVec<short1, 1> { typedef short1 vec_t; }; | ||||||
|         template<> struct TypeVec<short, 2> { typedef short2 vec_t; }; |         template<> struct TypeVec<short, 2> { typedef short2 vec_t; }; | ||||||
|         template<> struct TypeVec<short2, 2> { typedef short2 vec_t; }; |         template<> struct TypeVec<short2, 2> { typedef short2 vec_t; }; | ||||||
|         template<> struct TypeVec<short, 3> { typedef short3 vec_t; }; |         template<> struct TypeVec<short, 3> { typedef short3 vec_t; }; | ||||||
|         template<> struct TypeVec<short3, 3> { typedef short3 vec_t; }; |         template<> struct TypeVec<short3, 3> { typedef short3 vec_t; }; | ||||||
|         template<> struct TypeVec<short, 4> { typedef short4 vec_t; }; |         template<> struct TypeVec<short, 4> { typedef short4 vec_t; }; | ||||||
|         template<> struct TypeVec<short4, 4> { typedef short4 vec_t; }; |         template<> struct TypeVec<short4, 4> { typedef short4 vec_t; }; | ||||||
|         template<> struct TypeVec<unsigned int, 2> { typedef uint2 vec_t; }; |  | ||||||
|  |         template<> struct TypeVec<uint, 1> { typedef uint vec_t; }; | ||||||
|  |         template<> struct TypeVec<uint1, 1> { typedef uint1 vec_t; }; | ||||||
|  |         template<> struct TypeVec<uint, 2> { typedef uint2 vec_t; }; | ||||||
|         template<> struct TypeVec<uint2, 2> { typedef uint2 vec_t; }; |         template<> struct TypeVec<uint2, 2> { typedef uint2 vec_t; }; | ||||||
|         template<> struct TypeVec<unsigned int, 3> { typedef uint3 vec_t; }; |         template<> struct TypeVec<uint, 3> { typedef uint3 vec_t; }; | ||||||
|         template<> struct TypeVec<uint3, 3> { typedef uint3 vec_t; }; |         template<> struct TypeVec<uint3, 3> { typedef uint3 vec_t; }; | ||||||
|         template<> struct TypeVec<unsigned int, 4> { typedef uint4 vec_t; }; |         template<> struct TypeVec<uint, 4> { typedef uint4 vec_t; }; | ||||||
|         template<> struct TypeVec<uint4, 4> { typedef uint4 vec_t; }; |         template<> struct TypeVec<uint4, 4> { typedef uint4 vec_t; }; | ||||||
|  |  | ||||||
|  |         template<> struct TypeVec<int, 1> { typedef int vec_t; }; | ||||||
|  |         template<> struct TypeVec<int1, 1> { typedef int1 vec_t; }; | ||||||
|         template<> struct TypeVec<int, 2> { typedef int2 vec_t; }; |         template<> struct TypeVec<int, 2> { typedef int2 vec_t; }; | ||||||
|         template<> struct TypeVec<int2, 2> { typedef int2 vec_t; }; |         template<> struct TypeVec<int2, 2> { typedef int2 vec_t; }; | ||||||
|         template<> struct TypeVec<int, 3> { typedef int3 vec_t; }; |         template<> struct TypeVec<int, 3> { typedef int3 vec_t; }; | ||||||
|         template<> struct TypeVec<int3, 3> { typedef int3 vec_t; }; |         template<> struct TypeVec<int3, 3> { typedef int3 vec_t; }; | ||||||
|         template<> struct TypeVec<int, 4> { typedef int4 vec_t; }; |         template<> struct TypeVec<int, 4> { typedef int4 vec_t; }; | ||||||
|         template<> struct TypeVec<int4, 4> { typedef int4 vec_t; }; |         template<> struct TypeVec<int4, 4> { typedef int4 vec_t; }; | ||||||
|  |  | ||||||
|  |         template<> struct TypeVec<float, 1> { typedef float vec_t; }; | ||||||
|  |         template<> struct TypeVec<float1, 1> { typedef float1 vec_t; }; | ||||||
|         template<> struct TypeVec<float, 2> { typedef float2 vec_t; }; |         template<> struct TypeVec<float, 2> { typedef float2 vec_t; }; | ||||||
|         template<> struct TypeVec<float2, 2> { typedef float2 vec_t; }; |         template<> struct TypeVec<float2, 2> { typedef float2 vec_t; }; | ||||||
|         template<> struct TypeVec<float, 3> { typedef float3 vec_t; }; |         template<> struct TypeVec<float, 3> { typedef float3 vec_t; }; | ||||||
| @@ -94,6 +115,374 @@ namespace cv | |||||||
|         template<> struct TypeVec<float, 4> { typedef float4 vec_t; }; |         template<> struct TypeVec<float, 4> { typedef float4 vec_t; }; | ||||||
|         template<> struct TypeVec<float4, 4> { typedef float4 vec_t; }; |         template<> struct TypeVec<float4, 4> { typedef float4 vec_t; }; | ||||||
|  |  | ||||||
|  |         template<typename T> struct VecTraits; | ||||||
|  |  | ||||||
|  |         template<> struct VecTraits<uchar>  | ||||||
|  |         {  | ||||||
|  |             typedef uchar elem_t;  | ||||||
|  |             enum {cn=1}; | ||||||
|  |             static __device__ uchar all(uchar v) {return v;} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<uchar1>  | ||||||
|  |         {  | ||||||
|  |             typedef uchar elem_t;  | ||||||
|  |             enum {cn=1}; | ||||||
|  |             static __device__ uchar1 all(uchar v) {return make_uchar1(v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<uchar2>  | ||||||
|  |         {  | ||||||
|  |             typedef uchar elem_t;  | ||||||
|  |             enum {cn=2};  | ||||||
|  |             static __device__ uchar2 all(uchar v) {return make_uchar2(v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<uchar3>  | ||||||
|  |         {  | ||||||
|  |             typedef uchar elem_t;  | ||||||
|  |             enum {cn=3};  | ||||||
|  |             static __device__ uchar3 all(uchar v) {return make_uchar3(v, v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<uchar4>  | ||||||
|  |         {  | ||||||
|  |             typedef uchar elem_t;  | ||||||
|  |             enum {cn=4};  | ||||||
|  |             static __device__ uchar4 all(uchar v) {return make_uchar4(v, v, v, v);} | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         template<> struct VecTraits<char>  | ||||||
|  |         {  | ||||||
|  |             typedef char elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ char all(char v) {return v;} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<char1>  | ||||||
|  |         {  | ||||||
|  |             typedef char elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ char1 all(char v) {return make_char1(v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<char2>  | ||||||
|  |         {  | ||||||
|  |             typedef char elem_t;  | ||||||
|  |             enum {cn=2};  | ||||||
|  |             static  __device__ char2 all(char v) {return make_char2(v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<char3>  | ||||||
|  |         {  | ||||||
|  |             typedef char elem_t;  | ||||||
|  |             enum {cn=3};  | ||||||
|  |             static __device__ char3 all(char v) {return make_char3(v, v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<char4>  | ||||||
|  |         {  | ||||||
|  |             typedef char elem_t;  | ||||||
|  |             enum {cn=4};  | ||||||
|  |             static __device__ char4 all(char v) {return make_char4(v, v, v, v);} | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         template<> struct VecTraits<ushort>  | ||||||
|  |         {  | ||||||
|  |             typedef ushort elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ ushort all(ushort v) {return v;} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<ushort1>  | ||||||
|  |         {  | ||||||
|  |             typedef ushort elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ ushort1 all(ushort v) {return make_ushort1(v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<ushort2>  | ||||||
|  |         {  | ||||||
|  |             typedef ushort elem_t;  | ||||||
|  |             enum {cn=2};  | ||||||
|  |             static __device__ ushort2 all(ushort v) {return make_ushort2(v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<ushort3>  | ||||||
|  |         {  | ||||||
|  |             typedef ushort elem_t;  | ||||||
|  |             enum {cn=3};  | ||||||
|  |             static __device__ ushort3 all(ushort v) {return make_ushort3(v, v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<ushort4>  | ||||||
|  |         {  | ||||||
|  |             typedef ushort elem_t;  | ||||||
|  |             enum {cn=4};  | ||||||
|  |             static __device__ ushort4 all(ushort v) {return make_ushort4(v, v, v, v);} | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         template<> struct VecTraits<short>  | ||||||
|  |         {  | ||||||
|  |             typedef short elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ short all(short v) {return v;} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<short1>  | ||||||
|  |         {  | ||||||
|  |             typedef short elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ short1 all(short v) {return make_short1(v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<short2>  | ||||||
|  |         {  | ||||||
|  |             typedef short elem_t;  | ||||||
|  |             enum {cn=2};  | ||||||
|  |             static __device__ short2 all(short v) {return make_short2(v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<short3>  | ||||||
|  |         {  | ||||||
|  |             typedef short elem_t;  | ||||||
|  |             enum {cn=3};  | ||||||
|  |             static __device__ short3 all(short v) {return make_short3(v, v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<short4>  | ||||||
|  |         {  | ||||||
|  |             typedef short elem_t;  | ||||||
|  |             enum {cn=4};  | ||||||
|  |             static __device__ short4 all(short v) {return make_short4(v, v, v, v);} | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         template<> struct VecTraits<uint>  | ||||||
|  |         {  | ||||||
|  |             typedef uint elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ uint all(uint v) {return v;} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<uint1>  | ||||||
|  |         {  | ||||||
|  |             typedef uint elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ uint1 all(uint v) {return make_uint1(v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<uint2>  | ||||||
|  |         {  | ||||||
|  |             typedef uint elem_t;  | ||||||
|  |             enum {cn=2};  | ||||||
|  |             static __device__ uint2 all(uint v) {return make_uint2(v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<uint3>  | ||||||
|  |         {  | ||||||
|  |             typedef uint elem_t;  | ||||||
|  |             enum {cn=3};  | ||||||
|  |             static __device__ uint3 all(uint v) {return make_uint3(v, v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<uint4>  | ||||||
|  |         {  | ||||||
|  |             typedef uint elem_t;  | ||||||
|  |             enum {cn=4};  | ||||||
|  |             static __device__ uint4 all(uint v) {return make_uint4(v, v, v, v);} | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         template<> struct VecTraits<int>  | ||||||
|  |         {  | ||||||
|  |             typedef int elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ int all(int v) {return v;} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<int1>  | ||||||
|  |         {  | ||||||
|  |             typedef int elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ int1 all(int v) {return make_int1(v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<int2>  | ||||||
|  |         {  | ||||||
|  |             typedef int elem_t;  | ||||||
|  |             enum {cn=2};  | ||||||
|  |             static __device__ int2 all(int v) {return make_int2(v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<int3>  | ||||||
|  |         {  | ||||||
|  |             typedef int elem_t;  | ||||||
|  |             enum {cn=3};  | ||||||
|  |             static __device__ int3 all(int v) {return make_int3(v, v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<int4>  | ||||||
|  |         {  | ||||||
|  |             typedef int elem_t;  | ||||||
|  |             enum {cn=4};  | ||||||
|  |             static __device__ int4 all(int v) {return make_int4(v, v, v, v);} | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         template<> struct VecTraits<float>  | ||||||
|  |         {  | ||||||
|  |             typedef float elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ float all(float v) {return v;} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<float1>  | ||||||
|  |         {  | ||||||
|  |             typedef float elem_t;  | ||||||
|  |             enum {cn=1};  | ||||||
|  |             static __device__ float1 all(float v) {return make_float1(v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<float2>  | ||||||
|  |         {  | ||||||
|  |             typedef float elem_t;  | ||||||
|  |             enum {cn=2};  | ||||||
|  |             static __device__ float2 all(float v) {return make_float2(v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<float3>  | ||||||
|  |         {  | ||||||
|  |             typedef float elem_t;  | ||||||
|  |             enum {cn=3};  | ||||||
|  |             static __device__ float3 all(float v) {return make_float3(v, v, v);} | ||||||
|  |         }; | ||||||
|  |         template<> struct VecTraits<float4>  | ||||||
|  |         {  | ||||||
|  |             typedef float elem_t; | ||||||
|  |             enum {cn=4};  | ||||||
|  |             static __device__ float4 all(float v) {return make_float4(v, v, v, v);} | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         template <int cn, typename VecD> struct SatCast; | ||||||
|  |         template <typename VecD> struct SatCast<1, VecD> | ||||||
|  |         { | ||||||
|  |             template <typename VecS> | ||||||
|  |             __device__ VecD operator()(const VecS& v) | ||||||
|  |             { | ||||||
|  |                 VecD res;  | ||||||
|  |                 res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x); | ||||||
|  |                 return res; | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |         template <typename VecD> struct SatCast<2, VecD> | ||||||
|  |         { | ||||||
|  |             template <typename VecS> | ||||||
|  |             __device__ VecD operator()(const VecS& v) | ||||||
|  |             { | ||||||
|  |                 VecD res;  | ||||||
|  |                 res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x); | ||||||
|  |                 res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.y); | ||||||
|  |                 return res; | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |         template <typename VecD> struct SatCast<3, VecD> | ||||||
|  |         { | ||||||
|  |             template <typename VecS> | ||||||
|  |             __device__ VecD operator()(const VecS& v) | ||||||
|  |             { | ||||||
|  |                 VecD res;  | ||||||
|  |                 res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x); | ||||||
|  |                 res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.y); | ||||||
|  |                 res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.z); | ||||||
|  |                 return res; | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |         template <typename VecD> struct SatCast<4, VecD> | ||||||
|  |         { | ||||||
|  |             template <typename VecS> | ||||||
|  |             __device__ VecD operator()(const VecS& v) | ||||||
|  |             { | ||||||
|  |                 VecD res;  | ||||||
|  |                 res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x); | ||||||
|  |                 res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.y); | ||||||
|  |                 res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.z); | ||||||
|  |                 res.w = saturate_cast< VecTraits<VecD>::elem_t >(v.w); | ||||||
|  |                 return res; | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         template <typename VecD, typename VecS> static __device__ VecD saturate_cast_caller(const VecS& v) | ||||||
|  |         { | ||||||
|  |             SatCast<VecTraits<VecD>::cn, VecD> cast; | ||||||
|  |             return cast(v); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const uchar1& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const char1& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const ushort1& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const short1& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const uint1& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const int1& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const float1& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |  | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const uchar2& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const char2& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const ushort2& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const short2& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const uint2& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const int2& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const float2& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |  | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const uchar3& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const char3& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const ushort3& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const short3& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const uint3& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const int3& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const float3& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |  | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const uchar4& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const char4& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const ushort4& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const short4& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const uint4& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const int4& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |         template<typename _Tp> static __device__ _Tp saturate_cast(const float4& v) {return saturate_cast_caller<_Tp>(v);} | ||||||
|  |  | ||||||
|  |         static __device__  uchar1 operator+(const uchar1& a, const uchar1& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar1(a.x + b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  uchar1 operator-(const uchar1& a, const uchar1& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar1(a.x - b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  uchar1 operator*(const uchar1& a, const uchar1& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar1(a.x * b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  uchar1 operator/(const uchar1& a, const uchar1& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar1(a.x / b.x); | ||||||
|  |         } | ||||||
|  |         static __device__ float1 operator*(const uchar1& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float1(a.x * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  uchar2 operator+(const uchar2& a, const uchar2& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar2(a.x + b.x, a.y + b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  uchar2 operator-(const uchar2& a, const uchar2& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar2(a.x - b.x, a.y - b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  uchar2 operator*(const uchar2& a, const uchar2& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar2(a.x * b.x, a.y * b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  uchar2 operator/(const uchar2& a, const uchar2& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar2(a.x / b.x, a.y / b.y); | ||||||
|  |         } | ||||||
|  |         static __device__ float2 operator*(const uchar2& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float2(a.x * s, a.y * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  uchar3 operator+(const uchar3& a, const uchar3& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar3(a.x + b.x, a.y + b.y, a.z + b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  uchar3 operator-(const uchar3& a, const uchar3& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar3(a.x - b.x, a.y - b.y, a.z - b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  uchar3 operator*(const uchar3& a, const uchar3& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar3(a.x * b.x, a.y * b.y, a.z * b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  uchar3 operator/(const uchar3& a, const uchar3& b) | ||||||
|  |         { | ||||||
|  |             return make_uchar3(a.x / b.x, a.y / b.y, a.z / b.z); | ||||||
|  |         } | ||||||
|  |         static __device__ float3 operator*(const uchar3& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float3(a.x * s, a.y * s, a.z * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         static __device__  uchar4 operator+(const uchar4& a, const uchar4& b) |         static __device__  uchar4 operator+(const uchar4& a, const uchar4& b) | ||||||
|         { |         { | ||||||
|             return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); |             return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); | ||||||
| @@ -110,15 +499,429 @@ namespace cv | |||||||
|         { |         { | ||||||
|             return make_uchar4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); |             return make_uchar4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); | ||||||
|         } |         } | ||||||
|         template <typename T> |         static __device__ float4 operator*(const uchar4& a, float s) | ||||||
|         static __device__ uchar4 operator*(const uchar4& a, T s) |  | ||||||
|         { |         { | ||||||
|             return make_uchar4(a.x * s, a.y * s, a.z * s, a.w * s); |             return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); | ||||||
|         } |         } | ||||||
|         template <typename T> |  | ||||||
|         static __device__ uchar4 operator*(T s, const uchar4& a) |         static __device__  char1 operator+(const char1& a, const char1& b) | ||||||
|         { |         { | ||||||
|             return a * s; |             return make_char1(a.x + b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  char1 operator-(const char1& a, const char1& b) | ||||||
|  |         { | ||||||
|  |             return make_char1(a.x - b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  char1 operator*(const char1& a, const char1& b) | ||||||
|  |         { | ||||||
|  |             return make_char1(a.x * b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  char1 operator/(const char1& a, const char1& b) | ||||||
|  |         { | ||||||
|  |             return make_char1(a.x / b.x); | ||||||
|  |         } | ||||||
|  |         static __device__ float1 operator*(const char1& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float1(a.x * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  char2 operator+(const char2& a, const char2& b) | ||||||
|  |         { | ||||||
|  |             return make_char2(a.x + b.x, a.y + b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  char2 operator-(const char2& a, const char2& b) | ||||||
|  |         { | ||||||
|  |             return make_char2(a.x - b.x, a.y - b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  char2 operator*(const char2& a, const char2& b) | ||||||
|  |         { | ||||||
|  |             return make_char2(a.x * b.x, a.y * b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  char2 operator/(const char2& a, const char2& b) | ||||||
|  |         { | ||||||
|  |             return make_char2(a.x / b.x, a.y / b.y); | ||||||
|  |         } | ||||||
|  |         static __device__ float2 operator*(const char2& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float2(a.x * s, a.y * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  char3 operator+(const char3& a, const char3& b) | ||||||
|  |         { | ||||||
|  |             return make_char3(a.x + b.x, a.y + b.y, a.z + b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  char3 operator-(const char3& a, const char3& b) | ||||||
|  |         { | ||||||
|  |             return make_char3(a.x - b.x, a.y - b.y, a.z - b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  char3 operator*(const char3& a, const char3& b) | ||||||
|  |         { | ||||||
|  |             return make_char3(a.x * b.x, a.y * b.y, a.z * b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  char3 operator/(const char3& a, const char3& b) | ||||||
|  |         { | ||||||
|  |             return make_char3(a.x / b.x, a.y / b.y, a.z / b.z); | ||||||
|  |         } | ||||||
|  |         static __device__ float3 operator*(const char3& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float3(a.x * s, a.y * s, a.z * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  char4 operator+(const char4& a, const char4& b) | ||||||
|  |         { | ||||||
|  |             return make_char4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  char4 operator-(const char4& a, const char4& b) | ||||||
|  |         { | ||||||
|  |             return make_char4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  char4 operator*(const char4& a, const char4& b) | ||||||
|  |         { | ||||||
|  |             return make_char4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  char4 operator/(const char4& a, const char4& b) | ||||||
|  |         { | ||||||
|  |             return make_char4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); | ||||||
|  |         } | ||||||
|  |         static __device__ float4 operator*(const char4& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  ushort1 operator+(const ushort1& a, const ushort1& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort1(a.x + b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort1 operator-(const ushort1& a, const ushort1& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort1(a.x - b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort1 operator*(const ushort1& a, const ushort1& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort1(a.x * b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort1 operator/(const ushort1& a, const ushort1& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort1(a.x / b.x); | ||||||
|  |         } | ||||||
|  |         static __device__ float1 operator*(const ushort1& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float1(a.x * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  ushort2 operator+(const ushort2& a, const ushort2& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort2(a.x + b.x, a.y + b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort2 operator-(const ushort2& a, const ushort2& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort2(a.x - b.x, a.y - b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort2 operator*(const ushort2& a, const ushort2& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort2(a.x * b.x, a.y * b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort2 operator/(const ushort2& a, const ushort2& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort2(a.x / b.x, a.y / b.y); | ||||||
|  |         } | ||||||
|  |         static __device__ float2 operator*(const ushort2& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float2(a.x * s, a.y * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  ushort3 operator+(const ushort3& a, const ushort3& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort3(a.x + b.x, a.y + b.y, a.z + b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort3 operator-(const ushort3& a, const ushort3& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort3(a.x - b.x, a.y - b.y, a.z - b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort3 operator*(const ushort3& a, const ushort3& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort3(a.x * b.x, a.y * b.y, a.z * b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort3 operator/(const ushort3& a, const ushort3& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort3(a.x / b.x, a.y / b.y, a.z / b.z); | ||||||
|  |         } | ||||||
|  |         static __device__ float3 operator*(const ushort3& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float3(a.x * s, a.y * s, a.z * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  ushort4 operator+(const ushort4& a, const ushort4& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort4 operator-(const ushort4& a, const ushort4& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort4 operator*(const ushort4& a, const ushort4& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  ushort4 operator/(const ushort4& a, const ushort4& b) | ||||||
|  |         { | ||||||
|  |             return make_ushort4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); | ||||||
|  |         } | ||||||
|  |         static __device__ float4 operator*(const ushort4& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  short1 operator+(const short1& a, const short1& b) | ||||||
|  |         { | ||||||
|  |             return make_short1(a.x + b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  short1 operator-(const short1& a, const short1& b) | ||||||
|  |         { | ||||||
|  |             return make_short1(a.x - b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  short1 operator*(const short1& a, const short1& b) | ||||||
|  |         { | ||||||
|  |             return make_short1(a.x * b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  short1 operator/(const short1& a, const short1& b) | ||||||
|  |         { | ||||||
|  |             return make_short1(a.x / b.x); | ||||||
|  |         } | ||||||
|  |         static __device__ float1 operator*(const short1& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float1(a.x * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  short2 operator+(const short2& a, const short2& b) | ||||||
|  |         { | ||||||
|  |             return make_short2(a.x + b.x, a.y + b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  short2 operator-(const short2& a, const short2& b) | ||||||
|  |         { | ||||||
|  |             return make_short2(a.x - b.x, a.y - b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  short2 operator*(const short2& a, const short2& b) | ||||||
|  |         { | ||||||
|  |             return make_short2(a.x * b.x, a.y * b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  short2 operator/(const short2& a, const short2& b) | ||||||
|  |         { | ||||||
|  |             return make_short2(a.x / b.x, a.y / b.y); | ||||||
|  |         } | ||||||
|  |         static __device__ float2 operator*(const short2& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float2(a.x * s, a.y * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  short3 operator+(const short3& a, const short3& b) | ||||||
|  |         { | ||||||
|  |             return make_short3(a.x + b.x, a.y + b.y, a.z + b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  short3 operator-(const short3& a, const short3& b) | ||||||
|  |         { | ||||||
|  |             return make_short3(a.x - b.x, a.y - b.y, a.z - b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  short3 operator*(const short3& a, const short3& b) | ||||||
|  |         { | ||||||
|  |             return make_short3(a.x * b.x, a.y * b.y, a.z * b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  short3 operator/(const short3& a, const short3& b) | ||||||
|  |         { | ||||||
|  |             return make_short3(a.x / b.x, a.y / b.y, a.z / b.z); | ||||||
|  |         } | ||||||
|  |         static __device__ float3 operator*(const short3& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float3(a.x * s, a.y * s, a.z * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  short4 operator+(const short4& a, const short4& b) | ||||||
|  |         { | ||||||
|  |             return make_short4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  short4 operator-(const short4& a, const short4& b) | ||||||
|  |         { | ||||||
|  |             return make_short4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  short4 operator*(const short4& a, const short4& b) | ||||||
|  |         { | ||||||
|  |             return make_short4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  short4 operator/(const short4& a, const short4& b) | ||||||
|  |         { | ||||||
|  |             return make_short4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); | ||||||
|  |         } | ||||||
|  |         static __device__ float4 operator*(const short4& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  int1 operator+(const int1& a, const int1& b) | ||||||
|  |         { | ||||||
|  |             return make_int1(a.x + b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  int1 operator-(const int1& a, const int1& b) | ||||||
|  |         { | ||||||
|  |             return make_int1(a.x - b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  int1 operator*(const int1& a, const int1& b) | ||||||
|  |         { | ||||||
|  |             return make_int1(a.x * b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  int1 operator/(const int1& a, const int1& b) | ||||||
|  |         { | ||||||
|  |             return make_int1(a.x / b.x); | ||||||
|  |         } | ||||||
|  |         static __device__ float1 operator*(const int1& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float1(a.x * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  int2 operator+(const int2& a, const int2& b) | ||||||
|  |         { | ||||||
|  |             return make_int2(a.x + b.x, a.y + b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  int2 operator-(const int2& a, const int2& b) | ||||||
|  |         { | ||||||
|  |             return make_int2(a.x - b.x, a.y - b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  int2 operator*(const int2& a, const int2& b) | ||||||
|  |         { | ||||||
|  |             return make_int2(a.x * b.x, a.y * b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  int2 operator/(const int2& a, const int2& b) | ||||||
|  |         { | ||||||
|  |             return make_int2(a.x / b.x, a.y / b.y); | ||||||
|  |         } | ||||||
|  |         static __device__ float2 operator*(const int2& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float2(a.x * s, a.y * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  int3 operator+(const int3& a, const int3& b) | ||||||
|  |         { | ||||||
|  |             return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  int3 operator-(const int3& a, const int3& b) | ||||||
|  |         { | ||||||
|  |             return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  int3 operator*(const int3& a, const int3& b) | ||||||
|  |         { | ||||||
|  |             return make_int3(a.x * b.x, a.y * b.y, a.z * b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  int3 operator/(const int3& a, const int3& b) | ||||||
|  |         { | ||||||
|  |             return make_int3(a.x / b.x, a.y / b.y, a.z / b.z); | ||||||
|  |         } | ||||||
|  |         static __device__ float3 operator*(const int3& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float3(a.x * s, a.y * s, a.z * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  int4 operator+(const int4& a, const int4& b) | ||||||
|  |         { | ||||||
|  |             return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  int4 operator-(const int4& a, const int4& b) | ||||||
|  |         { | ||||||
|  |             return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  int4 operator*(const int4& a, const int4& b) | ||||||
|  |         { | ||||||
|  |             return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  int4 operator/(const int4& a, const int4& b) | ||||||
|  |         { | ||||||
|  |             return make_int4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); | ||||||
|  |         } | ||||||
|  |         static __device__ float4 operator*(const int4& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  float1 operator+(const float1& a, const float1& b) | ||||||
|  |         { | ||||||
|  |             return make_float1(a.x + b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  float1 operator-(const float1& a, const float1& b) | ||||||
|  |         { | ||||||
|  |             return make_float1(a.x - b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  float1 operator*(const float1& a, const float1& b) | ||||||
|  |         { | ||||||
|  |             return make_float1(a.x * b.x); | ||||||
|  |         } | ||||||
|  |         static __device__  float1 operator/(const float1& a, const float1& b) | ||||||
|  |         { | ||||||
|  |             return make_float1(a.x / b.x); | ||||||
|  |         } | ||||||
|  |         static __device__ float1 operator*(const float1& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float1(a.x * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  float2 operator+(const float2& a, const float2& b) | ||||||
|  |         { | ||||||
|  |             return make_float2(a.x + b.x, a.y + b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  float2 operator-(const float2& a, const float2& b) | ||||||
|  |         { | ||||||
|  |             return make_float2(a.x - b.x, a.y - b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  float2 operator*(const float2& a, const float2& b) | ||||||
|  |         { | ||||||
|  |             return make_float2(a.x * b.x, a.y * b.y); | ||||||
|  |         } | ||||||
|  |         static __device__  float2 operator/(const float2& a, const float2& b) | ||||||
|  |         { | ||||||
|  |             return make_float2(a.x / b.x, a.y / b.y); | ||||||
|  |         } | ||||||
|  |         static __device__ float2 operator*(const float2& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float2(a.x * s, a.y * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  float3 operator+(const float3& a, const float3& b) | ||||||
|  |         { | ||||||
|  |             return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  float3 operator-(const float3& a, const float3& b) | ||||||
|  |         { | ||||||
|  |             return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  float3 operator*(const float3& a, const float3& b) | ||||||
|  |         { | ||||||
|  |             return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); | ||||||
|  |         } | ||||||
|  |         static __device__  float3 operator/(const float3& a, const float3& b) | ||||||
|  |         { | ||||||
|  |             return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); | ||||||
|  |         } | ||||||
|  |         static __device__ float3 operator*(const float3& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float3(a.x * s, a.y * s, a.z * s); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         static __device__  float4 operator+(const float4& a, const float4& b) | ||||||
|  |         { | ||||||
|  |             return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  float4 operator-(const float4& a, const float4& b) | ||||||
|  |         { | ||||||
|  |             return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  float4 operator*(const float4& a, const float4& b) | ||||||
|  |         { | ||||||
|  |             return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); | ||||||
|  |         } | ||||||
|  |         static __device__  float4 operator/(const float4& a, const float4& b) | ||||||
|  |         { | ||||||
|  |             return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); | ||||||
|  |         } | ||||||
|  |         static __device__ float4 operator*(const float4& a, float s) | ||||||
|  |         { | ||||||
|  |             return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -163,7 +163,7 @@ void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( | |||||||
| void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst) | void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst) | ||||||
| { | { | ||||||
|     // if not -> allocation will be done, but after that dst will not point to page locked memory |     // if not -> allocation will be done, but after that dst will not point to page locked memory | ||||||
|     CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() ) |     CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() ); | ||||||
|     devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); |     devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); | ||||||
| } | } | ||||||
| void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); } | void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); } | ||||||
|   | |||||||
| @@ -577,15 +577,31 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke | |||||||
|  |  | ||||||
| namespace cv { namespace gpu { namespace filters | namespace cv { namespace gpu { namespace filters | ||||||
| { | { | ||||||
|     void linearRowFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); |     void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|     void linearRowFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); |     void linearRowFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|     void linearRowFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); |     void linearRowFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|     void linearRowFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); |     void linearRowFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearRowFilter_gpu_16u_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearRowFilter_gpu_16u_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearRowFilter_gpu_16s_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearRowFilter_gpu_16s_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearRowFilter_gpu_32s_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearRowFilter_gpu_32s_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearRowFilter_gpu_32f_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearRowFilter_gpu_32f_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |  | ||||||
|     void linearColumnFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); |     void linearColumnFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|     void linearColumnFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); |     void linearColumnFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|     void linearColumnFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); |     void linearColumnFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|     void linearColumnFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); |     void linearColumnFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearColumnFilter_gpu_16u_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearColumnFilter_gpu_16u_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearColumnFilter_gpu_16s_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearColumnFilter_gpu_16s_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearColumnFilter_gpu_32s_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearColumnFilter_gpu_32s_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearColumnFilter_gpu_32f_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
|  |     void linearColumnFilter_gpu_32f_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); | ||||||
| }}} | }}} | ||||||
|  |  | ||||||
| namespace | namespace | ||||||
| @@ -637,15 +653,15 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, | |||||||
|     static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R}; |     static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R}; | ||||||
|     static const gpuFilter1D_t gpuFilter1D_callers[6][6] = |     static const gpuFilter1D_t gpuFilter1D_callers[6][6] = | ||||||
|     { |     { | ||||||
|         {0,0,0,0,0,0}, |         {linearRowFilter_gpu_8u_8u_c4,linearRowFilter_gpu_8u_8s_c4,0,0,0,0}, | ||||||
|         {0,0,0,0,0,0}, |         {linearRowFilter_gpu_8s_8u_c4,linearRowFilter_gpu_8s_8s_c4,0,0,0,0}, | ||||||
|         {0,0,0,0,0,0}, |         {0,0,linearRowFilter_gpu_16u_16u_c2,linearRowFilter_gpu_16u_16s_c2,0,0}, | ||||||
|         {0,0,0,0,0,0}, |         {0,0,linearRowFilter_gpu_16s_16u_c2,linearRowFilter_gpu_16s_16s_c2,0,0}, | ||||||
|         {0,0,0,0,linearRowFilter_gpu_32s32s, linearRowFilter_gpu_32s32f}, |         {0,0,0,0,linearRowFilter_gpu_32s_32s_c1, linearRowFilter_gpu_32s_32f_c1}, | ||||||
|         {0,0,0,0,linearRowFilter_gpu_32f32s, linearRowFilter_gpu_32f32f} |         {0,0,0,0,linearRowFilter_gpu_32f_32s_c1, linearRowFilter_gpu_32f_32f_c1} | ||||||
|     }; |     }; | ||||||
|      |      | ||||||
|     if ((srcType == CV_8UC1 || srcType == CV_8UC4) && bufType == srcType) |     if ((bufType == srcType) && (srcType == CV_8UC1 || srcType == CV_8UC4)) | ||||||
|     { |     { | ||||||
|         GpuMat gpu_row_krnl; |         GpuMat gpu_row_krnl; | ||||||
|         int nDivisor; |         int nDivisor; | ||||||
| @@ -657,8 +673,10 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, | |||||||
|         return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor, |         return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor, | ||||||
|             nppFilter1D_callers[CV_MAT_CN(srcType)])); |             nppFilter1D_callers[CV_MAT_CN(srcType)])); | ||||||
|     } |     } | ||||||
|     else if ((srcType == CV_32SC1 || srcType == CV_32FC1) && (bufType == CV_32SC1 || bufType == CV_32FC1)) |  | ||||||
|     { |     CV_Assert(srcType == CV_8UC4 || srcType == CV_8SC4 || srcType == CV_16UC2 || srcType == CV_16SC2 || srcType == CV_32SC1 || srcType == CV_32FC1); | ||||||
|  |     CV_Assert(bufType == CV_8UC4 || bufType == CV_8SC4 || bufType == CV_16UC2 || bufType == CV_16SC2 || bufType == CV_32SC1 || bufType == CV_32FC1); | ||||||
|  |  | ||||||
|     Mat temp(rowKernel.size(), CV_32FC1); |     Mat temp(rowKernel.size(), CV_32FC1); | ||||||
|     rowKernel.convertTo(temp, CV_32FC1); |     rowKernel.convertTo(temp, CV_32FC1); | ||||||
|     Mat cont_krnl = temp.reshape(1, 1); |     Mat cont_krnl = temp.reshape(1, 1); | ||||||
| @@ -668,10 +686,6 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, | |||||||
|  |  | ||||||
|     return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, cont_krnl,  |     return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, cont_krnl,  | ||||||
|         gpuFilter1D_callers[CV_MAT_DEPTH(srcType)][CV_MAT_DEPTH(bufType)])); |         gpuFilter1D_callers[CV_MAT_DEPTH(srcType)][CV_MAT_DEPTH(bufType)])); | ||||||
|     } |  | ||||||
|  |  | ||||||
|     CV_Assert(!"Unsupported types");   |  | ||||||
|     return Ptr<BaseRowFilter_GPU>(0); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| namespace | namespace | ||||||
| @@ -718,15 +732,18 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds | |||||||
|     static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R}; |     static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R}; | ||||||
|     static const gpuFilter1D_t gpuFilter1D_callers[6][6] = |     static const gpuFilter1D_t gpuFilter1D_callers[6][6] = | ||||||
|     { |     { | ||||||
|         {0,0,0,0,0,0}, |         {linearColumnFilter_gpu_8u_8u_c4,linearColumnFilter_gpu_8u_8s_c4,0,0,0,0}, | ||||||
|         {0,0,0,0,0,0}, |         {linearColumnFilter_gpu_8s_8u_c4,linearColumnFilter_gpu_8s_8s_c4,0,0,0,0}, | ||||||
|         {0,0,0,0,0,0}, |         {0,0,linearColumnFilter_gpu_16u_16u_c2,linearColumnFilter_gpu_16u_16s_c2,0,0}, | ||||||
|         {0,0,0,0,0,0}, |         {0,0,linearColumnFilter_gpu_16s_16u_c2,linearColumnFilter_gpu_16s_16s_c2,0,0}, | ||||||
|         {0,0,0,0,linearColumnFilter_gpu_32s32s, linearColumnFilter_gpu_32s32f}, |         {0,0,0,0,linearColumnFilter_gpu_32s_32s_c1, linearColumnFilter_gpu_32s_32f_c1}, | ||||||
|         {0,0,0,0,linearColumnFilter_gpu_32f32s, linearColumnFilter_gpu_32f32f} |         {0,0,0,0,linearColumnFilter_gpu_32f_32s_c1, linearColumnFilter_gpu_32f_32f_c1} | ||||||
|     }; |     }; | ||||||
|      |      | ||||||
|     if ((bufType == CV_8UC1 || bufType == CV_8UC4) && dstType == bufType) |     double kernelMin; | ||||||
|  |     minMaxLoc(columnKernel, &kernelMin); | ||||||
|  |      | ||||||
|  |     if ((bufType == dstType) && (bufType == CV_8UC1 || bufType == CV_8UC4)) | ||||||
|     { |     { | ||||||
|         GpuMat gpu_col_krnl; |         GpuMat gpu_col_krnl; | ||||||
|         int nDivisor; |         int nDivisor; | ||||||
| @@ -738,8 +755,10 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds | |||||||
|         return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor,  |         return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor,  | ||||||
|             nppFilter1D_callers[CV_MAT_CN(bufType)])); |             nppFilter1D_callers[CV_MAT_CN(bufType)])); | ||||||
|     } |     } | ||||||
|     else if ((bufType == CV_32SC1 || bufType == CV_32FC1) && (dstType == CV_32SC1 || dstType == CV_32FC1)) |  | ||||||
|     { |     CV_Assert(dstType == CV_8UC4 || dstType == CV_8SC4 || dstType == CV_16UC2 || dstType == CV_16SC2 || dstType == CV_32SC1 || dstType == CV_32FC1); | ||||||
|  |     CV_Assert(bufType == CV_8UC4 || bufType == CV_8SC4 || bufType == CV_16UC2 || bufType == CV_16SC2 || bufType == CV_32SC1 || bufType == CV_32FC1); | ||||||
|  |  | ||||||
|     Mat temp(columnKernel.size(), CV_32FC1); |     Mat temp(columnKernel.size(), CV_32FC1); | ||||||
|     columnKernel.convertTo(temp, CV_32FC1); |     columnKernel.convertTo(temp, CV_32FC1); | ||||||
|     Mat cont_krnl = temp.reshape(1, 1); |     Mat cont_krnl = temp.reshape(1, 1); | ||||||
| @@ -749,10 +768,6 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds | |||||||
|  |  | ||||||
|     return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, cont_krnl,  |     return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, cont_krnl,  | ||||||
|         gpuFilter1D_callers[CV_MAT_DEPTH(bufType)][CV_MAT_DEPTH(dstType)])); |         gpuFilter1D_callers[CV_MAT_DEPTH(bufType)][CV_MAT_DEPTH(dstType)])); | ||||||
|     } |  | ||||||
|  |  | ||||||
|     CV_Assert(!"Unsupported types");   |  | ||||||
|     return Ptr<BaseColumnFilter_GPU>(0); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel,  | Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel,  | ||||||
|   | |||||||
| @@ -652,7 +652,7 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, const | |||||||
|  |  | ||||||
| double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh)  | double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh)  | ||||||
| {  | {  | ||||||
|     CV_Assert(src.type() == CV_32FC1) |     CV_Assert(src.type() == CV_32FC1); | ||||||
|  |  | ||||||
|     dst.create( src.size(), src.type() ); |     dst.create( src.size(), src.type() ); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -166,6 +166,8 @@ struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest | |||||||
|  |  | ||||||
|     int test(const Mat& img) |     int test(const Mat& img) | ||||||
|     { |     { | ||||||
|  |         if (img.type() != CV_8UC1) | ||||||
|  |             return CvTS::OK; | ||||||
|         int ksizes[] = {3, 5, 7}; |         int ksizes[] = {3, 5, 7}; | ||||||
|         int ksizes_num = sizeof(ksizes) / sizeof(int); |         int ksizes_num = sizeof(ksizes) / sizeof(int); | ||||||
|  |  | ||||||
| @@ -181,8 +183,10 @@ struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest | |||||||
|             cv::Sobel(img, cpudst, -1, dx, dy, ksizes[i]); |             cv::Sobel(img, cpudst, -1, dx, dy, ksizes[i]); | ||||||
|  |  | ||||||
|             GpuMat gpu1(img); |             GpuMat gpu1(img); | ||||||
|  |             gpu1.convertTo(gpu1, CV_32S); | ||||||
|             GpuMat gpudst; |             GpuMat gpudst; | ||||||
|             cv::gpu::Sobel(gpu1, gpudst, -1, dx, dy, ksizes[i]); |             cv::gpu::Sobel(gpu1, gpudst, -1, dx, dy, ksizes[i]); | ||||||
|  |             gpudst.convertTo(gpudst, CV_8U); | ||||||
|  |  | ||||||
|             if (CheckNorm(cpudst, gpudst, Size(ksizes[i], ksizes[i])) != CvTS::OK) |             if (CheckNorm(cpudst, gpudst, Size(ksizes[i], ksizes[i])) != CvTS::OK) | ||||||
|                 test_res = CvTS::FAIL_GENERIC; |                 test_res = CvTS::FAIL_GENERIC; | ||||||
| @@ -200,14 +204,19 @@ struct CV_GpuNppImageScharrTest : public CV_GpuNppFilterTest | |||||||
|  |  | ||||||
|     int test(const Mat& img) |     int test(const Mat& img) | ||||||
|     { |     { | ||||||
|  |         if (img.type() != CV_8UC1) | ||||||
|  |             return CvTS::OK; | ||||||
|  |  | ||||||
|         int dx = 1, dy = 0; |         int dx = 1, dy = 0; | ||||||
|  |  | ||||||
|         Mat cpudst; |         Mat cpudst; | ||||||
|         cv::Scharr(img, cpudst, -1, dx, dy); |         cv::Scharr(img, cpudst, -1, dx, dy); | ||||||
|  |  | ||||||
|         GpuMat gpu1(img); |         GpuMat gpu1(img); | ||||||
|  |         gpu1.convertTo(gpu1, CV_32S); | ||||||
|         GpuMat gpudst; |         GpuMat gpudst; | ||||||
|         cv::gpu::Scharr(gpu1, gpudst, -1, dx, dy); |         cv::gpu::Scharr(gpu1, gpudst, -1, dx, dy); | ||||||
|  |         gpudst.convertTo(gpudst, CV_8U); | ||||||
|          |          | ||||||
|         return CheckNorm(cpudst, gpudst, Size(3, 3)); |         return CheckNorm(cpudst, gpudst, Size(3, 3)); | ||||||
|     } |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Vladislav Vinogradov
					Vladislav Vinogradov