fixed build for CARMA platform
This commit is contained in:
@@ -52,7 +52,7 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
|
||||
namespace
|
||||
namespace canny
|
||||
{
|
||||
struct L1 : binary_function<int, int, float>
|
||||
{
|
||||
@@ -78,17 +78,17 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <> struct TransformFunctorTraits<L1> : DefaultTransformFunctorTraits<L1>
|
||||
template <> struct TransformFunctorTraits<canny::L1> : DefaultTransformFunctorTraits<canny::L1>
|
||||
{
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
template <> struct TransformFunctorTraits<L2> : DefaultTransformFunctorTraits<L2>
|
||||
template <> struct TransformFunctorTraits<canny::L2> : DefaultTransformFunctorTraits<canny::L2>
|
||||
{
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace
|
||||
namespace canny
|
||||
{
|
||||
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
struct SrcTex
|
||||
@@ -104,7 +104,7 @@ namespace
|
||||
};
|
||||
|
||||
template <class Norm> __global__
|
||||
void calcMagnitude(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
|
||||
void calcMagnitudeKernel(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
@@ -120,10 +120,7 @@ namespace
|
||||
|
||||
mag(y, x) = norm(dxVal, dyVal);
|
||||
}
|
||||
}
|
||||
|
||||
namespace canny
|
||||
{
|
||||
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
|
||||
{
|
||||
const dim3 block(16, 16);
|
||||
@@ -135,12 +132,12 @@ namespace canny
|
||||
if (L2Grad)
|
||||
{
|
||||
L2 norm;
|
||||
::calcMagnitude<<<grid, block>>>(src, dx, dy, mag, norm);
|
||||
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
|
||||
}
|
||||
else
|
||||
{
|
||||
L1 norm;
|
||||
::calcMagnitude<<<grid, block>>>(src, dx, dy, mag, norm);
|
||||
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
|
||||
}
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
@@ -165,11 +162,11 @@ namespace canny
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace
|
||||
namespace canny
|
||||
{
|
||||
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_mag(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
|
||||
__global__ void calcMap(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
|
||||
__global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
|
||||
{
|
||||
const int CANNY_SHIFT = 15;
|
||||
const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
|
||||
@@ -220,10 +217,7 @@ namespace
|
||||
|
||||
map(y, x) = edge_type;
|
||||
}
|
||||
}
|
||||
|
||||
namespace canny
|
||||
{
|
||||
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh)
|
||||
{
|
||||
const dim3 block(16, 16);
|
||||
@@ -231,7 +225,7 @@ namespace canny
|
||||
|
||||
bindTexture(&tex_mag, mag);
|
||||
|
||||
::calcMap<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
|
||||
calcMapKernel<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@@ -240,11 +234,11 @@ namespace canny
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace
|
||||
namespace canny
|
||||
{
|
||||
__device__ int counter = 0;
|
||||
|
||||
__global__ void edgesHysteresisLocal(PtrStepSzi map, ushort2* st)
|
||||
__global__ void edgesHysteresisLocalKernel(PtrStepSzi map, ushort2* st)
|
||||
{
|
||||
__shared__ volatile int smem[18][18];
|
||||
|
||||
@@ -325,10 +319,7 @@ namespace
|
||||
st[ind] = make_ushort2(x, y);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace canny
|
||||
{
|
||||
void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1)
|
||||
{
|
||||
void* counter_ptr;
|
||||
@@ -339,7 +330,7 @@ namespace canny
|
||||
const dim3 block(16, 16);
|
||||
const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));
|
||||
|
||||
::edgesHysteresisLocal<<<grid, block>>>(map, st1);
|
||||
edgesHysteresisLocalKernel<<<grid, block>>>(map, st1);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@@ -348,12 +339,12 @@ namespace canny
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace
|
||||
namespace canny
|
||||
{
|
||||
__constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
|
||||
__constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
|
||||
|
||||
__global__ void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count)
|
||||
__global__ void edgesHysteresisGlobalKernel(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count)
|
||||
{
|
||||
const int stack_size = 512;
|
||||
|
||||
@@ -439,14 +430,11 @@ namespace
|
||||
st2[ind + i] = s_st[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace canny
|
||||
{
|
||||
void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2)
|
||||
{
|
||||
void* counter_ptr;
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, ::counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
|
||||
|
||||
int count;
|
||||
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
@@ -458,7 +446,7 @@ namespace canny
|
||||
const dim3 block(128);
|
||||
const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
|
||||
|
||||
::edgesHysteresisGlobal<<<grid, block>>>(map, st1, st2, count);
|
||||
edgesHysteresisGlobalKernel<<<grid, block>>>(map, st1, st2, count);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@@ -472,7 +460,7 @@ namespace canny
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace
|
||||
namespace canny
|
||||
{
|
||||
struct GetEdges : unary_function<int, uchar>
|
||||
{
|
||||
@@ -488,7 +476,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <> struct TransformFunctorTraits<GetEdges> : DefaultTransformFunctorTraits<GetEdges>
|
||||
template <> struct TransformFunctorTraits<canny::GetEdges> : DefaultTransformFunctorTraits<canny::GetEdges>
|
||||
{
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
@@ -497,6 +497,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream)
|
||||
{
|
||||
(void) flags;
|
||||
dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
|
||||
dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS));
|
||||
|
||||
@@ -529,4 +530,4 @@ namespace cv { namespace gpu { namespace device
|
||||
}
|
||||
} } }
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
#endif /* CUDA_DISABLER */
|
||||
|
@@ -49,20 +49,12 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
|
||||
namespace
|
||||
namespace column_filter
|
||||
{
|
||||
#define MAX_KERNEL_SIZE 32
|
||||
|
||||
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
||||
|
||||
void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
|
||||
{
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
|
||||
else
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
|
||||
}
|
||||
|
||||
template <int KSIZE, typename T, typename D, typename B>
|
||||
__global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
|
||||
{
|
||||
@@ -196,182 +188,185 @@ namespace filter
|
||||
{
|
||||
{
|
||||
0,
|
||||
::caller< 1, T, D, BrdColReflect101>,
|
||||
::caller< 2, T, D, BrdColReflect101>,
|
||||
::caller< 3, T, D, BrdColReflect101>,
|
||||
::caller< 4, T, D, BrdColReflect101>,
|
||||
::caller< 5, T, D, BrdColReflect101>,
|
||||
::caller< 6, T, D, BrdColReflect101>,
|
||||
::caller< 7, T, D, BrdColReflect101>,
|
||||
::caller< 8, T, D, BrdColReflect101>,
|
||||
::caller< 9, T, D, BrdColReflect101>,
|
||||
::caller<10, T, D, BrdColReflect101>,
|
||||
::caller<11, T, D, BrdColReflect101>,
|
||||
::caller<12, T, D, BrdColReflect101>,
|
||||
::caller<13, T, D, BrdColReflect101>,
|
||||
::caller<14, T, D, BrdColReflect101>,
|
||||
::caller<15, T, D, BrdColReflect101>,
|
||||
::caller<16, T, D, BrdColReflect101>,
|
||||
::caller<17, T, D, BrdColReflect101>,
|
||||
::caller<18, T, D, BrdColReflect101>,
|
||||
::caller<19, T, D, BrdColReflect101>,
|
||||
::caller<20, T, D, BrdColReflect101>,
|
||||
::caller<21, T, D, BrdColReflect101>,
|
||||
::caller<22, T, D, BrdColReflect101>,
|
||||
::caller<23, T, D, BrdColReflect101>,
|
||||
::caller<24, T, D, BrdColReflect101>,
|
||||
::caller<25, T, D, BrdColReflect101>,
|
||||
::caller<26, T, D, BrdColReflect101>,
|
||||
::caller<27, T, D, BrdColReflect101>,
|
||||
::caller<28, T, D, BrdColReflect101>,
|
||||
::caller<29, T, D, BrdColReflect101>,
|
||||
::caller<30, T, D, BrdColReflect101>,
|
||||
::caller<31, T, D, BrdColReflect101>,
|
||||
::caller<32, T, D, BrdColReflect101>
|
||||
column_filter::caller< 1, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 2, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 3, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 4, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 5, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 6, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 7, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 8, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 9, T, D, BrdColReflect101>,
|
||||
column_filter::caller<10, T, D, BrdColReflect101>,
|
||||
column_filter::caller<11, T, D, BrdColReflect101>,
|
||||
column_filter::caller<12, T, D, BrdColReflect101>,
|
||||
column_filter::caller<13, T, D, BrdColReflect101>,
|
||||
column_filter::caller<14, T, D, BrdColReflect101>,
|
||||
column_filter::caller<15, T, D, BrdColReflect101>,
|
||||
column_filter::caller<16, T, D, BrdColReflect101>,
|
||||
column_filter::caller<17, T, D, BrdColReflect101>,
|
||||
column_filter::caller<18, T, D, BrdColReflect101>,
|
||||
column_filter::caller<19, T, D, BrdColReflect101>,
|
||||
column_filter::caller<20, T, D, BrdColReflect101>,
|
||||
column_filter::caller<21, T, D, BrdColReflect101>,
|
||||
column_filter::caller<22, T, D, BrdColReflect101>,
|
||||
column_filter::caller<23, T, D, BrdColReflect101>,
|
||||
column_filter::caller<24, T, D, BrdColReflect101>,
|
||||
column_filter::caller<25, T, D, BrdColReflect101>,
|
||||
column_filter::caller<26, T, D, BrdColReflect101>,
|
||||
column_filter::caller<27, T, D, BrdColReflect101>,
|
||||
column_filter::caller<28, T, D, BrdColReflect101>,
|
||||
column_filter::caller<29, T, D, BrdColReflect101>,
|
||||
column_filter::caller<30, T, D, BrdColReflect101>,
|
||||
column_filter::caller<31, T, D, BrdColReflect101>,
|
||||
column_filter::caller<32, T, D, BrdColReflect101>
|
||||
},
|
||||
{
|
||||
0,
|
||||
::caller< 1, T, D, BrdColReplicate>,
|
||||
::caller< 2, T, D, BrdColReplicate>,
|
||||
::caller< 3, T, D, BrdColReplicate>,
|
||||
::caller< 4, T, D, BrdColReplicate>,
|
||||
::caller< 5, T, D, BrdColReplicate>,
|
||||
::caller< 6, T, D, BrdColReplicate>,
|
||||
::caller< 7, T, D, BrdColReplicate>,
|
||||
::caller< 8, T, D, BrdColReplicate>,
|
||||
::caller< 9, T, D, BrdColReplicate>,
|
||||
::caller<10, T, D, BrdColReplicate>,
|
||||
::caller<11, T, D, BrdColReplicate>,
|
||||
::caller<12, T, D, BrdColReplicate>,
|
||||
::caller<13, T, D, BrdColReplicate>,
|
||||
::caller<14, T, D, BrdColReplicate>,
|
||||
::caller<15, T, D, BrdColReplicate>,
|
||||
::caller<16, T, D, BrdColReplicate>,
|
||||
::caller<17, T, D, BrdColReplicate>,
|
||||
::caller<18, T, D, BrdColReplicate>,
|
||||
::caller<19, T, D, BrdColReplicate>,
|
||||
::caller<20, T, D, BrdColReplicate>,
|
||||
::caller<21, T, D, BrdColReplicate>,
|
||||
::caller<22, T, D, BrdColReplicate>,
|
||||
::caller<23, T, D, BrdColReplicate>,
|
||||
::caller<24, T, D, BrdColReplicate>,
|
||||
::caller<25, T, D, BrdColReplicate>,
|
||||
::caller<26, T, D, BrdColReplicate>,
|
||||
::caller<27, T, D, BrdColReplicate>,
|
||||
::caller<28, T, D, BrdColReplicate>,
|
||||
::caller<29, T, D, BrdColReplicate>,
|
||||
::caller<30, T, D, BrdColReplicate>,
|
||||
::caller<31, T, D, BrdColReplicate>,
|
||||
::caller<32, T, D, BrdColReplicate>
|
||||
column_filter::caller< 1, T, D, BrdColReplicate>,
|
||||
column_filter::caller< 2, T, D, BrdColReplicate>,
|
||||
column_filter::caller< 3, T, D, BrdColReplicate>,
|
||||
column_filter::caller< 4, T, D, BrdColReplicate>,
|
||||
column_filter::caller< 5, T, D, BrdColReplicate>,
|
||||
column_filter::caller< 6, T, D, BrdColReplicate>,
|
||||
column_filter::caller< 7, T, D, BrdColReplicate>,
|
||||
column_filter::caller< 8, T, D, BrdColReplicate>,
|
||||
column_filter::caller< 9, T, D, BrdColReplicate>,
|
||||
column_filter::caller<10, T, D, BrdColReplicate>,
|
||||
column_filter::caller<11, T, D, BrdColReplicate>,
|
||||
column_filter::caller<12, T, D, BrdColReplicate>,
|
||||
column_filter::caller<13, T, D, BrdColReplicate>,
|
||||
column_filter::caller<14, T, D, BrdColReplicate>,
|
||||
column_filter::caller<15, T, D, BrdColReplicate>,
|
||||
column_filter::caller<16, T, D, BrdColReplicate>,
|
||||
column_filter::caller<17, T, D, BrdColReplicate>,
|
||||
column_filter::caller<18, T, D, BrdColReplicate>,
|
||||
column_filter::caller<19, T, D, BrdColReplicate>,
|
||||
column_filter::caller<20, T, D, BrdColReplicate>,
|
||||
column_filter::caller<21, T, D, BrdColReplicate>,
|
||||
column_filter::caller<22, T, D, BrdColReplicate>,
|
||||
column_filter::caller<23, T, D, BrdColReplicate>,
|
||||
column_filter::caller<24, T, D, BrdColReplicate>,
|
||||
column_filter::caller<25, T, D, BrdColReplicate>,
|
||||
column_filter::caller<26, T, D, BrdColReplicate>,
|
||||
column_filter::caller<27, T, D, BrdColReplicate>,
|
||||
column_filter::caller<28, T, D, BrdColReplicate>,
|
||||
column_filter::caller<29, T, D, BrdColReplicate>,
|
||||
column_filter::caller<30, T, D, BrdColReplicate>,
|
||||
column_filter::caller<31, T, D, BrdColReplicate>,
|
||||
column_filter::caller<32, T, D, BrdColReplicate>
|
||||
},
|
||||
{
|
||||
0,
|
||||
::caller< 1, T, D, BrdColConstant>,
|
||||
::caller< 2, T, D, BrdColConstant>,
|
||||
::caller< 3, T, D, BrdColConstant>,
|
||||
::caller< 4, T, D, BrdColConstant>,
|
||||
::caller< 5, T, D, BrdColConstant>,
|
||||
::caller< 6, T, D, BrdColConstant>,
|
||||
::caller< 7, T, D, BrdColConstant>,
|
||||
::caller< 8, T, D, BrdColConstant>,
|
||||
::caller< 9, T, D, BrdColConstant>,
|
||||
::caller<10, T, D, BrdColConstant>,
|
||||
::caller<11, T, D, BrdColConstant>,
|
||||
::caller<12, T, D, BrdColConstant>,
|
||||
::caller<13, T, D, BrdColConstant>,
|
||||
::caller<14, T, D, BrdColConstant>,
|
||||
::caller<15, T, D, BrdColConstant>,
|
||||
::caller<16, T, D, BrdColConstant>,
|
||||
::caller<17, T, D, BrdColConstant>,
|
||||
::caller<18, T, D, BrdColConstant>,
|
||||
::caller<19, T, D, BrdColConstant>,
|
||||
::caller<20, T, D, BrdColConstant>,
|
||||
::caller<21, T, D, BrdColConstant>,
|
||||
::caller<22, T, D, BrdColConstant>,
|
||||
::caller<23, T, D, BrdColConstant>,
|
||||
::caller<24, T, D, BrdColConstant>,
|
||||
::caller<25, T, D, BrdColConstant>,
|
||||
::caller<26, T, D, BrdColConstant>,
|
||||
::caller<27, T, D, BrdColConstant>,
|
||||
::caller<28, T, D, BrdColConstant>,
|
||||
::caller<29, T, D, BrdColConstant>,
|
||||
::caller<30, T, D, BrdColConstant>,
|
||||
::caller<31, T, D, BrdColConstant>,
|
||||
::caller<32, T, D, BrdColConstant>
|
||||
column_filter::caller< 1, T, D, BrdColConstant>,
|
||||
column_filter::caller< 2, T, D, BrdColConstant>,
|
||||
column_filter::caller< 3, T, D, BrdColConstant>,
|
||||
column_filter::caller< 4, T, D, BrdColConstant>,
|
||||
column_filter::caller< 5, T, D, BrdColConstant>,
|
||||
column_filter::caller< 6, T, D, BrdColConstant>,
|
||||
column_filter::caller< 7, T, D, BrdColConstant>,
|
||||
column_filter::caller< 8, T, D, BrdColConstant>,
|
||||
column_filter::caller< 9, T, D, BrdColConstant>,
|
||||
column_filter::caller<10, T, D, BrdColConstant>,
|
||||
column_filter::caller<11, T, D, BrdColConstant>,
|
||||
column_filter::caller<12, T, D, BrdColConstant>,
|
||||
column_filter::caller<13, T, D, BrdColConstant>,
|
||||
column_filter::caller<14, T, D, BrdColConstant>,
|
||||
column_filter::caller<15, T, D, BrdColConstant>,
|
||||
column_filter::caller<16, T, D, BrdColConstant>,
|
||||
column_filter::caller<17, T, D, BrdColConstant>,
|
||||
column_filter::caller<18, T, D, BrdColConstant>,
|
||||
column_filter::caller<19, T, D, BrdColConstant>,
|
||||
column_filter::caller<20, T, D, BrdColConstant>,
|
||||
column_filter::caller<21, T, D, BrdColConstant>,
|
||||
column_filter::caller<22, T, D, BrdColConstant>,
|
||||
column_filter::caller<23, T, D, BrdColConstant>,
|
||||
column_filter::caller<24, T, D, BrdColConstant>,
|
||||
column_filter::caller<25, T, D, BrdColConstant>,
|
||||
column_filter::caller<26, T, D, BrdColConstant>,
|
||||
column_filter::caller<27, T, D, BrdColConstant>,
|
||||
column_filter::caller<28, T, D, BrdColConstant>,
|
||||
column_filter::caller<29, T, D, BrdColConstant>,
|
||||
column_filter::caller<30, T, D, BrdColConstant>,
|
||||
column_filter::caller<31, T, D, BrdColConstant>,
|
||||
column_filter::caller<32, T, D, BrdColConstant>
|
||||
},
|
||||
{
|
||||
0,
|
||||
::caller< 1, T, D, BrdColReflect>,
|
||||
::caller< 2, T, D, BrdColReflect>,
|
||||
::caller< 3, T, D, BrdColReflect>,
|
||||
::caller< 4, T, D, BrdColReflect>,
|
||||
::caller< 5, T, D, BrdColReflect>,
|
||||
::caller< 6, T, D, BrdColReflect>,
|
||||
::caller< 7, T, D, BrdColReflect>,
|
||||
::caller< 8, T, D, BrdColReflect>,
|
||||
::caller< 9, T, D, BrdColReflect>,
|
||||
::caller<10, T, D, BrdColReflect>,
|
||||
::caller<11, T, D, BrdColReflect>,
|
||||
::caller<12, T, D, BrdColReflect>,
|
||||
::caller<13, T, D, BrdColReflect>,
|
||||
::caller<14, T, D, BrdColReflect>,
|
||||
::caller<15, T, D, BrdColReflect>,
|
||||
::caller<16, T, D, BrdColReflect>,
|
||||
::caller<17, T, D, BrdColReflect>,
|
||||
::caller<18, T, D, BrdColReflect>,
|
||||
::caller<19, T, D, BrdColReflect>,
|
||||
::caller<20, T, D, BrdColReflect>,
|
||||
::caller<21, T, D, BrdColReflect>,
|
||||
::caller<22, T, D, BrdColReflect>,
|
||||
::caller<23, T, D, BrdColReflect>,
|
||||
::caller<24, T, D, BrdColReflect>,
|
||||
::caller<25, T, D, BrdColReflect>,
|
||||
::caller<26, T, D, BrdColReflect>,
|
||||
::caller<27, T, D, BrdColReflect>,
|
||||
::caller<28, T, D, BrdColReflect>,
|
||||
::caller<29, T, D, BrdColReflect>,
|
||||
::caller<30, T, D, BrdColReflect>,
|
||||
::caller<31, T, D, BrdColReflect>,
|
||||
::caller<32, T, D, BrdColReflect>
|
||||
column_filter::caller< 1, T, D, BrdColReflect>,
|
||||
column_filter::caller< 2, T, D, BrdColReflect>,
|
||||
column_filter::caller< 3, T, D, BrdColReflect>,
|
||||
column_filter::caller< 4, T, D, BrdColReflect>,
|
||||
column_filter::caller< 5, T, D, BrdColReflect>,
|
||||
column_filter::caller< 6, T, D, BrdColReflect>,
|
||||
column_filter::caller< 7, T, D, BrdColReflect>,
|
||||
column_filter::caller< 8, T, D, BrdColReflect>,
|
||||
column_filter::caller< 9, T, D, BrdColReflect>,
|
||||
column_filter::caller<10, T, D, BrdColReflect>,
|
||||
column_filter::caller<11, T, D, BrdColReflect>,
|
||||
column_filter::caller<12, T, D, BrdColReflect>,
|
||||
column_filter::caller<13, T, D, BrdColReflect>,
|
||||
column_filter::caller<14, T, D, BrdColReflect>,
|
||||
column_filter::caller<15, T, D, BrdColReflect>,
|
||||
column_filter::caller<16, T, D, BrdColReflect>,
|
||||
column_filter::caller<17, T, D, BrdColReflect>,
|
||||
column_filter::caller<18, T, D, BrdColReflect>,
|
||||
column_filter::caller<19, T, D, BrdColReflect>,
|
||||
column_filter::caller<20, T, D, BrdColReflect>,
|
||||
column_filter::caller<21, T, D, BrdColReflect>,
|
||||
column_filter::caller<22, T, D, BrdColReflect>,
|
||||
column_filter::caller<23, T, D, BrdColReflect>,
|
||||
column_filter::caller<24, T, D, BrdColReflect>,
|
||||
column_filter::caller<25, T, D, BrdColReflect>,
|
||||
column_filter::caller<26, T, D, BrdColReflect>,
|
||||
column_filter::caller<27, T, D, BrdColReflect>,
|
||||
column_filter::caller<28, T, D, BrdColReflect>,
|
||||
column_filter::caller<29, T, D, BrdColReflect>,
|
||||
column_filter::caller<30, T, D, BrdColReflect>,
|
||||
column_filter::caller<31, T, D, BrdColReflect>,
|
||||
column_filter::caller<32, T, D, BrdColReflect>
|
||||
},
|
||||
{
|
||||
0,
|
||||
::caller< 1, T, D, BrdColWrap>,
|
||||
::caller< 2, T, D, BrdColWrap>,
|
||||
::caller< 3, T, D, BrdColWrap>,
|
||||
::caller< 4, T, D, BrdColWrap>,
|
||||
::caller< 5, T, D, BrdColWrap>,
|
||||
::caller< 6, T, D, BrdColWrap>,
|
||||
::caller< 7, T, D, BrdColWrap>,
|
||||
::caller< 8, T, D, BrdColWrap>,
|
||||
::caller< 9, T, D, BrdColWrap>,
|
||||
::caller<10, T, D, BrdColWrap>,
|
||||
::caller<11, T, D, BrdColWrap>,
|
||||
::caller<12, T, D, BrdColWrap>,
|
||||
::caller<13, T, D, BrdColWrap>,
|
||||
::caller<14, T, D, BrdColWrap>,
|
||||
::caller<15, T, D, BrdColWrap>,
|
||||
::caller<16, T, D, BrdColWrap>,
|
||||
::caller<17, T, D, BrdColWrap>,
|
||||
::caller<18, T, D, BrdColWrap>,
|
||||
::caller<19, T, D, BrdColWrap>,
|
||||
::caller<20, T, D, BrdColWrap>,
|
||||
::caller<21, T, D, BrdColWrap>,
|
||||
::caller<22, T, D, BrdColWrap>,
|
||||
::caller<23, T, D, BrdColWrap>,
|
||||
::caller<24, T, D, BrdColWrap>,
|
||||
::caller<25, T, D, BrdColWrap>,
|
||||
::caller<26, T, D, BrdColWrap>,
|
||||
::caller<27, T, D, BrdColWrap>,
|
||||
::caller<28, T, D, BrdColWrap>,
|
||||
::caller<29, T, D, BrdColWrap>,
|
||||
::caller<30, T, D, BrdColWrap>,
|
||||
::caller<31, T, D, BrdColWrap>,
|
||||
::caller<32, T, D, BrdColWrap>
|
||||
column_filter::caller< 1, T, D, BrdColWrap>,
|
||||
column_filter::caller< 2, T, D, BrdColWrap>,
|
||||
column_filter::caller< 3, T, D, BrdColWrap>,
|
||||
column_filter::caller< 4, T, D, BrdColWrap>,
|
||||
column_filter::caller< 5, T, D, BrdColWrap>,
|
||||
column_filter::caller< 6, T, D, BrdColWrap>,
|
||||
column_filter::caller< 7, T, D, BrdColWrap>,
|
||||
column_filter::caller< 8, T, D, BrdColWrap>,
|
||||
column_filter::caller< 9, T, D, BrdColWrap>,
|
||||
column_filter::caller<10, T, D, BrdColWrap>,
|
||||
column_filter::caller<11, T, D, BrdColWrap>,
|
||||
column_filter::caller<12, T, D, BrdColWrap>,
|
||||
column_filter::caller<13, T, D, BrdColWrap>,
|
||||
column_filter::caller<14, T, D, BrdColWrap>,
|
||||
column_filter::caller<15, T, D, BrdColWrap>,
|
||||
column_filter::caller<16, T, D, BrdColWrap>,
|
||||
column_filter::caller<17, T, D, BrdColWrap>,
|
||||
column_filter::caller<18, T, D, BrdColWrap>,
|
||||
column_filter::caller<19, T, D, BrdColWrap>,
|
||||
column_filter::caller<20, T, D, BrdColWrap>,
|
||||
column_filter::caller<21, T, D, BrdColWrap>,
|
||||
column_filter::caller<22, T, D, BrdColWrap>,
|
||||
column_filter::caller<23, T, D, BrdColWrap>,
|
||||
column_filter::caller<24, T, D, BrdColWrap>,
|
||||
column_filter::caller<25, T, D, BrdColWrap>,
|
||||
column_filter::caller<26, T, D, BrdColWrap>,
|
||||
column_filter::caller<27, T, D, BrdColWrap>,
|
||||
column_filter::caller<28, T, D, BrdColWrap>,
|
||||
column_filter::caller<29, T, D, BrdColWrap>,
|
||||
column_filter::caller<30, T, D, BrdColWrap>,
|
||||
column_filter::caller<31, T, D, BrdColWrap>,
|
||||
column_filter::caller<32, T, D, BrdColWrap>
|
||||
}
|
||||
};
|
||||
|
||||
::loadKernel(kernel, ksize, stream);
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
|
||||
else
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
|
||||
|
||||
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
|
||||
}
|
||||
|
@@ -52,7 +52,7 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <size_t src_size, size_t dst_size> struct ArithmFuncTraits
|
||||
{
|
||||
@@ -152,7 +152,7 @@ namespace
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// addMat
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename D> struct VAdd4;
|
||||
template <> struct VAdd4<uint, uint> : binary_function<uint, uint, uint>
|
||||
@@ -336,19 +336,19 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename D> struct TransformFunctorTraits< VAdd4<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAdd4<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< VAdd2<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAdd2<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< AddMat<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::AddMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -446,7 +446,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// addScalar
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D> struct AddScalar : unary_function<T, D>
|
||||
{
|
||||
@@ -463,7 +463,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< AddScalar<T, S, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::AddScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -541,7 +541,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// subMat
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename D> struct VSub4;
|
||||
template <> struct VSub4<uint, uint> : binary_function<uint, uint, uint>
|
||||
@@ -725,19 +725,19 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename D> struct TransformFunctorTraits< VSub4<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::VSub4<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< VSub2<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::VSub2<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< SubMat<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::SubMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -908,7 +908,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// mulMat
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
struct Mul_8uc4_32f : binary_function<uint, float, uint>
|
||||
{
|
||||
@@ -966,15 +966,15 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <> struct TransformFunctorTraits<Mul_8uc4_32f> : ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
template <> struct TransformFunctorTraits<arithm::Mul_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< Mul<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::Mul<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< MulScale<T, S, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1066,7 +1066,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// mulScalar
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D> struct MulScalar : unary_function<T, D>
|
||||
{
|
||||
@@ -1083,7 +1083,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< MulScalar<T, S, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1157,7 +1157,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// divMat
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
struct Div_8uc4_32f : binary_function<uint, float, uint>
|
||||
{
|
||||
@@ -1234,15 +1234,15 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <> struct TransformFunctorTraits<Div_8uc4_32f> : ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< Div<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::Div<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< DivScale<T, S, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1403,7 +1403,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// divInv
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D> struct DivInv : unary_function<T, D>
|
||||
{
|
||||
@@ -1420,7 +1420,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< DivInv<T, S, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivInv<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1494,7 +1494,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// absDiffMat
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename D> struct VAbsDiff4;
|
||||
template <> struct VAbsDiff4<uint, uint> : binary_function<uint, uint, uint>
|
||||
@@ -1611,19 +1611,19 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename D> struct TransformFunctorTraits< VAbsDiff4<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAbsDiff4<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< VAbsDiff2<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAbsDiff2<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< AbsDiffMat<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< arithm::AbsDiffMat<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1666,7 +1666,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// absDiffScalar
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S> struct AbsDiffScalar : unary_function<T, T>
|
||||
{
|
||||
@@ -1684,7 +1684,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename S> struct TransformFunctorTraits< AbsDiffScalar<T, S> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T, typename S> struct TransformFunctorTraits< arithm::AbsDiffScalar<T, S> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1713,7 +1713,7 @@ namespace arithm
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< abs_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< abs_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1738,7 +1738,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// sqrMat
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> struct Sqr : unary_function<T, T>
|
||||
{
|
||||
@@ -1754,7 +1754,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< Sqr<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< arithm::Sqr<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1781,7 +1781,7 @@ namespace arithm
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1808,7 +1808,7 @@ namespace arithm
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< log_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< log_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1833,7 +1833,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// expMat
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> struct Exp : unary_function<T, T>
|
||||
{
|
||||
@@ -1850,7 +1850,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< Exp<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< arithm::Exp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1875,7 +1875,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// cmpMat
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <class Op, typename T>
|
||||
struct Cmp : binary_function<T, T, uchar>
|
||||
@@ -1890,7 +1890,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <class Op, typename T> struct TransformFunctorTraits< Cmp<Op, T> > : ArithmFuncTraits<sizeof(T), sizeof(uchar)>
|
||||
template <class Op, typename T> struct TransformFunctorTraits< arithm::Cmp<Op, T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -1957,7 +1957,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// cmpScalar
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
#define TYPE_VEC(type, cn) typename TypeVec<type, cn>::vec_type
|
||||
|
||||
@@ -2020,7 +2020,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <class Op, typename T> struct TransformFunctorTraits< CmpScalar<Op, T, 1> > : ArithmFuncTraits<sizeof(T), sizeof(uchar)>
|
||||
template <class Op, typename T> struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -2179,19 +2179,19 @@ namespace arithm
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< bit_not<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< bit_and<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< bit_and<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< bit_or<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< bit_or<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< bit_xor<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< bit_xor<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -2252,15 +2252,15 @@ namespace arithm
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_or<T> > > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_or<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_xor<T> > > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_xor<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -2298,7 +2298,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// min
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> struct VMin4;
|
||||
template <> struct VMin4<uint> : binary_function<uint, uint, uint>
|
||||
@@ -2389,23 +2389,23 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< VMin4<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< arithm::VMin4<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< VMin2<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< arithm::VMin2<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< minimum<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< minimum<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< minimum<T> > > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< minimum<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -2458,7 +2458,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// max
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> struct VMax4;
|
||||
template <> struct VMax4<uint> : binary_function<uint, uint, uint>
|
||||
@@ -2549,23 +2549,23 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< VMax4<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< arithm::VMax4<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< VMax2<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< arithm::VMax2<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< maximum<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< maximum<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< maximum<T> > > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< maximum<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -2620,23 +2620,23 @@ namespace arithm
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_binary_inv_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< thresh_binary_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_trunc_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< thresh_trunc_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_to_zero_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< thresh_to_zero_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_to_zero_inv_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< thresh_to_zero_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -2679,7 +2679,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// pow
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template<typename T, bool Signed = numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
|
||||
{
|
||||
@@ -2734,7 +2734,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< PowOp<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
template <typename T> struct TransformFunctorTraits< arithm::PowOp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
@@ -2759,7 +2759,7 @@ namespace arithm
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// addWeighted
|
||||
|
||||
namespace
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> struct UseDouble_
|
||||
{
|
||||
@@ -2809,14 +2809,14 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< AddWeighted<T1, T2, D> >
|
||||
template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted<T1, T2, D> >
|
||||
{
|
||||
};
|
||||
template <typename T1, typename T2, typename D, size_t src_size, size_t dst_size> struct AddWeightedTraits<T1, T2, D, src_size, src_size, dst_size> : ArithmFuncTraits<src_size, dst_size>
|
||||
template <typename T1, typename T2, typename D, size_t src_size, size_t dst_size> struct AddWeightedTraits<T1, T2, D, src_size, src_size, dst_size> : arithm::ArithmFuncTraits<src_size, dst_size>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T1, typename T2, typename D> struct TransformFunctorTraits< AddWeighted<T1, T2, D> > : AddWeightedTraits<T1, T2, D, sizeof(T1), sizeof(T2), sizeof(D)>
|
||||
template <typename T1, typename T2, typename D> struct TransformFunctorTraits< arithm::AddWeighted<T1, T2, D> > : AddWeightedTraits<T1, T2, D, sizeof(T1), sizeof(T2), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
@@ -47,6 +47,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
|
||||
#include "opencv2/gpu/device/common.hpp"
|
||||
@@ -148,4 +149,4 @@ namespace cv { namespace gpu { namespace device
|
||||
}}}
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
#endif /* CUDA_DISABLER */
|
||||
|
@@ -43,12 +43,11 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "thrust/device_ptr.h"
|
||||
#include "thrust/remove.h"
|
||||
#include "thrust/functional.h"
|
||||
#include "internal_shared.hpp"
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/remove.h>
|
||||
#include <thrust/functional.h>
|
||||
|
||||
using namespace thrust;
|
||||
#include "internal_shared.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace device { namespace globmotion {
|
||||
|
||||
@@ -64,7 +63,7 @@ int compactPoints(int N, float *points0, float *points1, const uchar *mask)
|
||||
return thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple(dpoints0, dpoints1)),
|
||||
thrust::make_zip_iterator(thrust::make_tuple(dpoints0 + N, dpoints1 + N)),
|
||||
dmask, thrust::not1(thrust::identity<uchar>()))
|
||||
- make_zip_iterator(make_tuple(dpoints0, dpoints1));
|
||||
- thrust::make_zip_iterator(make_tuple(dpoints0, dpoints1));
|
||||
}
|
||||
|
||||
|
||||
@@ -117,4 +116,4 @@ void calcWobbleSuppressionMaps(
|
||||
}}}}
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
#endif /* CUDA_DISABLER */
|
||||
|
@@ -51,9 +51,9 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
|
||||
namespace
|
||||
namespace hist
|
||||
{
|
||||
__global__ void histogram256(const uchar* src, int cols, int rows, size_t step, int* hist)
|
||||
__global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t step, int* hist)
|
||||
{
|
||||
__shared__ int shist[256];
|
||||
|
||||
@@ -94,16 +94,13 @@ namespace
|
||||
if (histVal > 0)
|
||||
::atomicAdd(hist + tid, histVal);
|
||||
}
|
||||
}
|
||||
|
||||
namespace hist
|
||||
{
|
||||
void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(src.rows, block.y));
|
||||
|
||||
::histogram256<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
|
||||
histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
@@ -113,7 +110,7 @@ namespace hist
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace
|
||||
namespace hist
|
||||
{
|
||||
__constant__ int c_lut[256];
|
||||
|
||||
@@ -133,7 +130,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <> struct TransformFunctorTraits<EqualizeHist> : DefaultTransformFunctorTraits<EqualizeHist>
|
||||
template <> struct TransformFunctorTraits<hist::EqualizeHist> : DefaultTransformFunctorTraits<hist::EqualizeHist>
|
||||
{
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
@@ -244,15 +244,17 @@ namespace cv { namespace gpu { namespace device
|
||||
return smem[0];
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
if (threadIdx.x == 0)
|
||||
smem[0] = sum;
|
||||
#endif
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
if (threadIdx.x == 0)
|
||||
smem[0] = sum;
|
||||
#endif
|
||||
__syncthreads();
|
||||
|
||||
__syncthreads();
|
||||
|
||||
return smem[0];
|
||||
return smem[0];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@@ -42,7 +42,9 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
|
||||
#include "opencv2/gpu/device/common.hpp"
|
||||
#include "opencv2/gpu/device/emulation.hpp"
|
||||
#include "opencv2/gpu/device/vec_math.hpp"
|
||||
|
@@ -55,7 +55,7 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
|
||||
namespace
|
||||
namespace detail
|
||||
{
|
||||
template <int cn> struct Unroll;
|
||||
template <> struct Unroll<1>
|
||||
@@ -218,7 +218,7 @@ namespace sum
|
||||
{
|
||||
sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);
|
||||
|
||||
device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(sum), tid, Unroll<cn>::op(plus<R>()));
|
||||
device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
@@ -254,7 +254,7 @@ namespace sum
|
||||
{
|
||||
sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);
|
||||
|
||||
device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(sum), tid, Unroll<cn>::op(plus<double>()));
|
||||
device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<double>()));
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
@@ -294,7 +294,7 @@ namespace sum
|
||||
}
|
||||
}
|
||||
|
||||
device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(sum), tid, Unroll<cn>::op(plus<R>()));
|
||||
device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
|
||||
|
||||
GlobalReduce<BLOCK_SIZE, R, cn>::run(sum, result, tid, bid, smem);
|
||||
}
|
||||
@@ -918,13 +918,11 @@ namespace countNonZero
|
||||
__global__ void kernel(const PtrStepSz<T> src, unsigned int* count, const int twidth, const int theight)
|
||||
{
|
||||
__shared__ unsigned int scount[BLOCK_SIZE];
|
||||
__shared__ bool is_last;
|
||||
|
||||
const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
|
||||
const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
|
||||
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
const int bid = blockIdx.y * gridDim.x + blockIdx.x;
|
||||
|
||||
unsigned int mycount = 0;
|
||||
|
||||
@@ -946,6 +944,9 @@ namespace countNonZero
|
||||
if (tid == 0)
|
||||
::atomicAdd(count, mycount);
|
||||
#else
|
||||
__shared__ bool is_last;
|
||||
const int bid = blockIdx.y * gridDim.x + blockIdx.x;
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
count[bid] = mycount;
|
||||
@@ -1244,7 +1245,7 @@ namespace reduce
|
||||
for (int x = threadIdx.x; x < src.cols; x += BLOCK_SIZE)
|
||||
myVal = op(myVal, saturate_cast<work_type>(srcRow[x]));
|
||||
|
||||
device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(myVal), threadIdx.x, Unroll<cn>::op(op));
|
||||
device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(myVal), threadIdx.x, detail::Unroll<cn>::op(op));
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
dst[y] = saturate_cast<dst_type>(op.result(myVal, src.cols));
|
||||
|
@@ -48,7 +48,7 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
|
||||
namespace
|
||||
namespace optflowbm
|
||||
{
|
||||
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_prev(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_curr(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
@@ -145,10 +145,7 @@ namespace
|
||||
velx(i, j) = static_cast<float>(sumx) / countMin;
|
||||
vely(i, j) = static_cast<float>(sumy) / countMin;
|
||||
}
|
||||
}
|
||||
|
||||
namespace optflowbm
|
||||
{
|
||||
void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious,
|
||||
int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream)
|
||||
{
|
||||
|
@@ -47,6 +47,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
|
||||
#include "opencv2/gpu/device/common.hpp"
|
||||
|
@@ -57,7 +57,7 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
|
||||
namespace
|
||||
namespace pyrlk
|
||||
{
|
||||
__constant__ int c_winSize_x;
|
||||
__constant__ int c_winSize_y;
|
||||
@@ -123,7 +123,7 @@ namespace
|
||||
}
|
||||
|
||||
template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
|
||||
__global__ void sparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
|
||||
__global__ void sparseKernel(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
|
||||
{
|
||||
#if __CUDA_ARCH__ <= 110
|
||||
const int BLOCK_SIZE = 128;
|
||||
@@ -321,9 +321,9 @@ namespace
|
||||
dim3 grid(ptcount);
|
||||
|
||||
if (level == 0 && err)
|
||||
sparse<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
|
||||
sparseKernel<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
|
||||
else
|
||||
sparse<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
|
||||
sparseKernel<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
@@ -332,7 +332,7 @@ namespace
|
||||
}
|
||||
|
||||
template <bool calcErr>
|
||||
__global__ void dense(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
|
||||
__global__ void denseKernel(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
|
||||
{
|
||||
extern __shared__ int smem[];
|
||||
|
||||
@@ -476,10 +476,7 @@ namespace
|
||||
err(y, x) = static_cast<float>(errval) / (c_winSize_x * c_winSize_y);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace pyrlk
|
||||
{
|
||||
void loadConstants(int2 winSize, int iters)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
|
||||
@@ -500,11 +497,11 @@ namespace pyrlk
|
||||
|
||||
static const func_t funcs[5][5] =
|
||||
{
|
||||
{::sparse_caller<1, 1, 1>, ::sparse_caller<1, 2, 1>, ::sparse_caller<1, 3, 1>, ::sparse_caller<1, 4, 1>, ::sparse_caller<1, 5, 1>},
|
||||
{::sparse_caller<1, 1, 2>, ::sparse_caller<1, 2, 2>, ::sparse_caller<1, 3, 2>, ::sparse_caller<1, 4, 2>, ::sparse_caller<1, 5, 2>},
|
||||
{::sparse_caller<1, 1, 3>, ::sparse_caller<1, 2, 3>, ::sparse_caller<1, 3, 3>, ::sparse_caller<1, 4, 3>, ::sparse_caller<1, 5, 3>},
|
||||
{::sparse_caller<1, 1, 4>, ::sparse_caller<1, 2, 4>, ::sparse_caller<1, 3, 4>, ::sparse_caller<1, 4, 4>, ::sparse_caller<1, 5, 4>},
|
||||
{::sparse_caller<1, 1, 5>, ::sparse_caller<1, 2, 5>, ::sparse_caller<1, 3, 5>, ::sparse_caller<1, 4, 5>, ::sparse_caller<1, 5, 5>}
|
||||
{sparse_caller<1, 1, 1>, sparse_caller<1, 2, 1>, sparse_caller<1, 3, 1>, sparse_caller<1, 4, 1>, sparse_caller<1, 5, 1>},
|
||||
{sparse_caller<1, 1, 2>, sparse_caller<1, 2, 2>, sparse_caller<1, 3, 2>, sparse_caller<1, 4, 2>, sparse_caller<1, 5, 2>},
|
||||
{sparse_caller<1, 1, 3>, sparse_caller<1, 2, 3>, sparse_caller<1, 3, 3>, sparse_caller<1, 4, 3>, sparse_caller<1, 5, 3>},
|
||||
{sparse_caller<1, 1, 4>, sparse_caller<1, 2, 4>, sparse_caller<1, 3, 4>, sparse_caller<1, 4, 4>, sparse_caller<1, 5, 4>},
|
||||
{sparse_caller<1, 1, 5>, sparse_caller<1, 2, 5>, sparse_caller<1, 3, 5>, sparse_caller<1, 4, 5>, sparse_caller<1, 5, 5>}
|
||||
};
|
||||
|
||||
bindTexture(&tex_If, I);
|
||||
@@ -522,11 +519,11 @@ namespace pyrlk
|
||||
|
||||
static const func_t funcs[5][5] =
|
||||
{
|
||||
{::sparse_caller<4, 1, 1>, ::sparse_caller<4, 2, 1>, ::sparse_caller<4, 3, 1>, ::sparse_caller<4, 4, 1>, ::sparse_caller<4, 5, 1>},
|
||||
{::sparse_caller<4, 1, 2>, ::sparse_caller<4, 2, 2>, ::sparse_caller<4, 3, 2>, ::sparse_caller<4, 4, 2>, ::sparse_caller<4, 5, 2>},
|
||||
{::sparse_caller<4, 1, 3>, ::sparse_caller<4, 2, 3>, ::sparse_caller<4, 3, 3>, ::sparse_caller<4, 4, 3>, ::sparse_caller<4, 5, 3>},
|
||||
{::sparse_caller<4, 1, 4>, ::sparse_caller<4, 2, 4>, ::sparse_caller<4, 3, 4>, ::sparse_caller<4, 4, 4>, ::sparse_caller<4, 5, 4>},
|
||||
{::sparse_caller<4, 1, 5>, ::sparse_caller<4, 2, 5>, ::sparse_caller<4, 3, 5>, ::sparse_caller<4, 4, 5>, ::sparse_caller<4, 5, 5>}
|
||||
{sparse_caller<4, 1, 1>, sparse_caller<4, 2, 1>, sparse_caller<4, 3, 1>, sparse_caller<4, 4, 1>, sparse_caller<4, 5, 1>},
|
||||
{sparse_caller<4, 1, 2>, sparse_caller<4, 2, 2>, sparse_caller<4, 3, 2>, sparse_caller<4, 4, 2>, sparse_caller<4, 5, 2>},
|
||||
{sparse_caller<4, 1, 3>, sparse_caller<4, 2, 3>, sparse_caller<4, 3, 3>, sparse_caller<4, 4, 3>, sparse_caller<4, 5, 3>},
|
||||
{sparse_caller<4, 1, 4>, sparse_caller<4, 2, 4>, sparse_caller<4, 3, 4>, sparse_caller<4, 4, 4>, sparse_caller<4, 5, 4>},
|
||||
{sparse_caller<4, 1, 5>, sparse_caller<4, 2, 5>, sparse_caller<4, 3, 5>, sparse_caller<4, 4, 5>, sparse_caller<4, 5, 5>}
|
||||
};
|
||||
|
||||
bindTexture(&tex_If4, I);
|
||||
@@ -551,12 +548,12 @@ namespace pyrlk
|
||||
|
||||
if (err.data)
|
||||
{
|
||||
::dense<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
|
||||
denseKernel<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
else
|
||||
{
|
||||
::dense<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
|
||||
denseKernel<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
|
||||
|
@@ -49,20 +49,12 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
|
||||
namespace
|
||||
namespace row_filter
|
||||
{
|
||||
#define MAX_KERNEL_SIZE 32
|
||||
|
||||
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
||||
|
||||
void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
|
||||
{
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
|
||||
else
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
|
||||
}
|
||||
|
||||
template <int KSIZE, typename T, typename D, typename B>
|
||||
__global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
|
||||
{
|
||||
@@ -195,182 +187,185 @@ namespace filter
|
||||
{
|
||||
{
|
||||
0,
|
||||
::caller< 1, T, D, BrdRowReflect101>,
|
||||
::caller< 2, T, D, BrdRowReflect101>,
|
||||
::caller< 3, T, D, BrdRowReflect101>,
|
||||
::caller< 4, T, D, BrdRowReflect101>,
|
||||
::caller< 5, T, D, BrdRowReflect101>,
|
||||
::caller< 6, T, D, BrdRowReflect101>,
|
||||
::caller< 7, T, D, BrdRowReflect101>,
|
||||
::caller< 8, T, D, BrdRowReflect101>,
|
||||
::caller< 9, T, D, BrdRowReflect101>,
|
||||
::caller<10, T, D, BrdRowReflect101>,
|
||||
::caller<11, T, D, BrdRowReflect101>,
|
||||
::caller<12, T, D, BrdRowReflect101>,
|
||||
::caller<13, T, D, BrdRowReflect101>,
|
||||
::caller<14, T, D, BrdRowReflect101>,
|
||||
::caller<15, T, D, BrdRowReflect101>,
|
||||
::caller<16, T, D, BrdRowReflect101>,
|
||||
::caller<17, T, D, BrdRowReflect101>,
|
||||
::caller<18, T, D, BrdRowReflect101>,
|
||||
::caller<19, T, D, BrdRowReflect101>,
|
||||
::caller<20, T, D, BrdRowReflect101>,
|
||||
::caller<21, T, D, BrdRowReflect101>,
|
||||
::caller<22, T, D, BrdRowReflect101>,
|
||||
::caller<23, T, D, BrdRowReflect101>,
|
||||
::caller<24, T, D, BrdRowReflect101>,
|
||||
::caller<25, T, D, BrdRowReflect101>,
|
||||
::caller<26, T, D, BrdRowReflect101>,
|
||||
::caller<27, T, D, BrdRowReflect101>,
|
||||
::caller<28, T, D, BrdRowReflect101>,
|
||||
::caller<29, T, D, BrdRowReflect101>,
|
||||
::caller<30, T, D, BrdRowReflect101>,
|
||||
::caller<31, T, D, BrdRowReflect101>,
|
||||
::caller<32, T, D, BrdRowReflect101>
|
||||
row_filter::caller< 1, T, D, BrdRowReflect101>,
|
||||
row_filter::caller< 2, T, D, BrdRowReflect101>,
|
||||
row_filter::caller< 3, T, D, BrdRowReflect101>,
|
||||
row_filter::caller< 4, T, D, BrdRowReflect101>,
|
||||
row_filter::caller< 5, T, D, BrdRowReflect101>,
|
||||
row_filter::caller< 6, T, D, BrdRowReflect101>,
|
||||
row_filter::caller< 7, T, D, BrdRowReflect101>,
|
||||
row_filter::caller< 8, T, D, BrdRowReflect101>,
|
||||
row_filter::caller< 9, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<10, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<11, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<12, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<13, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<14, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<15, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<16, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<17, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<18, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<19, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<20, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<21, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<22, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<23, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<24, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<25, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<26, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<27, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<28, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<29, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<30, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<31, T, D, BrdRowReflect101>,
|
||||
row_filter::caller<32, T, D, BrdRowReflect101>
|
||||
},
|
||||
{
|
||||
0,
|
||||
::caller< 1, T, D, BrdRowReplicate>,
|
||||
::caller< 2, T, D, BrdRowReplicate>,
|
||||
::caller< 3, T, D, BrdRowReplicate>,
|
||||
::caller< 4, T, D, BrdRowReplicate>,
|
||||
::caller< 5, T, D, BrdRowReplicate>,
|
||||
::caller< 6, T, D, BrdRowReplicate>,
|
||||
::caller< 7, T, D, BrdRowReplicate>,
|
||||
::caller< 8, T, D, BrdRowReplicate>,
|
||||
::caller< 9, T, D, BrdRowReplicate>,
|
||||
::caller<10, T, D, BrdRowReplicate>,
|
||||
::caller<11, T, D, BrdRowReplicate>,
|
||||
::caller<12, T, D, BrdRowReplicate>,
|
||||
::caller<13, T, D, BrdRowReplicate>,
|
||||
::caller<14, T, D, BrdRowReplicate>,
|
||||
::caller<15, T, D, BrdRowReplicate>,
|
||||
::caller<16, T, D, BrdRowReplicate>,
|
||||
::caller<17, T, D, BrdRowReplicate>,
|
||||
::caller<18, T, D, BrdRowReplicate>,
|
||||
::caller<19, T, D, BrdRowReplicate>,
|
||||
::caller<20, T, D, BrdRowReplicate>,
|
||||
::caller<21, T, D, BrdRowReplicate>,
|
||||
::caller<22, T, D, BrdRowReplicate>,
|
||||
::caller<23, T, D, BrdRowReplicate>,
|
||||
::caller<24, T, D, BrdRowReplicate>,
|
||||
::caller<25, T, D, BrdRowReplicate>,
|
||||
::caller<26, T, D, BrdRowReplicate>,
|
||||
::caller<27, T, D, BrdRowReplicate>,
|
||||
::caller<28, T, D, BrdRowReplicate>,
|
||||
::caller<29, T, D, BrdRowReplicate>,
|
||||
::caller<30, T, D, BrdRowReplicate>,
|
||||
::caller<31, T, D, BrdRowReplicate>,
|
||||
::caller<32, T, D, BrdRowReplicate>
|
||||
row_filter::caller< 1, T, D, BrdRowReplicate>,
|
||||
row_filter::caller< 2, T, D, BrdRowReplicate>,
|
||||
row_filter::caller< 3, T, D, BrdRowReplicate>,
|
||||
row_filter::caller< 4, T, D, BrdRowReplicate>,
|
||||
row_filter::caller< 5, T, D, BrdRowReplicate>,
|
||||
row_filter::caller< 6, T, D, BrdRowReplicate>,
|
||||
row_filter::caller< 7, T, D, BrdRowReplicate>,
|
||||
row_filter::caller< 8, T, D, BrdRowReplicate>,
|
||||
row_filter::caller< 9, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<10, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<11, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<12, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<13, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<14, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<15, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<16, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<17, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<18, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<19, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<20, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<21, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<22, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<23, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<24, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<25, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<26, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<27, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<28, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<29, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<30, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<31, T, D, BrdRowReplicate>,
|
||||
row_filter::caller<32, T, D, BrdRowReplicate>
|
||||
},
|
||||
{
|
||||
0,
|
||||
::caller< 1, T, D, BrdRowConstant>,
|
||||
::caller< 2, T, D, BrdRowConstant>,
|
||||
::caller< 3, T, D, BrdRowConstant>,
|
||||
::caller< 4, T, D, BrdRowConstant>,
|
||||
::caller< 5, T, D, BrdRowConstant>,
|
||||
::caller< 6, T, D, BrdRowConstant>,
|
||||
::caller< 7, T, D, BrdRowConstant>,
|
||||
::caller< 8, T, D, BrdRowConstant>,
|
||||
::caller< 9, T, D, BrdRowConstant>,
|
||||
::caller<10, T, D, BrdRowConstant>,
|
||||
::caller<11, T, D, BrdRowConstant>,
|
||||
::caller<12, T, D, BrdRowConstant>,
|
||||
::caller<13, T, D, BrdRowConstant>,
|
||||
::caller<14, T, D, BrdRowConstant>,
|
||||
::caller<15, T, D, BrdRowConstant>,
|
||||
::caller<16, T, D, BrdRowConstant>,
|
||||
::caller<17, T, D, BrdRowConstant>,
|
||||
::caller<18, T, D, BrdRowConstant>,
|
||||
::caller<19, T, D, BrdRowConstant>,
|
||||
::caller<20, T, D, BrdRowConstant>,
|
||||
::caller<21, T, D, BrdRowConstant>,
|
||||
::caller<22, T, D, BrdRowConstant>,
|
||||
::caller<23, T, D, BrdRowConstant>,
|
||||
::caller<24, T, D, BrdRowConstant>,
|
||||
::caller<25, T, D, BrdRowConstant>,
|
||||
::caller<26, T, D, BrdRowConstant>,
|
||||
::caller<27, T, D, BrdRowConstant>,
|
||||
::caller<28, T, D, BrdRowConstant>,
|
||||
::caller<29, T, D, BrdRowConstant>,
|
||||
::caller<30, T, D, BrdRowConstant>,
|
||||
::caller<31, T, D, BrdRowConstant>,
|
||||
::caller<32, T, D, BrdRowConstant>
|
||||
row_filter::caller< 1, T, D, BrdRowConstant>,
|
||||
row_filter::caller< 2, T, D, BrdRowConstant>,
|
||||
row_filter::caller< 3, T, D, BrdRowConstant>,
|
||||
row_filter::caller< 4, T, D, BrdRowConstant>,
|
||||
row_filter::caller< 5, T, D, BrdRowConstant>,
|
||||
row_filter::caller< 6, T, D, BrdRowConstant>,
|
||||
row_filter::caller< 7, T, D, BrdRowConstant>,
|
||||
row_filter::caller< 8, T, D, BrdRowConstant>,
|
||||
row_filter::caller< 9, T, D, BrdRowConstant>,
|
||||
row_filter::caller<10, T, D, BrdRowConstant>,
|
||||
row_filter::caller<11, T, D, BrdRowConstant>,
|
||||
row_filter::caller<12, T, D, BrdRowConstant>,
|
||||
row_filter::caller<13, T, D, BrdRowConstant>,
|
||||
row_filter::caller<14, T, D, BrdRowConstant>,
|
||||
row_filter::caller<15, T, D, BrdRowConstant>,
|
||||
row_filter::caller<16, T, D, BrdRowConstant>,
|
||||
row_filter::caller<17, T, D, BrdRowConstant>,
|
||||
row_filter::caller<18, T, D, BrdRowConstant>,
|
||||
row_filter::caller<19, T, D, BrdRowConstant>,
|
||||
row_filter::caller<20, T, D, BrdRowConstant>,
|
||||
row_filter::caller<21, T, D, BrdRowConstant>,
|
||||
row_filter::caller<22, T, D, BrdRowConstant>,
|
||||
row_filter::caller<23, T, D, BrdRowConstant>,
|
||||
row_filter::caller<24, T, D, BrdRowConstant>,
|
||||
row_filter::caller<25, T, D, BrdRowConstant>,
|
||||
row_filter::caller<26, T, D, BrdRowConstant>,
|
||||
row_filter::caller<27, T, D, BrdRowConstant>,
|
||||
row_filter::caller<28, T, D, BrdRowConstant>,
|
||||
row_filter::caller<29, T, D, BrdRowConstant>,
|
||||
row_filter::caller<30, T, D, BrdRowConstant>,
|
||||
row_filter::caller<31, T, D, BrdRowConstant>,
|
||||
row_filter::caller<32, T, D, BrdRowConstant>
|
||||
},
|
||||
{
|
||||
0,
|
||||
::caller< 1, T, D, BrdRowReflect>,
|
||||
::caller< 2, T, D, BrdRowReflect>,
|
||||
::caller< 3, T, D, BrdRowReflect>,
|
||||
::caller< 4, T, D, BrdRowReflect>,
|
||||
::caller< 5, T, D, BrdRowReflect>,
|
||||
::caller< 6, T, D, BrdRowReflect>,
|
||||
::caller< 7, T, D, BrdRowReflect>,
|
||||
::caller< 8, T, D, BrdRowReflect>,
|
||||
::caller< 9, T, D, BrdRowReflect>,
|
||||
::caller<10, T, D, BrdRowReflect>,
|
||||
::caller<11, T, D, BrdRowReflect>,
|
||||
::caller<12, T, D, BrdRowReflect>,
|
||||
::caller<13, T, D, BrdRowReflect>,
|
||||
::caller<14, T, D, BrdRowReflect>,
|
||||
::caller<15, T, D, BrdRowReflect>,
|
||||
::caller<16, T, D, BrdRowReflect>,
|
||||
::caller<17, T, D, BrdRowReflect>,
|
||||
::caller<18, T, D, BrdRowReflect>,
|
||||
::caller<19, T, D, BrdRowReflect>,
|
||||
::caller<20, T, D, BrdRowReflect>,
|
||||
::caller<21, T, D, BrdRowReflect>,
|
||||
::caller<22, T, D, BrdRowReflect>,
|
||||
::caller<23, T, D, BrdRowReflect>,
|
||||
::caller<24, T, D, BrdRowReflect>,
|
||||
::caller<25, T, D, BrdRowReflect>,
|
||||
::caller<26, T, D, BrdRowReflect>,
|
||||
::caller<27, T, D, BrdRowReflect>,
|
||||
::caller<28, T, D, BrdRowReflect>,
|
||||
::caller<29, T, D, BrdRowReflect>,
|
||||
::caller<30, T, D, BrdRowReflect>,
|
||||
::caller<31, T, D, BrdRowReflect>,
|
||||
::caller<32, T, D, BrdRowReflect>
|
||||
row_filter::caller< 1, T, D, BrdRowReflect>,
|
||||
row_filter::caller< 2, T, D, BrdRowReflect>,
|
||||
row_filter::caller< 3, T, D, BrdRowReflect>,
|
||||
row_filter::caller< 4, T, D, BrdRowReflect>,
|
||||
row_filter::caller< 5, T, D, BrdRowReflect>,
|
||||
row_filter::caller< 6, T, D, BrdRowReflect>,
|
||||
row_filter::caller< 7, T, D, BrdRowReflect>,
|
||||
row_filter::caller< 8, T, D, BrdRowReflect>,
|
||||
row_filter::caller< 9, T, D, BrdRowReflect>,
|
||||
row_filter::caller<10, T, D, BrdRowReflect>,
|
||||
row_filter::caller<11, T, D, BrdRowReflect>,
|
||||
row_filter::caller<12, T, D, BrdRowReflect>,
|
||||
row_filter::caller<13, T, D, BrdRowReflect>,
|
||||
row_filter::caller<14, T, D, BrdRowReflect>,
|
||||
row_filter::caller<15, T, D, BrdRowReflect>,
|
||||
row_filter::caller<16, T, D, BrdRowReflect>,
|
||||
row_filter::caller<17, T, D, BrdRowReflect>,
|
||||
row_filter::caller<18, T, D, BrdRowReflect>,
|
||||
row_filter::caller<19, T, D, BrdRowReflect>,
|
||||
row_filter::caller<20, T, D, BrdRowReflect>,
|
||||
row_filter::caller<21, T, D, BrdRowReflect>,
|
||||
row_filter::caller<22, T, D, BrdRowReflect>,
|
||||
row_filter::caller<23, T, D, BrdRowReflect>,
|
||||
row_filter::caller<24, T, D, BrdRowReflect>,
|
||||
row_filter::caller<25, T, D, BrdRowReflect>,
|
||||
row_filter::caller<26, T, D, BrdRowReflect>,
|
||||
row_filter::caller<27, T, D, BrdRowReflect>,
|
||||
row_filter::caller<28, T, D, BrdRowReflect>,
|
||||
row_filter::caller<29, T, D, BrdRowReflect>,
|
||||
row_filter::caller<30, T, D, BrdRowReflect>,
|
||||
row_filter::caller<31, T, D, BrdRowReflect>,
|
||||
row_filter::caller<32, T, D, BrdRowReflect>
|
||||
},
|
||||
{
|
||||
0,
|
||||
::caller< 1, T, D, BrdRowWrap>,
|
||||
::caller< 2, T, D, BrdRowWrap>,
|
||||
::caller< 3, T, D, BrdRowWrap>,
|
||||
::caller< 4, T, D, BrdRowWrap>,
|
||||
::caller< 5, T, D, BrdRowWrap>,
|
||||
::caller< 6, T, D, BrdRowWrap>,
|
||||
::caller< 7, T, D, BrdRowWrap>,
|
||||
::caller< 8, T, D, BrdRowWrap>,
|
||||
::caller< 9, T, D, BrdRowWrap>,
|
||||
::caller<10, T, D, BrdRowWrap>,
|
||||
::caller<11, T, D, BrdRowWrap>,
|
||||
::caller<12, T, D, BrdRowWrap>,
|
||||
::caller<13, T, D, BrdRowWrap>,
|
||||
::caller<14, T, D, BrdRowWrap>,
|
||||
::caller<15, T, D, BrdRowWrap>,
|
||||
::caller<16, T, D, BrdRowWrap>,
|
||||
::caller<17, T, D, BrdRowWrap>,
|
||||
::caller<18, T, D, BrdRowWrap>,
|
||||
::caller<19, T, D, BrdRowWrap>,
|
||||
::caller<20, T, D, BrdRowWrap>,
|
||||
::caller<21, T, D, BrdRowWrap>,
|
||||
::caller<22, T, D, BrdRowWrap>,
|
||||
::caller<23, T, D, BrdRowWrap>,
|
||||
::caller<24, T, D, BrdRowWrap>,
|
||||
::caller<25, T, D, BrdRowWrap>,
|
||||
::caller<26, T, D, BrdRowWrap>,
|
||||
::caller<27, T, D, BrdRowWrap>,
|
||||
::caller<28, T, D, BrdRowWrap>,
|
||||
::caller<29, T, D, BrdRowWrap>,
|
||||
::caller<30, T, D, BrdRowWrap>,
|
||||
::caller<31, T, D, BrdRowWrap>,
|
||||
::caller<32, T, D, BrdRowWrap>
|
||||
row_filter::caller< 1, T, D, BrdRowWrap>,
|
||||
row_filter::caller< 2, T, D, BrdRowWrap>,
|
||||
row_filter::caller< 3, T, D, BrdRowWrap>,
|
||||
row_filter::caller< 4, T, D, BrdRowWrap>,
|
||||
row_filter::caller< 5, T, D, BrdRowWrap>,
|
||||
row_filter::caller< 6, T, D, BrdRowWrap>,
|
||||
row_filter::caller< 7, T, D, BrdRowWrap>,
|
||||
row_filter::caller< 8, T, D, BrdRowWrap>,
|
||||
row_filter::caller< 9, T, D, BrdRowWrap>,
|
||||
row_filter::caller<10, T, D, BrdRowWrap>,
|
||||
row_filter::caller<11, T, D, BrdRowWrap>,
|
||||
row_filter::caller<12, T, D, BrdRowWrap>,
|
||||
row_filter::caller<13, T, D, BrdRowWrap>,
|
||||
row_filter::caller<14, T, D, BrdRowWrap>,
|
||||
row_filter::caller<15, T, D, BrdRowWrap>,
|
||||
row_filter::caller<16, T, D, BrdRowWrap>,
|
||||
row_filter::caller<17, T, D, BrdRowWrap>,
|
||||
row_filter::caller<18, T, D, BrdRowWrap>,
|
||||
row_filter::caller<19, T, D, BrdRowWrap>,
|
||||
row_filter::caller<20, T, D, BrdRowWrap>,
|
||||
row_filter::caller<21, T, D, BrdRowWrap>,
|
||||
row_filter::caller<22, T, D, BrdRowWrap>,
|
||||
row_filter::caller<23, T, D, BrdRowWrap>,
|
||||
row_filter::caller<24, T, D, BrdRowWrap>,
|
||||
row_filter::caller<25, T, D, BrdRowWrap>,
|
||||
row_filter::caller<26, T, D, BrdRowWrap>,
|
||||
row_filter::caller<27, T, D, BrdRowWrap>,
|
||||
row_filter::caller<28, T, D, BrdRowWrap>,
|
||||
row_filter::caller<29, T, D, BrdRowWrap>,
|
||||
row_filter::caller<30, T, D, BrdRowWrap>,
|
||||
row_filter::caller<31, T, D, BrdRowWrap>,
|
||||
row_filter::caller<32, T, D, BrdRowWrap>
|
||||
}
|
||||
};
|
||||
|
||||
loadKernel(kernel, ksize, stream);
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
|
||||
else
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
|
||||
|
||||
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
|
||||
}
|
||||
|
@@ -508,4 +508,4 @@ namespace cv { namespace gpu { namespace device
|
||||
}}} // namespace cv { namespace gpu { namespace device
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
#endif /* CUDA_DISABLER */
|
||||
|
@@ -52,9 +52,9 @@ using namespace cv::gpu::device;
|
||||
////////////////////////////////////////////////////////////
|
||||
// centeredGradient
|
||||
|
||||
namespace
|
||||
namespace tvl1flow
|
||||
{
|
||||
__global__ void centeredGradient(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
|
||||
__global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
@@ -65,16 +65,13 @@ namespace
|
||||
dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0)));
|
||||
dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
|
||||
}
|
||||
}
|
||||
|
||||
namespace tvl1flow
|
||||
{
|
||||
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy)
|
||||
{
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||
|
||||
::centeredGradient<<<grid, block>>>(src, dx, dy);
|
||||
centeredGradientKernel<<<grid, block>>>(src, dx, dy);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@@ -84,7 +81,7 @@ namespace tvl1flow
|
||||
////////////////////////////////////////////////////////////
|
||||
// warpBackward
|
||||
|
||||
namespace
|
||||
namespace tvl1flow
|
||||
{
|
||||
static __device__ __forceinline__ float bicubicCoeff(float x_)
|
||||
{
|
||||
@@ -107,7 +104,7 @@ namespace
|
||||
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
|
||||
__global__ void warpBackward(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
|
||||
__global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
@@ -166,10 +163,7 @@ namespace
|
||||
const float I0Val = I0(y, x);
|
||||
rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
|
||||
}
|
||||
}
|
||||
|
||||
namespace tvl1flow
|
||||
{
|
||||
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho)
|
||||
{
|
||||
const dim3 block(32, 8);
|
||||
@@ -179,7 +173,7 @@ namespace tvl1flow
|
||||
bindTexture(&tex_I1x, I1x);
|
||||
bindTexture(&tex_I1y, I1y);
|
||||
|
||||
::warpBackward<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
|
||||
warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@@ -189,7 +183,7 @@ namespace tvl1flow
|
||||
////////////////////////////////////////////////////////////
|
||||
// estimateU
|
||||
|
||||
namespace
|
||||
namespace tvl1flow
|
||||
{
|
||||
__device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x)
|
||||
{
|
||||
@@ -213,7 +207,7 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void estimateU(const PtrStepSzf I1wx, const PtrStepf I1wy,
|
||||
__global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy,
|
||||
const PtrStepf grad, const PtrStepf rho_c,
|
||||
const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
|
||||
PtrStepf u1, PtrStepf u2, PtrStepf error,
|
||||
@@ -275,10 +269,7 @@ namespace
|
||||
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
|
||||
error(y, x) = n1 + n2;
|
||||
}
|
||||
}
|
||||
|
||||
namespace tvl1flow
|
||||
{
|
||||
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
|
||||
PtrStepSzf grad, PtrStepSzf rho_c,
|
||||
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
|
||||
@@ -288,7 +279,7 @@ namespace tvl1flow
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
|
||||
|
||||
::estimateU<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
|
||||
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@@ -298,9 +289,9 @@ namespace tvl1flow
|
||||
////////////////////////////////////////////////////////////
|
||||
// estimateDualVariables
|
||||
|
||||
namespace
|
||||
namespace tvl1flow
|
||||
{
|
||||
__global__ void estimateDualVariables(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut)
|
||||
__global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
@@ -325,16 +316,13 @@ namespace
|
||||
p21(y, x) = (p21(y, x) + taut * u2x) / ng2;
|
||||
p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
|
||||
}
|
||||
}
|
||||
|
||||
namespace tvl1flow
|
||||
{
|
||||
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut)
|
||||
{
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
|
||||
|
||||
::estimateDualVariables<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut);
|
||||
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
@@ -551,7 +551,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
|
||||
src.locateROI(whole, offset);
|
||||
|
||||
if (info.supports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
|
||||
&& offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (src.step - offset.x))
|
||||
&& offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
|
||||
{
|
||||
ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
|
||||
|
||||
|
Reference in New Issue
Block a user