added assertion on device features (global atomics) into gpu tests
This commit is contained in:
@@ -40,7 +40,7 @@
|
||||
//
|
||||
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
|
||||
//
|
||||
// The original code was written by Paul Furgale and Chi Hay Tong
|
||||
// The original code was written by Paul Furgale and Chi Hay Tong
|
||||
// and later optimized and prepared for integration into OpenCV by Itseez.
|
||||
//
|
||||
//M*/
|
||||
@@ -52,9 +52,9 @@
|
||||
#include "opencv2/gpu/device/functional.hpp"
|
||||
#include "opencv2/gpu/device/filters.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace surf
|
||||
namespace surf
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Global parameters
|
||||
@@ -123,7 +123,7 @@ namespace cv { namespace gpu { namespace device
|
||||
#endif
|
||||
|
||||
float ratio = (float)newSize / oldSize;
|
||||
|
||||
|
||||
real_t d = 0;
|
||||
|
||||
#pragma unroll
|
||||
@@ -225,7 +225,7 @@ namespace cv { namespace gpu { namespace device
|
||||
static __device__ bool check(int sum_i, int sum_j, int size)
|
||||
{
|
||||
float ratio = (float)size / 9.0f;
|
||||
|
||||
|
||||
float d = 0;
|
||||
|
||||
int dx1 = __float2int_rn(ratio * c_DM[0]);
|
||||
@@ -423,12 +423,12 @@ namespace cv { namespace gpu { namespace device
|
||||
if (::fabs(x[0]) <= 1.f && ::fabs(x[1]) <= 1.f && ::fabs(x[2]) <= 1.f)
|
||||
{
|
||||
// if the step is within the interpolation region, perform it
|
||||
|
||||
|
||||
const int size = calcSize(c_octave, maxPos.z);
|
||||
|
||||
const int sum_i = (maxPos.y - ((size >> 1) >> c_octave)) << c_octave;
|
||||
const int sum_j = (maxPos.x - ((size >> 1) >> c_octave)) << c_octave;
|
||||
|
||||
|
||||
const float center_i = sum_i + (float)(size - 1) / 2;
|
||||
const float center_j = sum_j + (float)(size - 1) / 2;
|
||||
|
||||
@@ -471,8 +471,8 @@ namespace cv { namespace gpu { namespace device
|
||||
#endif
|
||||
}
|
||||
|
||||
void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
|
||||
float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
|
||||
void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
|
||||
float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
|
||||
unsigned int* featureCounter)
|
||||
{
|
||||
dim3 threads;
|
||||
@@ -509,7 +509,8 @@ namespace cv { namespace gpu { namespace device
|
||||
__shared__ float s_Y[128];
|
||||
__shared__ float s_angle[128];
|
||||
|
||||
__shared__ float s_sum[32 * 4];
|
||||
__shared__ float s_sumx[32 * 4];
|
||||
__shared__ float s_sumy[32 * 4];
|
||||
|
||||
/* The sampling intervals and wavelet sized for selecting an orientation
|
||||
and building the keypoint descriptor are defined relative to 's' */
|
||||
@@ -522,126 +523,109 @@ namespace cv { namespace gpu { namespace device
|
||||
const int grad_wav_size = 2 * __float2int_rn(2.0f * s);
|
||||
|
||||
// check when grad_wav_size is too big
|
||||
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
|
||||
if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size)
|
||||
return;
|
||||
|
||||
// Calc X, Y, angle and store it to shared memory
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
|
||||
float X = 0.0f, Y = 0.0f, angle = 0.0f;
|
||||
|
||||
if (tid < ORI_SAMPLES)
|
||||
{
|
||||
// Calc X, Y, angle and store it to shared memory
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
const float margin = (float)(grad_wav_size - 1) / 2.0f;
|
||||
const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin);
|
||||
const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin);
|
||||
|
||||
float X = 0.0f, Y = 0.0f, angle = 0.0f;
|
||||
|
||||
if (tid < ORI_SAMPLES)
|
||||
if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
|
||||
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
|
||||
{
|
||||
const float margin = (float)(grad_wav_size - 1) / 2.0f;
|
||||
const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin);
|
||||
const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin);
|
||||
X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x);
|
||||
Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x);
|
||||
|
||||
if ((unsigned)y < (unsigned)((c_img_rows + 1) - grad_wav_size) && (unsigned)x < (unsigned)((c_img_cols + 1) - grad_wav_size))
|
||||
{
|
||||
X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x);
|
||||
Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x);
|
||||
|
||||
angle = atan2f(Y, X);
|
||||
if (angle < 0)
|
||||
angle += 2.0f * CV_PI_F;
|
||||
angle *= 180.0f / CV_PI_F;
|
||||
}
|
||||
angle = atan2f(Y, X);
|
||||
if (angle < 0)
|
||||
angle += 2.0f * CV_PI_F;
|
||||
angle *= 180.0f / CV_PI_F;
|
||||
}
|
||||
s_X[tid] = X;
|
||||
s_Y[tid] = Y;
|
||||
s_angle[tid] = angle;
|
||||
}
|
||||
s_X[tid] = X;
|
||||
s_Y[tid] = Y;
|
||||
s_angle[tid] = angle;
|
||||
__syncthreads();
|
||||
|
||||
float bestx = 0, besty = 0, best_mod = 0;
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 18; ++i)
|
||||
{
|
||||
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC;
|
||||
|
||||
float sumx = 0.0f, sumy = 0.0f;
|
||||
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx = s_X[threadIdx.x];
|
||||
sumy = s_Y[threadIdx.x];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 32];
|
||||
sumy += s_Y[threadIdx.x + 32];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 64];
|
||||
sumy += s_Y[threadIdx.x + 64];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 96];
|
||||
sumy += s_Y[threadIdx.x + 96];
|
||||
}
|
||||
|
||||
device::reduce<32>(s_sumx + threadIdx.y * 32, sumx, threadIdx.x, plus<volatile float>());
|
||||
device::reduce<32>(s_sumy + threadIdx.y * 32, sumy, threadIdx.x, plus<volatile float>());
|
||||
|
||||
const float temp_mod = sumx * sumx + sumy * sumy;
|
||||
if (temp_mod > best_mod)
|
||||
{
|
||||
best_mod = temp_mod;
|
||||
bestx = sumx;
|
||||
besty = sumy;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
float bestx = 0, besty = 0, best_mod = 0;
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
s_X[threadIdx.y] = bestx;
|
||||
s_Y[threadIdx.y] = besty;
|
||||
s_angle[threadIdx.y] = best_mod;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 18; ++i)
|
||||
{
|
||||
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC;
|
||||
if (threadIdx.x == 0 && threadIdx.y == 0)
|
||||
{
|
||||
int bestIdx = 0;
|
||||
|
||||
float sumx = 0.0f, sumy = 0.0f;
|
||||
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx = s_X[threadIdx.x];
|
||||
sumy = s_Y[threadIdx.x];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 32];
|
||||
sumy += s_Y[threadIdx.x + 32];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 64];
|
||||
sumy += s_Y[threadIdx.x + 64];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 96];
|
||||
sumy += s_Y[threadIdx.x + 96];
|
||||
}
|
||||
if (s_angle[1] > s_angle[bestIdx])
|
||||
bestIdx = 1;
|
||||
if (s_angle[2] > s_angle[bestIdx])
|
||||
bestIdx = 2;
|
||||
if (s_angle[3] > s_angle[bestIdx])
|
||||
bestIdx = 3;
|
||||
|
||||
float* s_sum_row = s_sum + threadIdx.y * 32;
|
||||
float kp_dir = atan2f(s_Y[bestIdx], s_X[bestIdx]);
|
||||
if (kp_dir < 0)
|
||||
kp_dir += 2.0f * CV_PI_F;
|
||||
kp_dir *= 180.0f / CV_PI_F;
|
||||
|
||||
device::reduce<32>(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
|
||||
device::reduce<32>(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
|
||||
|
||||
const float temp_mod = sumx * sumx + sumy * sumy;
|
||||
if (temp_mod > best_mod)
|
||||
{
|
||||
best_mod = temp_mod;
|
||||
bestx = sumx;
|
||||
besty = sumy;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
s_X[threadIdx.y] = bestx;
|
||||
s_Y[threadIdx.y] = besty;
|
||||
s_angle[threadIdx.y] = best_mod;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x < 2 && threadIdx.y == 0)
|
||||
{
|
||||
volatile float* v_x = s_X;
|
||||
volatile float* v_y = s_Y;
|
||||
volatile float* v_mod = s_angle;
|
||||
|
||||
bestx = v_x[threadIdx.x];
|
||||
besty = v_y[threadIdx.x];
|
||||
best_mod = v_mod[threadIdx.x];
|
||||
|
||||
float temp_mod = v_mod[threadIdx.x + 2];
|
||||
if (temp_mod > best_mod)
|
||||
{
|
||||
v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 2];
|
||||
v_y[threadIdx.x] = besty = v_y[threadIdx.x + 2];
|
||||
v_mod[threadIdx.x] = best_mod = temp_mod;
|
||||
}
|
||||
temp_mod = v_mod[threadIdx.x + 1];
|
||||
if (temp_mod > best_mod)
|
||||
{
|
||||
v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 1];
|
||||
v_y[threadIdx.x] = besty = v_y[threadIdx.x + 1];
|
||||
}
|
||||
}
|
||||
|
||||
if (threadIdx.x == 0 && threadIdx.y == 0 && best_mod != 0)
|
||||
{
|
||||
float kp_dir = atan2f(besty, bestx);
|
||||
if (kp_dir < 0)
|
||||
kp_dir += 2.0f * CV_PI_F;
|
||||
kp_dir *= 180.0f / CV_PI_F;
|
||||
|
||||
featureDir[blockIdx.x] = kp_dir;
|
||||
}
|
||||
featureDir[blockIdx.x] = kp_dir;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -649,7 +633,7 @@ namespace cv { namespace gpu { namespace device
|
||||
#undef ORI_WIN
|
||||
#undef ORI_SAMPLES
|
||||
|
||||
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures)
|
||||
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures)
|
||||
{
|
||||
dim3 threads;
|
||||
threads.x = 32;
|
||||
@@ -669,27 +653,27 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
#define PATCH_SZ 20
|
||||
|
||||
__constant__ float c_DW[PATCH_SZ * PATCH_SZ] =
|
||||
__constant__ float c_DW[PATCH_SZ * PATCH_SZ] =
|
||||
{
|
||||
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f,
|
||||
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
|
||||
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
|
||||
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
|
||||
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
|
||||
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
|
||||
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
|
||||
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
|
||||
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
|
||||
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
|
||||
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
|
||||
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
|
||||
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
|
||||
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
|
||||
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
|
||||
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
|
||||
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
|
||||
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
|
||||
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
|
||||
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f,
|
||||
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
|
||||
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
|
||||
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
|
||||
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
|
||||
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
|
||||
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
|
||||
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
|
||||
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
|
||||
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
|
||||
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
|
||||
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
|
||||
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
|
||||
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
|
||||
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
|
||||
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
|
||||
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
|
||||
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
|
||||
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
|
||||
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f
|
||||
};
|
||||
|
||||
@@ -697,7 +681,7 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
typedef uchar elem_type;
|
||||
|
||||
__device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) :
|
||||
__device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) :
|
||||
centerX(centerX_), centerY(centerY_), win_offset(win_offset_), cos_dir(cos_dir_), sin_dir(sin_dir_)
|
||||
{
|
||||
}
|
||||
@@ -710,14 +694,14 @@ namespace cv { namespace gpu { namespace device
|
||||
return tex2D(imgTex, pixel_x, pixel_y);
|
||||
}
|
||||
|
||||
float centerX;
|
||||
float centerX;
|
||||
float centerY;
|
||||
float win_offset;
|
||||
float cos_dir;
|
||||
float win_offset;
|
||||
float cos_dir;
|
||||
float sin_dir;
|
||||
};
|
||||
|
||||
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
|
||||
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
|
||||
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir)
|
||||
{
|
||||
__shared__ float s_PATCH[6][6];
|
||||
@@ -739,7 +723,7 @@ namespace cv { namespace gpu { namespace device
|
||||
sincosf(descriptor_dir, &sin_dir, &cos_dir);
|
||||
|
||||
/* Nearest neighbour version (faster) */
|
||||
const float win_offset = -(float)(win_size - 1) / 2;
|
||||
const float win_offset = -(float)(win_size - 1) / 2;
|
||||
|
||||
// Compute sampling points
|
||||
// since grids are 2D, need to compute xBlock and yBlock indices
|
||||
@@ -966,11 +950,11 @@ namespace cv { namespace gpu { namespace device
|
||||
descriptor_base[threadIdx.x] = lookup / len;
|
||||
}
|
||||
|
||||
void compute_descriptors_gpu(const DevMem2Df& descriptors,
|
||||
void compute_descriptors_gpu(const DevMem2Df& descriptors,
|
||||
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures)
|
||||
{
|
||||
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
|
||||
|
||||
|
||||
if (descriptors.cols == 64)
|
||||
{
|
||||
compute_descriptors64<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
|
||||
@@ -985,12 +969,12 @@ namespace cv { namespace gpu { namespace device
|
||||
}
|
||||
else
|
||||
{
|
||||
compute_descriptors128<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
|
||||
compute_descriptors128<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
normalize_descriptors<128><<<dim3(nFeatures, 1, 1), dim3(128, 1, 1)>>>(descriptors);
|
||||
normalize_descriptors<128><<<dim3(nFeatures, 1, 1), dim3(128, 1, 1)>>>(descriptors);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
Reference in New Issue
Block a user