Merge pull request #1298 from jet47:gpu-eliminate-doubles
This commit is contained in:
commit
a1b633e28e
@ -72,7 +72,7 @@ namespace reduce
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T result(T r, double) const
|
||||
__device__ __forceinline__ T result(T r, int) const
|
||||
{
|
||||
return r;
|
||||
}
|
||||
@ -81,6 +81,15 @@ namespace reduce
|
||||
__host__ __device__ __forceinline__ Sum(const Sum&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct OutputType
|
||||
{
|
||||
typedef float type;
|
||||
};
|
||||
template <> struct OutputType<double>
|
||||
{
|
||||
typedef double type;
|
||||
};
|
||||
|
||||
struct Avg
|
||||
{
|
||||
template <typename T>
|
||||
@ -96,7 +105,7 @@ namespace reduce
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ typename TypeVec<double, VecTraits<T>::cn>::vec_type result(T r, double sz) const
|
||||
__device__ __forceinline__ typename TypeVec<typename OutputType<typename VecTraits<T>::elem_type>::type, VecTraits<T>::cn>::vec_type result(T r, float sz) const
|
||||
{
|
||||
return r / sz;
|
||||
}
|
||||
@ -121,7 +130,7 @@ namespace reduce
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T result(T r, double) const
|
||||
__device__ __forceinline__ T result(T r, int) const
|
||||
{
|
||||
return r;
|
||||
}
|
||||
@ -146,7 +155,7 @@ namespace reduce
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T result(T r, double) const
|
||||
__device__ __forceinline__ T result(T r, int) const
|
||||
{
|
||||
return r;
|
||||
}
|
||||
|
@ -227,7 +227,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
//check prune
|
||||
if (weight < -prune)
|
||||
{
|
||||
weight = 0.0;
|
||||
weight = 0.0f;
|
||||
nmodes--;
|
||||
}
|
||||
|
||||
|
@ -123,7 +123,7 @@ PERF_TEST_P(Image_NFeatures, ORB,
|
||||
|
||||
sortKeyPoints(gpu_keypoints, gpu_descriptors);
|
||||
|
||||
SANITY_CHECK_KEYPOINTS(gpu_keypoints);
|
||||
SANITY_CHECK_KEYPOINTS(gpu_keypoints, 1e-4);
|
||||
SANITY_CHECK(gpu_descriptors);
|
||||
}
|
||||
else
|
||||
|
@ -197,8 +197,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
float kp_dir = ::atan2f((float)m_01, (float)m_10);
|
||||
kp_dir += (kp_dir < 0) * (2.0f * CV_PI);
|
||||
kp_dir *= 180.0f / CV_PI;
|
||||
kp_dir += (kp_dir < 0) * (2.0f * CV_PI_F);
|
||||
kp_dir *= 180.0f / CV_PI_F;
|
||||
|
||||
angle[ptidx] = kp_dir;
|
||||
}
|
||||
@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
if (ptidx < npoints && descidx < dsize)
|
||||
{
|
||||
float angle = angle_[ptidx];
|
||||
angle *= (float)(CV_PI / 180.f);
|
||||
angle *= (float)(CV_PI_F / 180.f);
|
||||
|
||||
float sina, cosa;
|
||||
::sincosf(angle, &sina, &cosa);
|
||||
|
@ -133,7 +133,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
B<T> b(src.rows, src.cols);
|
||||
|
||||
float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
|
||||
float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
|
||||
float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
|
||||
|
||||
cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
|
||||
bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
|
||||
|
@ -43,7 +43,7 @@
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include <utility>
|
||||
#include <algorithm>//std::swap
|
||||
#include <algorithm>
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/emulation.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
|
@ -140,7 +140,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
const float u_avg_val = u_avg(y, x);
|
||||
const float v_avg_val = v_avg(y, x);
|
||||
|
||||
const float theta = ::atan2f(v_avg_val, u_avg_val);// + CV_PI;
|
||||
const float theta = ::atan2f(v_avg_val, u_avg_val);
|
||||
|
||||
float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val);
|
||||
r = fmin(14.0f * (r / max_flow), 14.0f);
|
||||
|
@ -137,10 +137,10 @@ typedef unsigned char uchar;
|
||||
template<bool isDefaultNum>
|
||||
__device__ __forceinline__ int fast_angle_bin(const float& dx, const float& dy)
|
||||
{
|
||||
const float angle_quantum = CV_PI / 6.f;
|
||||
const float angle_quantum = CV_PI_F / 6.f;
|
||||
float angle = atan2(dx, dy) + (angle_quantum / 2.f);
|
||||
|
||||
if (angle < 0) angle += CV_PI;
|
||||
if (angle < 0) angle += CV_PI_F;
|
||||
|
||||
const float angle_scaling = 1.f / angle_quantum;
|
||||
return static_cast<int>(angle * angle_scaling) % 6;
|
||||
@ -174,8 +174,8 @@ typedef unsigned char uchar;
|
||||
{
|
||||
int i = 3;
|
||||
float2 bin_vector_i;
|
||||
bin_vector_i.x = ::cos(i * (CV_PI / 6.f));
|
||||
bin_vector_i.y = ::sin(i * (CV_PI / 6.f));
|
||||
bin_vector_i.x = ::cos(i * (CV_PI_F / 6.f));
|
||||
bin_vector_i.y = ::sin(i * (CV_PI_F / 6.f));
|
||||
|
||||
const float dot_product = fabs(dx * bin_vector_i.x + dy * bin_vector_i.y);
|
||||
if(dot_product > max_dot)
|
||||
|
@ -160,7 +160,7 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1,
|
||||
|
||||
TEST_CYCLE_N(10) superRes->nextFrame(dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
GPU_SANITY_CHECK(dst, 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user