Merge pull request #1298 from jet47:gpu-eliminate-doubles
This commit is contained in:
commit
a1b633e28e
@ -72,7 +72,7 @@ namespace reduce
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ __forceinline__ T result(T r, double) const
|
__device__ __forceinline__ T result(T r, int) const
|
||||||
{
|
{
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
@ -81,6 +81,15 @@ namespace reduce
|
|||||||
__host__ __device__ __forceinline__ Sum(const Sum&) {}
|
__host__ __device__ __forceinline__ Sum(const Sum&) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T> struct OutputType
|
||||||
|
{
|
||||||
|
typedef float type;
|
||||||
|
};
|
||||||
|
template <> struct OutputType<double>
|
||||||
|
{
|
||||||
|
typedef double type;
|
||||||
|
};
|
||||||
|
|
||||||
struct Avg
|
struct Avg
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -96,7 +105,7 @@ namespace reduce
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ __forceinline__ typename TypeVec<double, VecTraits<T>::cn>::vec_type result(T r, double sz) const
|
__device__ __forceinline__ typename TypeVec<typename OutputType<typename VecTraits<T>::elem_type>::type, VecTraits<T>::cn>::vec_type result(T r, float sz) const
|
||||||
{
|
{
|
||||||
return r / sz;
|
return r / sz;
|
||||||
}
|
}
|
||||||
@ -121,7 +130,7 @@ namespace reduce
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ __forceinline__ T result(T r, double) const
|
__device__ __forceinline__ T result(T r, int) const
|
||||||
{
|
{
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
@ -146,7 +155,7 @@ namespace reduce
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ __forceinline__ T result(T r, double) const
|
__device__ __forceinline__ T result(T r, int) const
|
||||||
{
|
{
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
@ -227,7 +227,7 @@ namespace cv { namespace gpu { namespace cudev
|
|||||||
//check prune
|
//check prune
|
||||||
if (weight < -prune)
|
if (weight < -prune)
|
||||||
{
|
{
|
||||||
weight = 0.0;
|
weight = 0.0f;
|
||||||
nmodes--;
|
nmodes--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,7 +123,7 @@ PERF_TEST_P(Image_NFeatures, ORB,
|
|||||||
|
|
||||||
sortKeyPoints(gpu_keypoints, gpu_descriptors);
|
sortKeyPoints(gpu_keypoints, gpu_descriptors);
|
||||||
|
|
||||||
SANITY_CHECK_KEYPOINTS(gpu_keypoints);
|
SANITY_CHECK_KEYPOINTS(gpu_keypoints, 1e-4);
|
||||||
SANITY_CHECK(gpu_descriptors);
|
SANITY_CHECK(gpu_descriptors);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -197,8 +197,8 @@ namespace cv { namespace gpu { namespace cudev
|
|||||||
if (threadIdx.x == 0)
|
if (threadIdx.x == 0)
|
||||||
{
|
{
|
||||||
float kp_dir = ::atan2f((float)m_01, (float)m_10);
|
float kp_dir = ::atan2f((float)m_01, (float)m_10);
|
||||||
kp_dir += (kp_dir < 0) * (2.0f * CV_PI);
|
kp_dir += (kp_dir < 0) * (2.0f * CV_PI_F);
|
||||||
kp_dir *= 180.0f / CV_PI;
|
kp_dir *= 180.0f / CV_PI_F;
|
||||||
|
|
||||||
angle[ptidx] = kp_dir;
|
angle[ptidx] = kp_dir;
|
||||||
}
|
}
|
||||||
@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace cudev
|
|||||||
if (ptidx < npoints && descidx < dsize)
|
if (ptidx < npoints && descidx < dsize)
|
||||||
{
|
{
|
||||||
float angle = angle_[ptidx];
|
float angle = angle_[ptidx];
|
||||||
angle *= (float)(CV_PI / 180.f);
|
angle *= (float)(CV_PI_F / 180.f);
|
||||||
|
|
||||||
float sina, cosa;
|
float sina, cosa;
|
||||||
::sincosf(angle, &sina, &cosa);
|
::sincosf(angle, &sina, &cosa);
|
||||||
|
@ -133,7 +133,7 @@ namespace cv { namespace gpu { namespace cudev
|
|||||||
B<T> b(src.rows, src.cols);
|
B<T> b(src.rows, src.cols);
|
||||||
|
|
||||||
float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
|
float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
|
||||||
float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
|
float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
|
||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
|
||||||
bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
|
bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
|
||||||
|
@ -43,7 +43,7 @@
|
|||||||
#if !defined CUDA_DISABLER
|
#if !defined CUDA_DISABLER
|
||||||
|
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <algorithm>//std::swap
|
#include <algorithm>
|
||||||
#include "opencv2/core/cuda/common.hpp"
|
#include "opencv2/core/cuda/common.hpp"
|
||||||
#include "opencv2/core/cuda/emulation.hpp"
|
#include "opencv2/core/cuda/emulation.hpp"
|
||||||
#include "opencv2/core/cuda/transform.hpp"
|
#include "opencv2/core/cuda/transform.hpp"
|
||||||
|
@ -140,7 +140,7 @@ namespace cv { namespace gpu { namespace cudev
|
|||||||
const float u_avg_val = u_avg(y, x);
|
const float u_avg_val = u_avg(y, x);
|
||||||
const float v_avg_val = v_avg(y, x);
|
const float v_avg_val = v_avg(y, x);
|
||||||
|
|
||||||
const float theta = ::atan2f(v_avg_val, u_avg_val);// + CV_PI;
|
const float theta = ::atan2f(v_avg_val, u_avg_val);
|
||||||
|
|
||||||
float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val);
|
float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val);
|
||||||
r = fmin(14.0f * (r / max_flow), 14.0f);
|
r = fmin(14.0f * (r / max_flow), 14.0f);
|
||||||
|
@ -137,10 +137,10 @@ typedef unsigned char uchar;
|
|||||||
template<bool isDefaultNum>
|
template<bool isDefaultNum>
|
||||||
__device__ __forceinline__ int fast_angle_bin(const float& dx, const float& dy)
|
__device__ __forceinline__ int fast_angle_bin(const float& dx, const float& dy)
|
||||||
{
|
{
|
||||||
const float angle_quantum = CV_PI / 6.f;
|
const float angle_quantum = CV_PI_F / 6.f;
|
||||||
float angle = atan2(dx, dy) + (angle_quantum / 2.f);
|
float angle = atan2(dx, dy) + (angle_quantum / 2.f);
|
||||||
|
|
||||||
if (angle < 0) angle += CV_PI;
|
if (angle < 0) angle += CV_PI_F;
|
||||||
|
|
||||||
const float angle_scaling = 1.f / angle_quantum;
|
const float angle_scaling = 1.f / angle_quantum;
|
||||||
return static_cast<int>(angle * angle_scaling) % 6;
|
return static_cast<int>(angle * angle_scaling) % 6;
|
||||||
@ -174,8 +174,8 @@ typedef unsigned char uchar;
|
|||||||
{
|
{
|
||||||
int i = 3;
|
int i = 3;
|
||||||
float2 bin_vector_i;
|
float2 bin_vector_i;
|
||||||
bin_vector_i.x = ::cos(i * (CV_PI / 6.f));
|
bin_vector_i.x = ::cos(i * (CV_PI_F / 6.f));
|
||||||
bin_vector_i.y = ::sin(i * (CV_PI / 6.f));
|
bin_vector_i.y = ::sin(i * (CV_PI_F / 6.f));
|
||||||
|
|
||||||
const float dot_product = fabs(dx * bin_vector_i.x + dy * bin_vector_i.y);
|
const float dot_product = fabs(dx * bin_vector_i.x + dy * bin_vector_i.y);
|
||||||
if(dot_product > max_dot)
|
if(dot_product > max_dot)
|
||||||
|
@ -160,7 +160,7 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1,
|
|||||||
|
|
||||||
TEST_CYCLE_N(10) superRes->nextFrame(dst);
|
TEST_CYCLE_N(10) superRes->nextFrame(dst);
|
||||||
|
|
||||||
GPU_SANITY_CHECK(dst);
|
GPU_SANITY_CHECK(dst, 2);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user