added cv::gpu::pow, ticket #1227
This commit is contained in:
parent
c722128ddd
commit
3a1beb1c01
@ -532,6 +532,12 @@ namespace cv
|
||||
//! computes exponent of each matrix element (b = e**a)
|
||||
//! supports only CV_32FC1 type
|
||||
CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes power of each matrix element:
|
||||
// (dst(i,j) = pow( src(i,j) , power), if src.type() is integer
|
||||
// (dst(i,j) = pow(fabs(src(i,j)), power), otherwise
|
||||
//! supports all, except depth == CV_64F
|
||||
CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
|
||||
//! supports only CV_32FC1 type
|
||||
|
@ -42,6 +42,7 @@
|
||||
|
||||
#include "opencv2/gpu/device/vecmath.hpp"
|
||||
#include "opencv2/gpu/device/transform.hpp"
|
||||
#include "opencv2/gpu/device/limits_gpu.hpp"
|
||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||
#include "internal_shared.hpp"
|
||||
|
||||
@ -669,4 +670,63 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
}
|
||||
|
||||
template void subtractCaller<short>(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream);
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// pow
|
||||
|
||||
template<typename T, bool Signed = device::numeric_limits_gpu<T>::is_signed>
|
||||
struct PowOp
|
||||
{
|
||||
float power;
|
||||
PowOp(float power_) : power(power_) {}
|
||||
|
||||
template<typename T>
|
||||
__device__ __forceinline__ T operator()(const T& e) const
|
||||
{
|
||||
return saturate_cast<T>(__powf((float)e, power));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct PowOp<T, true>
|
||||
{
|
||||
float power;
|
||||
PowOp(float power_) : power(power_) {}
|
||||
|
||||
__device__ __forceinline__ float operator()(const T& e)
|
||||
{
|
||||
T res = saturate_cast<T>(__powf((float)e, power));
|
||||
|
||||
if ( (e < 0) && (1 & (int)power) )
|
||||
res *= -1;
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct PowOp<float>
|
||||
{
|
||||
float power;
|
||||
PowOp(float power_) : power(power_) {}
|
||||
|
||||
__device__ __forceinline__ float operator()(const float& e)
|
||||
{
|
||||
return __powf(fabs(e), power);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream)
|
||||
{
|
||||
transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), stream);
|
||||
}
|
||||
|
||||
template void pow_caller<uchar>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
||||
template void pow_caller<schar>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
||||
template void pow_caller<short>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
||||
template void pow_caller<ushort>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
||||
template void pow_caller<int>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
||||
template void pow_caller<uint>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
||||
template void pow_caller<float>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
||||
}}}
|
||||
|
@ -68,6 +68,8 @@ void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(
|
||||
void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, Stream&) {throw_nogpu(); return 0.0;}
|
||||
|
||||
void cv::gpu::pow(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
|
||||
#else
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -768,4 +770,36 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
|
||||
return thresh;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// pow
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace gpu
|
||||
{
|
||||
namespace mathfunc
|
||||
{
|
||||
template<typename T>
|
||||
void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
CV_Assert( src.depth() != CV_64F );
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
typedef void (*caller_t)(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
mathfunc::pow_caller<unsigned char>, mathfunc::pow_caller<signed char>,
|
||||
mathfunc::pow_caller<unsigned short>, mathfunc::pow_caller<short>,
|
||||
mathfunc::pow_caller<int>, mathfunc::pow_caller<float>
|
||||
};
|
||||
|
||||
callers[src.depth()](src.reshape(1), (float)power, dst.reshape(1), StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -87,6 +87,20 @@ namespace cv { namespace gpu { namespace device
|
||||
static const bool is_signed = (char)-1 == -1;
|
||||
};
|
||||
|
||||
template<> struct numeric_limits_gpu<signed char>
|
||||
{
|
||||
typedef char type;
|
||||
__device__ __forceinline__ static type min() { return CHAR_MIN; };
|
||||
__device__ __forceinline__ static type max() { return CHAR_MAX; };
|
||||
__device__ __forceinline__ static type epsilon();
|
||||
__device__ __forceinline__ static type round_error();
|
||||
__device__ __forceinline__ static type denorm_min();
|
||||
__device__ __forceinline__ static type infinity();
|
||||
__device__ __forceinline__ static type quiet_NaN();
|
||||
__device__ __forceinline__ static type signaling_NaN();
|
||||
static const bool is_signed = (signed char)-1 == -1;
|
||||
};
|
||||
|
||||
template<> struct numeric_limits_gpu<unsigned char>
|
||||
{
|
||||
typedef unsigned char type;
|
||||
|
@ -107,7 +107,7 @@ void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int he
|
||||
levels = (int)::log(static_cast<double>(mm)) * 2 / 3;
|
||||
if (levels == 0) levels++;
|
||||
|
||||
nr_plane = (int) ((float) ndisp / pow(2.0, levels + 1));
|
||||
nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1));
|
||||
}
|
||||
|
||||
cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
|
||||
|
@ -752,6 +752,74 @@ TEST_P(Exp, Accuracy)
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Exp, testing::ValuesIn(devices()));
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// pow
|
||||
|
||||
struct Pow : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
int type;
|
||||
|
||||
double power;
|
||||
cv::Size size;
|
||||
cv::Mat mat;
|
||||
|
||||
cv::Mat dst_gold;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = std::tr1::get<0>(GetParam());
|
||||
type = std::tr1::get<1>(GetParam());
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
cv::RNG& rng = cvtest::TS::ptr()->get_rng();
|
||||
|
||||
size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
|
||||
//size = cv::Size(2, 2);
|
||||
|
||||
mat = cvtest::randomMat(rng, size, type, 0.0, 100.0, false);
|
||||
|
||||
if (mat.depth() == CV_32F)
|
||||
power = rng.uniform(1.2f, 3.f);
|
||||
else
|
||||
{
|
||||
int ipower = rng.uniform(2, 8);
|
||||
power = (float)ipower;
|
||||
}
|
||||
cv::pow(mat, power, dst_gold);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(Pow, Accuracy)
|
||||
{
|
||||
PRINT_PARAM(devInfo);
|
||||
PRINT_TYPE(type);
|
||||
PRINT_PARAM(size);
|
||||
PRINT_PARAM(power);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
ASSERT_NO_THROW(
|
||||
cv::gpu::GpuMat gpu_res;
|
||||
|
||||
cv::gpu::pow(cv::gpu::GpuMat(mat), power, gpu_res);
|
||||
|
||||
gpu_res.download(dst);
|
||||
);
|
||||
|
||||
/*std::cout << mat << std::endl << std::endl;
|
||||
std::cout << dst << std::endl << std::endl;
|
||||
std::cout << dst_gold << std::endl;*/
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Pow, testing::Combine(
|
||||
testing::ValuesIn(devices()),
|
||||
testing::Values(CV_32F, CV_32FC3)));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// log
|
||||
|
||||
|
@ -68,9 +68,13 @@ void print_info()
|
||||
#endif
|
||||
|
||||
int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
|
||||
int driver;
|
||||
cudaDriverGetVersion(&driver);
|
||||
|
||||
printf("CUDA version: %d\n", CUDART_VERSION);
|
||||
printf("CUDA Driver version: %d\n", driver);
|
||||
printf("CUDA Runtime version: %d\n", CUDART_VERSION);
|
||||
printf("CUDA device count: %d\n\n", deviceCount);
|
||||
|
||||
|
||||
for (int i = 0; i < deviceCount; ++i)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user