Fixed gpu::sum* on compute capability 1.0 (CC 1.0) and updated some tests
This commit is contained in:
parent
f7e62d89f8
commit
557dd39f03
@ -66,8 +66,14 @@ namespace cv
|
|||||||
|
|
||||||
enum GpuFeature
|
enum GpuFeature
|
||||||
{
|
{
|
||||||
NATIVE_DOUBLE,
|
COMPUTE_10 = 10,
|
||||||
ATOMICS
|
COMPUTE_11 = 11,
|
||||||
|
COMPUTE_12 = 12,
|
||||||
|
COMPUTE_13 = 13,
|
||||||
|
COMPUTE_20 = 20,
|
||||||
|
COMPUTE_21 = 21,
|
||||||
|
ATOMICS = COMPUTE_11,
|
||||||
|
NATIVE_DOUBLE = COMPUTE_13
|
||||||
};
|
};
|
||||||
|
|
||||||
class CV_EXPORTS TargetArchs
|
class CV_EXPORTS TargetArchs
|
||||||
|
@ -1394,7 +1394,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
smem[tid] = res.x;
|
smem[tid] = res.x;
|
||||||
smem[tid + nthreads] = res.y;
|
smem[tid + nthreads] = res.y;
|
||||||
smem[tid + 2 * nthreads] = res.z;
|
smem[tid + 2 * nthreads] = res.z;
|
||||||
smem[tid + 3 * nthreads] = res.z;
|
smem[tid + 3 * nthreads] = res.w;
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
sumInSmem<nthreads, R>(smem, tid);
|
sumInSmem<nthreads, R>(smem, tid);
|
||||||
@ -1432,21 +1432,25 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
|
src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
|
||||||
sumPass2Kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
sumPass2Kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
||||||
(typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
(typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
||||||
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
sumKernel_C2<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
sumKernel_C2<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||||
src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
|
src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
|
||||||
sumPass2Kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
sumPass2Kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
||||||
(typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
(typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
||||||
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
sumKernel_C3<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
sumKernel_C3<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||||
src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
|
src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
|
||||||
sumPass2Kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
sumPass2Kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
||||||
(typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
(typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
||||||
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
sumKernel_C4<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
sumKernel_C4<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||||
src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
|
src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
|
||||||
sumPass2Kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
sumPass2Kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
||||||
(typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
(typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
cudaSafeCall(cudaThreadSynchronize());
|
||||||
|
|
||||||
|
@ -71,11 +71,7 @@ namespace
|
|||||||
|
|
||||||
CV_EXPORTS bool cv::gpu::TargetArchs::builtWith(cv::gpu::GpuFeature feature)
|
CV_EXPORTS bool cv::gpu::TargetArchs::builtWith(cv::gpu::GpuFeature feature)
|
||||||
{
|
{
|
||||||
if (feature == NATIVE_DOUBLE)
|
return ::compareToSet(CUDA_ARCH_FEATURES, feature, std::greater_equal<int>());
|
||||||
return ::compareToSet(CUDA_ARCH_FEATURES, 13, std::greater_equal<int>());
|
|
||||||
if (feature == ATOMICS)
|
|
||||||
return ::compareToSet(CUDA_ARCH_FEATURES, 11, std::greater_equal<int>());
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -947,6 +947,16 @@ struct CV_GpuSumTest: CvTest
|
|||||||
// sum
|
// sum
|
||||||
//
|
//
|
||||||
|
|
||||||
|
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 1), src);
|
||||||
|
a = sum(src);
|
||||||
|
b = sum(GpuMat(src));
|
||||||
|
if (abs(a[0] - b[0]) > src.size().area() * max_err)
|
||||||
|
{
|
||||||
|
ts->printf(CvTS::CONSOLE, "1 cols: %d, rows: %d, expected: %f, actual: %f\n", src.cols, src.rows, a[0], b[0]);
|
||||||
|
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 2), src);
|
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 2), src);
|
||||||
a = sum(src);
|
a = sum(src);
|
||||||
b = sum(GpuMat(src));
|
b = sum(GpuMat(src));
|
||||||
|
@ -56,7 +56,7 @@ struct CV_GpuMeanShiftTest : public CvTest
|
|||||||
cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "meanshift/cones.png");
|
cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "meanshift/cones.png");
|
||||||
cv::Mat img_template;
|
cv::Mat img_template;
|
||||||
|
|
||||||
if (cv::gpu::TargetArchs::hasEqualOrGreater(2, 0) && cv::gpu::DeviceInfo().major() >= 2)
|
if (cv::gpu::TargetArchs::builtWith(cv::gpu::COMPUTE_20) && cv::gpu::DeviceInfo().major() >= 2)
|
||||||
img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result.png");
|
img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result.png");
|
||||||
else
|
else
|
||||||
img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result_CC1X.png");
|
img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result_CC1X.png");
|
||||||
@ -199,7 +199,7 @@ struct CV_GpuMeanShiftProcTest : public CvTest
|
|||||||
cv::Mat spmap_template;
|
cv::Mat spmap_template;
|
||||||
cv::FileStorage fs;
|
cv::FileStorage fs;
|
||||||
|
|
||||||
if (cv::gpu::TargetArchs::hasEqualOrGreater(2, 0) && cv::gpu::DeviceInfo().major() >= 2)
|
if (cv::gpu::TargetArchs::builtWith(cv::gpu::COMPUTE_20) && cv::gpu::DeviceInfo().major() >= 2)
|
||||||
fs.open(std::string(ts->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ);
|
fs.open(std::string(ts->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ);
|
||||||
else
|
else
|
||||||
fs.open(std::string(ts->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ);
|
fs.open(std::string(ts->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ);
|
||||||
|
@ -69,7 +69,7 @@ struct CV_GpuMeanShiftSegmentationTest : public CvTest {
|
|||||||
{
|
{
|
||||||
stringstream path;
|
stringstream path;
|
||||||
path << ts->get_data_path() << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize;
|
path << ts->get_data_path() << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize;
|
||||||
if (TargetArchs::hasEqualOrGreater(2, 0) && DeviceInfo().major() >= 2)
|
if (TargetArchs::builtWith(COMPUTE_20) && DeviceInfo().major() >= 2)
|
||||||
path << ".png";
|
path << ".png";
|
||||||
else
|
else
|
||||||
path << "_CC1X.png";
|
path << "_CC1X.png";
|
||||||
|
Loading…
x
Reference in New Issue
Block a user