*applied patch from NVidia (nppstTranspose bug)
*fixed some warnings
*finished gpu test port to gtest framework
This commit is contained in:
parent
916690a674
commit
047c7e0fd6
@ -100,6 +100,9 @@ macro(define_opencv_module name)
|
||||
|
||||
file(GLOB test_srcs "test/*.cpp")
|
||||
file(GLOB test_hdrs "test/*.h*")
|
||||
|
||||
source_group("Src" FILES ${test_srcs})
|
||||
source_group("Include" FILES ${test_hdrs})
|
||||
|
||||
set(the_target "opencv_test_${name}")
|
||||
|
||||
|
@ -55,5 +55,6 @@
|
||||
#include "opencv2/imgproc/imgproc_c.h"
|
||||
#include "opencv2/core/internal.hpp"
|
||||
#include "opencv2/features2d/features2d.hpp"
|
||||
#include <vector>
|
||||
|
||||
#endif
|
||||
|
@ -41,6 +41,8 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#undef INFINITY
|
||||
#define INFINITY 10000
|
||||
#define OCCLUSION_PENALTY 10000
|
||||
|
@ -38,10 +38,9 @@ source_group("Device" FILES ${lib_device_hdrs})
|
||||
if (HAVE_CUDA)
|
||||
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
|
||||
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
|
||||
file(GLOB_RECURSE ncv_hdr1 "src/nvidia/*.hpp")
|
||||
file(GLOB_RECURSE ncv_hdr2 "src/nvidia/*.h")
|
||||
|
||||
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda})
|
||||
file(GLOB_RECURSE ncv_hdrs "src/nvidia/*.hpp" "src/nvidia/*.h")
|
||||
|
||||
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
|
||||
include_directories("src/nvidia/core" "src/nvidia/NPP_staging")
|
||||
endif()
|
||||
|
||||
@ -83,7 +82,7 @@ if (HAVE_CUDA)
|
||||
#CUDA_BUILD_CLEAN_TARGET()
|
||||
endif()
|
||||
|
||||
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda} ${cuda_objs})
|
||||
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
|
||||
|
||||
if(PCHSupport_FOUND)
|
||||
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
|
||||
@ -147,10 +146,15 @@ install(FILES ${lib_hdrs}
|
||||
# DESTINATION include/opencv2/${name}/device
|
||||
# COMPONENT main)
|
||||
|
||||
|
||||
################################################################################################################
|
||||
################################ GPU Module Tests #####################################################
|
||||
################################################################################################################
|
||||
|
||||
# Test files processing is in the separated directory to avoid 'Src' source
|
||||
# filter creation in Visual Studio
|
||||
if(BUILD_TESTS AND NOT ANDROID AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/test)
|
||||
set(the_target "opencv_test_${name}")
|
||||
set(the_test_target "opencv_test_${name}")
|
||||
|
||||
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/test"
|
||||
@ -169,44 +173,46 @@ if(BUILD_TESTS AND NOT ANDROID AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/test)
|
||||
|
||||
file(GLOB test_srcs "test/*.cpp")
|
||||
file(GLOB test_hdrs "test/*.h*")
|
||||
|
||||
|
||||
source_group("Src" FILES ${test_hdrs} ${test_srcs})
|
||||
if(HAVE_CUDA)
|
||||
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
|
||||
file(GLOB nvidia "test/nvidia/*.cpp" "test/nvidia/*.h*")
|
||||
source_group("nvidia" FILES ${nvidia})
|
||||
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
|
||||
|
||||
file(GLOB nvidia "test/nvidia/*.cpp" "test/nvidia/*.h*")
|
||||
source_group("Src\\NVidia" FILES ${nvidia})
|
||||
endif()
|
||||
|
||||
|
||||
add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia})
|
||||
add_executable(${the_test_target} ${test_srcs} ${test_hdrs} ${nvidia})
|
||||
|
||||
if(PCHSupport_FOUND)
|
||||
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/test/test_precomp.hpp)
|
||||
if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")
|
||||
if(${CMAKE_GENERATOR} MATCHES "Visual*")
|
||||
set(${the_target}_pch "test/test_precomp.cpp")
|
||||
set(${the_test_target}_pch "test/test_precomp.cpp")
|
||||
endif()
|
||||
add_native_precompiled_header(${the_target} ${pch_header})
|
||||
add_native_precompiled_header(${the_test_target} ${pch_header})
|
||||
elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")
|
||||
add_precompiled_header(${the_target} ${pch_header})
|
||||
add_precompiled_header(${the_test_target} ${pch_header})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Additional target properties
|
||||
set_target_properties(${the_target} PROPERTIES
|
||||
set_target_properties(${the_test_target} PROPERTIES
|
||||
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
|
||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/"
|
||||
)
|
||||
|
||||
add_dependencies(${the_target} ${test_deps})
|
||||
add_dependencies(${the_test_target} ${test_deps})
|
||||
|
||||
# Add the required libraries for linking:
|
||||
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${test_deps})
|
||||
target_link_libraries(${the_test_target} ${OPENCV_LINKER_LIBS} ${test_deps})
|
||||
|
||||
enable_testing()
|
||||
get_target_property(LOC ${the_target} LOCATION)
|
||||
add_test(${the_target} "${LOC}")
|
||||
get_target_property(LOC ${the_test_target} LOCATION)
|
||||
add_test(${the_test_target} "${LOC}")
|
||||
|
||||
if(WIN32)
|
||||
install(TARGETS ${the_target} RUNTIME DESTINATION bin COMPONENT main)
|
||||
install(TARGETS ${the_test_target} RUNTIME DESTINATION bin COMPONENT main)
|
||||
endif()
|
||||
endif()
|
@ -1198,26 +1198,32 @@ __global__ void transpose(T *d_src, Ncv32u srcStride,
|
||||
|
||||
Ncv32u xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x;
|
||||
Ncv32u yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y;
|
||||
Ncv32u index_in = xIndex + yIndex * srcStride;
|
||||
Ncv32u index_gmem = xIndex + yIndex * srcStride;
|
||||
|
||||
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
|
||||
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
|
||||
Ncv32u index_out = xIndex + yIndex * dstStride;
|
||||
|
||||
for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)
|
||||
if (xIndex < srcRoi.width)
|
||||
{
|
||||
tile[threadIdx.y+i][threadIdx.x] = d_src[index_in+i*srcStride];
|
||||
for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)
|
||||
{
|
||||
if (yIndex + i < srcRoi.height)
|
||||
{
|
||||
tile[threadIdx.y+i][threadIdx.x] = d_src[index_gmem+i*srcStride];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
|
||||
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
|
||||
index_gmem = xIndex + yIndex * dstStride;
|
||||
|
||||
if (xIndex < srcRoi.height)
|
||||
{
|
||||
for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)
|
||||
{
|
||||
if (yIndex + i < srcRoi.width)
|
||||
{
|
||||
d_dst[index_out+i*dstStride] = tile[threadIdx.x][threadIdx.y+i];
|
||||
d_dst[index_gmem+i*dstStride] = tile[threadIdx.x][threadIdx.y+i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -71,21 +71,21 @@ namespace cv { namespace gpu { namespace device
|
||||
template <size_t src_elem_size, size_t dst_elem_size>
|
||||
struct UnReadWriteTraits_
|
||||
{
|
||||
enum {shift=1};
|
||||
enum { shift = 1 };
|
||||
};
|
||||
template <size_t src_elem_size>
|
||||
struct UnReadWriteTraits_<src_elem_size, 1>
|
||||
{
|
||||
enum {shift=4};
|
||||
enum { shift = 4 };
|
||||
};
|
||||
template <size_t src_elem_size>
|
||||
struct UnReadWriteTraits_<src_elem_size, 2>
|
||||
{
|
||||
enum {shift=2};
|
||||
enum { shift = 2 };
|
||||
};
|
||||
template <typename T, typename D> struct UnReadWriteTraits
|
||||
{
|
||||
enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift};
|
||||
enum { shift = UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift };
|
||||
|
||||
typedef typename TypeVec<T, shift>::vec_t read_type;
|
||||
typedef typename TypeVec<D, shift>::vec_t write_type;
|
||||
@ -94,21 +94,21 @@ namespace cv { namespace gpu { namespace device
|
||||
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
|
||||
struct BinReadWriteTraits_
|
||||
{
|
||||
enum {shift=1};
|
||||
enum { shift = 1 };
|
||||
};
|
||||
template <size_t src_elem_size1, size_t src_elem_size2>
|
||||
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
|
||||
{
|
||||
enum {shift=4};
|
||||
enum { shift = 4 };
|
||||
};
|
||||
template <size_t src_elem_size1, size_t src_elem_size2>
|
||||
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
|
||||
{
|
||||
enum {shift=2};
|
||||
enum { shift = 2 };
|
||||
};
|
||||
template <typename T1, typename T2, typename D> struct BinReadWriteTraits
|
||||
{
|
||||
enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
|
||||
enum {shift = BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
|
||||
|
||||
typedef typename TypeVec<T1, shift>::vec_t read_type1;
|
||||
typedef typename TypeVec<T2, shift>::vec_t read_type2;
|
||||
|
@ -179,6 +179,9 @@ void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider
|
||||
|
||||
testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480));
|
||||
testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080));
|
||||
|
||||
//regression tests
|
||||
testLister.add(new TestTranspose<T>("TestTranspose_reg_0", src, 1072, 375));
|
||||
}
|
||||
|
||||
|
||||
|
@ -69,4 +69,4 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
TEST(NPP_Staging, DISABLED_multitest) { CV_NVidiaTestsCaller test; test.safe_run(); }
|
||||
TEST(NVidia, DISABLED_multitest) { CV_NVidiaTestsCaller test; test.safe_run(); }
|
||||
|
@ -1,4 +1,8 @@
|
||||
#define GTEST_CREATE_AS_SHARED_LIBRARY 1
|
||||
|
||||
#if _MSC_VER >= 1200
|
||||
#pragma warning( disable: 4127 4251)
|
||||
#endif
|
||||
|
||||
#include "opencv2/ts/ts.hpp"
|
||||
#include "opencv2/core/core_c.h"
|
||||
|
@ -5,10 +5,5 @@
|
||||
#add_subdirectory(ml)
|
||||
#add_subdirectory(cxts)
|
||||
|
||||
#if(WITH_CUDA)
|
||||
# set (BUILD_TESTS_GPU OFF CACHE BOOL "Build tests GPU")
|
||||
|
||||
# if(BUILD_TESTS_GPU AND WITH_CUDA)
|
||||
# add_subdirectory(gpu)
|
||||
# endif()
|
||||
#endif()
|
||||
#add_subdirectory(gpu)
|
||||
|
||||
|
@ -38,12 +38,11 @@ include_directories(../cxts)
|
||||
if(HAVE_CUDA)
|
||||
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
|
||||
|
||||
file(GLOB nvidia "src/nvidia/*.*")
|
||||
SET(ncv_cpp ../../modules/gpu/src/nvidia/core/NCV.cpp)
|
||||
file(GLOB nvidia "src/nvidia/*.*")
|
||||
source_group("Src\\nvidia" FILES ${nvidia})
|
||||
endif()
|
||||
|
||||
add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia} ${ncv_cpp})
|
||||
add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia})
|
||||
|
||||
# Additional target properties
|
||||
set_target_properties(${the_target} PROPERTIES
|
||||
|
@ -179,6 +179,9 @@ void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider
|
||||
|
||||
testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480));
|
||||
testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080));
|
||||
|
||||
//regression tests
|
||||
testLister.add(new TestTranspose<T>("TestTranspose_reg_0", src, 1072, 375));
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user