Merge remote-tracking branch 'refs/remotes/upstream/master' into rho
This commit is contained in:
commit
045f8294bb
@ -113,7 +113,6 @@ endmacro()
|
||||
macro(ocv_add_module _name)
|
||||
ocv_debug_message("ocv_add_module(" ${_name} ${ARGN} ")")
|
||||
string(TOLOWER "${_name}" name)
|
||||
string(REGEX REPLACE "^opencv_" "" ${name} "${name}")
|
||||
set(the_module opencv_${name})
|
||||
|
||||
# the first pass - collect modules info, the second pass - create targets
|
||||
@ -787,7 +786,7 @@ macro(__ocv_parse_test_sources tests_type)
|
||||
set(__file_group_sources "")
|
||||
elseif(arg STREQUAL "DEPENDS_ON")
|
||||
set(__currentvar "OPENCV_${tests_type}_${the_module}_DEPS")
|
||||
elseif("${__currentvar}" STREQUAL "__file_group_sources" AND NOT __file_group_name)
|
||||
elseif(" ${__currentvar}" STREQUAL " __file_group_sources" AND NOT __file_group_name) # spaces to avoid CMP0054
|
||||
set(__file_group_name "${arg}")
|
||||
else()
|
||||
list(APPEND ${__currentvar} "${arg}")
|
||||
@ -808,7 +807,7 @@ function(ocv_add_perf_tests)
|
||||
__ocv_parse_test_sources(PERF ${ARGN})
|
||||
|
||||
# opencv_imgcodecs is required for imread/imwrite
|
||||
set(perf_deps ${the_module} opencv_ts opencv_imgcodecs ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_opencv_ts_DEPS})
|
||||
set(perf_deps opencv_ts ${the_module} opencv_imgcodecs ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_opencv_ts_DEPS})
|
||||
ocv_check_dependencies(${perf_deps})
|
||||
|
||||
if(OCV_DEPENDENCIES_FOUND)
|
||||
@ -829,7 +828,7 @@ function(ocv_add_perf_tests)
|
||||
|
||||
ocv_add_executable(${the_target} ${OPENCV_PERF_${the_module}_SOURCES} ${${the_target}_pch})
|
||||
ocv_target_include_modules(${the_target} ${perf_deps} "${perf_path}")
|
||||
ocv_target_link_libraries(${the_target} ${OPENCV_MODULE_${the_module}_DEPS} ${perf_deps} ${OPENCV_LINKER_LIBS})
|
||||
ocv_target_link_libraries(${the_target} ${perf_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
|
||||
add_dependencies(opencv_perf_tests ${the_target})
|
||||
|
||||
# Additional target properties
|
||||
@ -864,7 +863,7 @@ function(ocv_add_accuracy_tests)
|
||||
__ocv_parse_test_sources(TEST ${ARGN})
|
||||
|
||||
# opencv_imgcodecs is required for imread/imwrite
|
||||
set(test_deps ${the_module} opencv_ts opencv_imgcodecs opencv_videoio ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_opencv_ts_DEPS})
|
||||
set(test_deps opencv_ts ${the_module} opencv_imgcodecs opencv_videoio ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_opencv_ts_DEPS})
|
||||
ocv_check_dependencies(${test_deps})
|
||||
if(OCV_DEPENDENCIES_FOUND)
|
||||
set(the_target "opencv_test_${name}")
|
||||
@ -884,7 +883,7 @@ function(ocv_add_accuracy_tests)
|
||||
|
||||
ocv_add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch})
|
||||
ocv_target_include_modules(${the_target} ${test_deps} "${test_path}")
|
||||
ocv_target_link_libraries(${the_target} ${OPENCV_MODULE_${the_module}_DEPS} ${test_deps} ${OPENCV_LINKER_LIBS})
|
||||
ocv_target_link_libraries(${the_target} ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
|
||||
add_dependencies(opencv_tests ${the_target})
|
||||
|
||||
# Additional target properties
|
||||
|
@ -276,12 +276,12 @@ macro(OCV_OPTION variable description value)
|
||||
endif()
|
||||
endforeach()
|
||||
unset(__varname)
|
||||
if("${__condition}" STREQUAL "")
|
||||
if(__condition STREQUAL "")
|
||||
set(__condition 2 GREATER 1)
|
||||
endif()
|
||||
|
||||
if(${__condition})
|
||||
if("${__value}" MATCHES ";")
|
||||
if(__value MATCHES ";")
|
||||
if(${__value})
|
||||
option(${variable} "${description}" ON)
|
||||
else()
|
||||
|
@ -3,7 +3,7 @@ Reading Geospatial Raster files with GDAL {#tutorial_raster_io_gdal}
|
||||
|
||||
Geospatial raster data is a heavily used product in Geographic Information Systems and
|
||||
Photogrammetry. Raster data typically can represent imagery and Digital Elevation Models (DEM). The
|
||||
standard library for loading GIS imagery is the Geographic Data Abstraction Library (GDAL). In this
|
||||
standard library for loading GIS imagery is the Geographic Data Abstraction Library [(GDAL)](http://www.gdal.org). In this
|
||||
example, we will show techniques for loading GIS raster formats using native OpenCV functions. In
|
||||
addition, we will show an example of how OpenCV can use this data for novel and interesting
|
||||
purposes.
|
||||
@ -13,8 +13,8 @@ Goals
|
||||
|
||||
The primary objectives for this tutorial:
|
||||
|
||||
- How to use OpenCV imread to load satellite imagery.
|
||||
- How to use OpenCV imread to load SRTM Digital Elevation Models
|
||||
- How to use OpenCV [imread](@ref imread) to load satellite imagery.
|
||||
- How to use OpenCV [imread](@ref imread) to load SRTM Digital Elevation Models
|
||||
- Given the corner coordinates of both the image and DEM, correlate the elevation data to the
|
||||
image to find elevations for each pixel.
|
||||
- Show a basic, easy-to-implement example of a terrain heat map.
|
||||
@ -54,9 +54,9 @@ signed shorts.
|
||||
Notes
|
||||
-----
|
||||
|
||||
### Lat/Lon (Geodetic) Coordinates should normally be avoided
|
||||
### Lat/Lon (Geographic) Coordinates should normally be avoided
|
||||
|
||||
The Geodetic Coordinate System is a spherical coordinate system, meaning that using them with
|
||||
The Geographic Coordinate System is a spherical coordinate system, meaning that using them with
|
||||
Cartesian mathematics is technically incorrect. This demo uses them to increase the readability and
|
||||
is accurate enough to make the point. A better coordinate system would be Universal Transverse
|
||||
Mercator.
|
||||
@ -94,8 +94,8 @@ Below is the output of the program. Use the first image as the input. For the DE
|
||||
the SRTM file located at the USGS here.
|
||||
[<http://dds.cr.usgs.gov/srtm/version2_1/SRTM1/Region_04/N37W123.hgt.zip>](http://dds.cr.usgs.gov/srtm/version2_1/SRTM1/Region_04/N37W123.hgt.zip)
|
||||
|
||||
data:image/s3,"s3://crabby-images/5b267/5b26739de8b5cf994056717cd43cd6385dc37f5e" alt=""
|
||||
data:image/s3,"s3://crabby-images/5b267/5b26739de8b5cf994056717cd43cd6385dc37f5e" alt="Input Image"
|
||||
|
||||
data:image/s3,"s3://crabby-images/ffc08/ffc08f1800159e2037b84d4113b306e048c8a080" alt=""
|
||||
data:image/s3,"s3://crabby-images/ffc08/ffc08f1800159e2037b84d4113b306e048c8a080" alt="Heat Map"
|
||||
|
||||
data:image/s3,"s3://crabby-images/cb7da/cb7da7e1657451a664ad0d441859865c9b4547ad" alt=""
|
||||
data:image/s3,"s3://crabby-images/cb7da/cb7da7e1657451a664ad0d441859865c9b4547ad" alt="Heat Map Overlay"
|
||||
|
@ -2972,7 +2972,13 @@ static void collectCalibrationData( InputArrayOfArrays objectPoints,
|
||||
for( i = 0; i < nimages; i++ )
|
||||
{
|
||||
ni = objectPoints.getMat(i).checkVector(3, CV_32F);
|
||||
CV_Assert( ni >= 0 );
|
||||
if( ni <= 0 )
|
||||
CV_Error(CV_StsUnsupportedFormat, "objectPoints should contain vector of vectors of points of type Point3f");
|
||||
int ni1 = imagePoints1.getMat(i).checkVector(2, CV_32F);
|
||||
if( ni1 <= 0 )
|
||||
CV_Error(CV_StsUnsupportedFormat, "imagePoints1 should contain vector of vectors of points of type Point2f");
|
||||
CV_Assert( ni == ni1 );
|
||||
|
||||
total += ni;
|
||||
}
|
||||
|
||||
@ -2995,8 +3001,6 @@ static void collectCalibrationData( InputArrayOfArrays objectPoints,
|
||||
Mat objpt = objectPoints.getMat(i);
|
||||
Mat imgpt1 = imagePoints1.getMat(i);
|
||||
ni = objpt.checkVector(3, CV_32F);
|
||||
int ni1 = imgpt1.checkVector(2, CV_32F);
|
||||
CV_Assert( ni > 0 && ni == ni1 );
|
||||
npoints.at<int>(i) = ni;
|
||||
memcpy( objPtData + j, objpt.ptr(), ni*sizeof(objPtData[0]) );
|
||||
memcpy( imgPtData1 + j, imgpt1.ptr(), ni*sizeof(imgPtData1[0]) );
|
||||
|
@ -3284,7 +3284,8 @@ inline void UMat::release()
|
||||
{
|
||||
if( u && CV_XADD(&(u->urefcount), -1) == 1 )
|
||||
deallocate();
|
||||
size.p[0] = 0;
|
||||
for(int i = 0; i < dims; i++)
|
||||
size.p[i] = 0;
|
||||
u = 0;
|
||||
}
|
||||
|
||||
|
@ -80,6 +80,16 @@
|
||||
namespace cv { namespace cuda {
|
||||
CV_EXPORTS cv::String getNppErrorMessage(int code);
|
||||
CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
|
||||
|
||||
CV_EXPORTS GpuMat getInputMat(InputArray _src, Stream& stream);
|
||||
|
||||
CV_EXPORTS GpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream);
|
||||
static inline GpuMat getOutputMat(OutputArray _dst, Size size, int type, Stream& stream)
|
||||
{
|
||||
return getOutputMat(_dst, size.height, size.width, type, stream);
|
||||
}
|
||||
|
||||
CV_EXPORTS void syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream);
|
||||
}}
|
||||
|
||||
#ifndef HAVE_CUDA
|
||||
|
@ -2355,6 +2355,165 @@ struct Mul_SIMD<float, float>
|
||||
}
|
||||
};
|
||||
|
||||
#elif CV_SSE2
|
||||
|
||||
#if CV_SSE4_1
|
||||
|
||||
template <>
|
||||
struct Mul_SIMD<ushort, float>
|
||||
{
|
||||
Mul_SIMD()
|
||||
{
|
||||
haveSSE = checkHardwareSupport(CV_CPU_SSE4_1);
|
||||
}
|
||||
|
||||
int operator() (const ushort * src1, const ushort * src2, ushort * dst, int width, float scale) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
if (!haveSSE)
|
||||
return x;
|
||||
|
||||
__m128i v_zero = _mm_setzero_si128();
|
||||
|
||||
if( scale != 1.0f )
|
||||
{
|
||||
__m128 v_scale = _mm_set1_ps(scale);
|
||||
for ( ; x <= width - 8; x += 8)
|
||||
{
|
||||
__m128i v_src1 = _mm_loadu_si128((__m128i const *)(src1 + x));
|
||||
__m128i v_src2 = _mm_loadu_si128((__m128i const *)(src2 + x));
|
||||
|
||||
__m128 v_dst1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src1, v_zero)),
|
||||
_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src2, v_zero)));
|
||||
v_dst1 = _mm_mul_ps(v_dst1, v_scale);
|
||||
|
||||
__m128 v_dst2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src1, v_zero)),
|
||||
_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src2, v_zero)));
|
||||
v_dst2 = _mm_mul_ps(v_dst2, v_scale);
|
||||
|
||||
__m128i v_dsti = _mm_packus_epi32(_mm_cvtps_epi32(v_dst1), _mm_cvtps_epi32(v_dst2));
|
||||
_mm_storeu_si128((__m128i *)(dst + x), v_dsti);
|
||||
}
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
bool haveSSE;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
template <>
|
||||
struct Mul_SIMD<schar, float>
|
||||
{
|
||||
Mul_SIMD()
|
||||
{
|
||||
haveSSE = checkHardwareSupport(CV_CPU_SSE2);
|
||||
}
|
||||
|
||||
int operator() (const schar * src1, const schar * src2, schar * dst, int width, float scale) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
if (!haveSSE)
|
||||
return x;
|
||||
|
||||
__m128i v_zero = _mm_setzero_si128();
|
||||
|
||||
if( scale == 1.0f )
|
||||
for ( ; x <= width - 8; x += 8)
|
||||
{
|
||||
__m128i v_src1 = _mm_loadl_epi64((__m128i const *)(src1 + x));
|
||||
__m128i v_src2 = _mm_loadl_epi64((__m128i const *)(src2 + x));
|
||||
|
||||
v_src1 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src1), 8);
|
||||
v_src2 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src2), 8);
|
||||
|
||||
__m128 v_dst1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src1), 16)),
|
||||
_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src2), 16)));
|
||||
|
||||
__m128 v_dst2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src1), 16)),
|
||||
_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src2), 16)));
|
||||
|
||||
__m128i v_dsti = _mm_packs_epi32(_mm_cvtps_epi32(v_dst1), _mm_cvtps_epi32(v_dst2));
|
||||
_mm_storel_epi64((__m128i *)(dst + x), _mm_packs_epi16(v_dsti, v_zero));
|
||||
}
|
||||
else
|
||||
{
|
||||
__m128 v_scale = _mm_set1_ps(scale);
|
||||
for ( ; x <= width - 8; x += 8)
|
||||
{
|
||||
__m128i v_src1 = _mm_loadl_epi64((__m128i const *)(src1 + x));
|
||||
__m128i v_src2 = _mm_loadl_epi64((__m128i const *)(src2 + x));
|
||||
|
||||
v_src1 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src1), 8);
|
||||
v_src2 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src2), 8);
|
||||
|
||||
__m128 v_dst1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src1), 16)),
|
||||
_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src2), 16)));
|
||||
v_dst1 = _mm_mul_ps(v_dst1, v_scale);
|
||||
|
||||
__m128 v_dst2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src1), 16)),
|
||||
_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src2), 16)));
|
||||
v_dst2 = _mm_mul_ps(v_dst2, v_scale);
|
||||
|
||||
__m128i v_dsti = _mm_packs_epi32(_mm_cvtps_epi32(v_dst1), _mm_cvtps_epi32(v_dst2));
|
||||
_mm_storel_epi64((__m128i *)(dst + x), _mm_packs_epi16(v_dsti, v_zero));
|
||||
}
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
bool haveSSE;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Mul_SIMD<short, float>
|
||||
{
|
||||
Mul_SIMD()
|
||||
{
|
||||
haveSSE = checkHardwareSupport(CV_CPU_SSE2);
|
||||
}
|
||||
|
||||
int operator() (const short * src1, const short * src2, short * dst, int width, float scale) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
if (!haveSSE)
|
||||
return x;
|
||||
|
||||
__m128i v_zero = _mm_setzero_si128();
|
||||
|
||||
if( scale != 1.0f )
|
||||
{
|
||||
__m128 v_scale = _mm_set1_ps(scale);
|
||||
for ( ; x <= width - 8; x += 8)
|
||||
{
|
||||
__m128i v_src1 = _mm_loadu_si128((__m128i const *)(src1 + x));
|
||||
__m128i v_src2 = _mm_loadu_si128((__m128i const *)(src2 + x));
|
||||
|
||||
__m128 v_dst1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src1), 16)),
|
||||
_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src2), 16)));
|
||||
v_dst1 = _mm_mul_ps(v_dst1, v_scale);
|
||||
|
||||
__m128 v_dst2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src1), 16)),
|
||||
_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src2), 16)));
|
||||
v_dst2 = _mm_mul_ps(v_dst2, v_scale);
|
||||
|
||||
__m128i v_dsti = _mm_packs_epi32(_mm_cvtps_epi32(v_dst1), _mm_cvtps_epi32(v_dst2));
|
||||
_mm_storeu_si128((__m128i *)(dst + x), v_dsti);
|
||||
}
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
bool haveSSE;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
template<typename T, typename WT> static void
|
||||
@ -2772,7 +2931,144 @@ struct AddWeighted_SIMD
|
||||
}
|
||||
};
|
||||
|
||||
#if CV_NEON
|
||||
#if CV_SSE2
|
||||
|
||||
template <>
|
||||
struct AddWeighted_SIMD<schar, float>
|
||||
{
|
||||
AddWeighted_SIMD()
|
||||
{
|
||||
haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
||||
}
|
||||
|
||||
int operator() (const schar * src1, const schar * src2, schar * dst, int width, float alpha, float beta, float gamma) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
if (!haveSSE2)
|
||||
return x;
|
||||
|
||||
__m128i v_zero = _mm_setzero_si128();
|
||||
__m128 v_alpha = _mm_set1_ps(alpha), v_beta = _mm_set1_ps(beta),
|
||||
v_gamma = _mm_set1_ps(gamma);
|
||||
|
||||
for( ; x <= width - 8; x += 8 )
|
||||
{
|
||||
__m128i v_src1 = _mm_loadl_epi64((const __m128i *)(src1 + x));
|
||||
__m128i v_src2 = _mm_loadl_epi64((const __m128i *)(src2 + x));
|
||||
|
||||
__m128i v_src1_p = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src1), 8);
|
||||
__m128i v_src2_p = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src2), 8);
|
||||
|
||||
__m128 v_dstf0 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src1_p), 16)), v_alpha);
|
||||
v_dstf0 = _mm_add_ps(_mm_add_ps(v_dstf0, v_gamma),
|
||||
_mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src2_p), 16)), v_beta));
|
||||
|
||||
__m128 v_dstf1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src1_p), 16)), v_alpha);
|
||||
v_dstf1 = _mm_add_ps(_mm_add_ps(v_dstf1, v_gamma),
|
||||
_mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src2_p), 16)), v_beta));
|
||||
|
||||
__m128i v_dst16 = _mm_packs_epi32(_mm_cvtps_epi32(v_dstf0),
|
||||
_mm_cvtps_epi32(v_dstf1));
|
||||
|
||||
_mm_storel_epi64((__m128i *)(dst + x), _mm_packs_epi16(v_dst16, v_zero));
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
bool haveSSE2;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct AddWeighted_SIMD<short, float>
|
||||
{
|
||||
AddWeighted_SIMD()
|
||||
{
|
||||
haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
||||
}
|
||||
|
||||
int operator() (const short * src1, const short * src2, short * dst, int width, float alpha, float beta, float gamma) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
if (!haveSSE2)
|
||||
return x;
|
||||
|
||||
__m128i v_zero = _mm_setzero_si128();
|
||||
__m128 v_alpha = _mm_set1_ps(alpha), v_beta = _mm_set1_ps(beta),
|
||||
v_gamma = _mm_set1_ps(gamma);
|
||||
|
||||
for( ; x <= width - 8; x += 8 )
|
||||
{
|
||||
__m128i v_src1 = _mm_loadu_si128((const __m128i *)(src1 + x));
|
||||
__m128i v_src2 = _mm_loadu_si128((const __m128i *)(src2 + x));
|
||||
|
||||
__m128 v_dstf0 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src1), 16)), v_alpha);
|
||||
v_dstf0 = _mm_add_ps(_mm_add_ps(v_dstf0, v_gamma),
|
||||
_mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src2), 16)), v_beta));
|
||||
|
||||
__m128 v_dstf1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src1), 16)), v_alpha);
|
||||
v_dstf1 = _mm_add_ps(_mm_add_ps(v_dstf1, v_gamma),
|
||||
_mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src2), 16)), v_beta));
|
||||
|
||||
_mm_storeu_si128((__m128i *)(dst + x), _mm_packs_epi32(_mm_cvtps_epi32(v_dstf0),
|
||||
_mm_cvtps_epi32(v_dstf1)));
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
bool haveSSE2;
|
||||
};
|
||||
|
||||
#if CV_SSE4_1
|
||||
|
||||
template <>
|
||||
struct AddWeighted_SIMD<ushort, float>
|
||||
{
|
||||
AddWeighted_SIMD()
|
||||
{
|
||||
haveSSE4_1 = checkHardwareSupport(CV_CPU_SSE4_1);
|
||||
}
|
||||
|
||||
int operator() (const ushort * src1, const ushort * src2, ushort * dst, int width, float alpha, float beta, float gamma) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
if (!haveSSE4_1)
|
||||
return x;
|
||||
|
||||
__m128i v_zero = _mm_setzero_si128();
|
||||
__m128 v_alpha = _mm_set1_ps(alpha), v_beta = _mm_set1_ps(beta),
|
||||
v_gamma = _mm_set1_ps(gamma);
|
||||
|
||||
for( ; x <= width - 8; x += 8 )
|
||||
{
|
||||
__m128i v_src1 = _mm_loadu_si128((const __m128i *)(src1 + x));
|
||||
__m128i v_src2 = _mm_loadu_si128((const __m128i *)(src2 + x));
|
||||
|
||||
__m128 v_dstf0 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src1, v_zero)), v_alpha);
|
||||
v_dstf0 = _mm_add_ps(_mm_add_ps(v_dstf0, v_gamma),
|
||||
_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src2, v_zero)), v_beta));
|
||||
|
||||
__m128 v_dstf1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src1, v_zero)), v_alpha);
|
||||
v_dstf1 = _mm_add_ps(_mm_add_ps(v_dstf1, v_gamma),
|
||||
_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src2, v_zero)), v_beta));
|
||||
|
||||
_mm_storeu_si128((__m128i *)(dst + x), _mm_packus_epi32(_mm_cvtps_epi32(v_dstf0),
|
||||
_mm_cvtps_epi32(v_dstf1)));
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
bool haveSSE4_1;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#elif CV_NEON
|
||||
|
||||
template <>
|
||||
struct AddWeighted_SIMD<schar, float>
|
||||
|
@ -390,6 +390,11 @@ GpuMat& cv::cuda::GpuMat::setTo(Scalar value, InputArray _mask, Stream& stream)
|
||||
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
return setTo(value, stream);
|
||||
}
|
||||
|
||||
CV_DbgAssert( size() == mask.size() && mask.type() == CV_8UC1 );
|
||||
|
||||
typedef void (*func_t)(const GpuMat& mat, const GpuMat& mask, Scalar scalar, Stream& stream);
|
||||
|
@ -342,6 +342,75 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a GpuMat holding the data of _src.
//  - If _src already wraps a GpuMat, that matrix is returned as is.
//  - Otherwise a non-empty input is uploaded (on the given stream) into a
//    buffer taken from the stream's BufferPool.
//  - An empty non-GpuMat input yields a default-constructed GpuMat.
// Without CUDA support the function calls throw_no_cuda().
GpuMat cv::cuda::getInputMat(InputArray _src, Stream& stream)
{
    GpuMat result;

#ifndef HAVE_CUDA
    (void) _src;
    (void) stream;
    throw_no_cuda();
#else
    if (_src.kind() != _InputArray::CUDA_GPU_MAT)
    {
        if (!_src.empty())
        {
            BufferPool pool(stream);
            result = pool.getBuffer(_src.size(), _src.type());
            result.upload(_src, stream);
        }
    }
    else
    {
        result = _src.getGpuMat();
    }
#endif

    return result;
}
|
||||
|
||||
// Returns a rows x cols GpuMat of the requested type to write a result into.
//  - If _dst wraps a GpuMat, it is (re)created with the requested geometry and returned.
//  - Otherwise a temporary buffer is taken from the stream's BufferPool.
// Without CUDA support the function calls throw_no_cuda().
GpuMat cv::cuda::getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream)
{
    GpuMat result;

#ifndef HAVE_CUDA
    (void) _dst;
    (void) rows;
    (void) cols;
    (void) type;
    (void) stream;
    throw_no_cuda();
#else
    if (_dst.kind() != _InputArray::CUDA_GPU_MAT)
    {
        BufferPool pool(stream);
        result = pool.getBuffer(rows, cols, type);
    }
    else
    {
        _dst.create(rows, cols, type);
        result = _dst.getGpuMat();
    }
#endif

    return result;
}
|
||||
|
||||
void cv::cuda::syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
#ifndef HAVE_CUDA
|
||||
(void) dst;
|
||||
(void) _dst;
|
||||
(void) stream;
|
||||
throw_no_cuda();
|
||||
#else
|
||||
if (_dst.kind() != _InputArray::CUDA_GPU_MAT)
|
||||
{
|
||||
if (stream)
|
||||
dst.download(_dst, stream);
|
||||
else
|
||||
dst.download(_dst);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef HAVE_CUDA
|
||||
|
||||
GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
|
||||
|
@ -48,6 +48,13 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined ANDROID || defined __linux__
|
||||
# include <unistd.h>
|
||||
# include <fcntl.h>
|
||||
# include <elf.h>
|
||||
# include <linux/auxvec.h>
|
||||
#endif
|
||||
|
||||
#if defined WIN32 || defined _WIN32 || defined WINCE
|
||||
#ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
|
||||
#define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
|
||||
@ -251,6 +258,29 @@ struct HWFeatures
|
||||
f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX
|
||||
}
|
||||
|
||||
#if defined ANDROID || defined __linux__
    // Runtime NEON detection on Linux/Android.
#  if defined __aarch64__
    // NOTE(review): assumes every AArch64 Linux/Android target provides Advanced SIMD
    // (NEON); the 32-bit auxv parsing below does not apply there because the entries
    // are 64-bit wide and bit 12 of AT_HWCAP has a different meaning — confirm.
    f.have[CV_CPU_NEON] = true;
#  elif defined __arm__
    // 32-bit ARM: scan the ELF auxiliary vector for AT_HWCAP and test the NEON bit.
    // The scan is restricted to this architecture because bit 12 (4096) of AT_HWCAP
    // only denotes NEON on 32-bit ARM, and Elf32_auxv_t only matches the record
    // layout of a 32-bit process.
    int cpufile = open("/proc/self/auxv", O_RDONLY);

    if (cpufile >= 0)
    {
        Elf32_auxv_t auxv;
        // read() returns a signed count; compare in the signed domain so an
        // error (-1) or a short read simply ends the scan.
        const ssize_t size_auxv_t = static_cast<ssize_t>(sizeof(Elf32_auxv_t));

        while (read(cpufile, &auxv, sizeof(Elf32_auxv_t)) == size_auxv_t)
        {
            if (auxv.a_type == AT_HWCAP)
            {
                f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
                break;
            }
        }

        close(cpufile);
    }
#  endif
#elif (defined __clang__ || defined __APPLE__) && defined __ARM_NEON__
    // No auxv available: trust the compile-time NEON macro.
    f.have[CV_CPU_NEON] = true;
#endif
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
|
@ -6,4 +6,4 @@ set(the_description "CUDA-accelerated Computer Vision")
|
||||
|
||||
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4100 /wd4324 /wd4512 /wd4515 -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
|
||||
|
||||
ocv_define_module(cuda opencv_calib3d opencv_objdetect opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)
|
||||
ocv_define_module(cuda opencv_calib3d opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)
|
||||
|
@ -53,274 +53,11 @@
|
||||
@addtogroup cuda
|
||||
@{
|
||||
@defgroup cuda_calib3d Camera Calibration and 3D Reconstruction
|
||||
@defgroup cuda_objdetect Object Detection
|
||||
@}
|
||||
*/
|
||||
|
||||
namespace cv { namespace cuda {
|
||||
|
||||
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
|
||||
|
||||
//! @addtogroup cuda_objdetect
|
||||
//! @{
|
||||
|
||||
struct CV_EXPORTS HOGConfidence
|
||||
{
|
||||
double scale;
|
||||
std::vector<Point> locations;
|
||||
std::vector<double> confidences;
|
||||
std::vector<double> part_scores[4];
|
||||
};
|
||||
|
||||
/** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.
|
||||
|
||||
Interfaces of all methods are kept similar to the CPU HOG descriptor and detector analogues as much
|
||||
as possible.
|
||||
|
||||
@note
|
||||
- An example applying the HOG descriptor for people detection can be found at
|
||||
opencv_source_code/samples/cpp/peopledetect.cpp
|
||||
- A CUDA example applying the HOG descriptor for people detection can be found at
|
||||
opencv_source_code/samples/gpu/hog.cpp
|
||||
- (Python) An example applying the HOG descriptor for people detection can be found at
|
||||
opencv_source_code/samples/python2/peopledetect.py
|
||||
*/
|
||||
struct CV_EXPORTS HOGDescriptor
|
||||
{
|
||||
enum { DEFAULT_WIN_SIGMA = -1 };
|
||||
enum { DEFAULT_NLEVELS = 64 };
|
||||
enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
|
||||
|
||||
/** @brief Creates the HOG descriptor and detector.
|
||||
|
||||
@param win_size Detection window size. Align to block size and block stride.
|
||||
@param block_size Block size in pixels. Align to cell size. Only (16,16) is supported for now.
|
||||
@param block_stride Block stride. It must be a multiple of cell size.
|
||||
@param cell_size Cell size. Only (8, 8) is supported for now.
|
||||
@param nbins Number of bins. Only 9 bins per cell are supported for now.
|
||||
@param win_sigma Gaussian smoothing window parameter.
|
||||
@param threshold_L2hys L2-Hys normalization method shrinkage.
|
||||
@param gamma_correction Flag to specify whether the gamma correction preprocessing is required or
|
||||
not.
|
||||
@param nlevels Maximum number of detection window increases.
|
||||
*/
|
||||
HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
|
||||
Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
|
||||
int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
|
||||
double threshold_L2hys=0.2, bool gamma_correction=true,
|
||||
int nlevels=DEFAULT_NLEVELS);
|
||||
|
||||
/** @brief Returns the number of coefficients required for the classification.
|
||||
*/
|
||||
size_t getDescriptorSize() const;
|
||||
/** @brief Returns the block histogram size.
|
||||
*/
|
||||
size_t getBlockHistogramSize() const;
|
||||
|
||||
/** @brief Sets coefficients for the linear SVM classifier.
|
||||
*/
|
||||
void setSVMDetector(const std::vector<float>& detector);
|
||||
|
||||
/** @brief Returns coefficients of the classifier trained for people detection (for default window size).
|
||||
*/
|
||||
static std::vector<float> getDefaultPeopleDetector();
|
||||
/** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
|
||||
*/
|
||||
static std::vector<float> getPeopleDetector48x96();
|
||||
/** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
|
||||
*/
|
||||
static std::vector<float> getPeopleDetector64x128();
|
||||
|
||||
/** @brief Performs object detection without a multi-scale window.
|
||||
|
||||
@param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
|
||||
@param found_locations Left-top corner points of detected objects boundaries.
|
||||
@param hit_threshold Threshold for the distance between features and SVM classifying plane.
|
||||
Usually it is 0 and should be specified in the detector coefficients (as the last free
|
||||
coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
|
||||
manually here.
|
||||
@param win_stride Window stride. It must be a multiple of block stride.
|
||||
@param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
|
||||
*/
|
||||
void detect(const GpuMat& img, std::vector<Point>& found_locations,
|
||||
double hit_threshold=0, Size win_stride=Size(),
|
||||
Size padding=Size());
|
||||
|
||||
/** @brief Performs object detection with a multi-scale window.
|
||||
|
||||
@param img Source image. See cuda::HOGDescriptor::detect for type limitations.
|
||||
@param found_locations Detected objects boundaries.
|
||||
@param hit_threshold Threshold for the distance between features and SVM classifying plane. See
|
||||
cuda::HOGDescriptor::detect for details.
|
||||
@param win_stride Window stride. It must be a multiple of block stride.
|
||||
@param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
|
||||
@param scale0 Coefficient of the detection window increase.
|
||||
@param group_threshold Coefficient to regulate the similarity threshold. When detected, some
|
||||
objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles .
|
||||
*/
|
||||
void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
|
||||
double hit_threshold=0, Size win_stride=Size(),
|
||||
Size padding=Size(), double scale0=1.05,
|
||||
int group_threshold=2);
|
||||
|
||||
void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
|
||||
Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
|
||||
|
||||
void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
|
||||
double hit_threshold, Size win_stride, Size padding,
|
||||
std::vector<HOGConfidence> &conf_out, int group_threshold);
|
||||
|
||||
/** @brief Returns block descriptors computed for the whole image.
|
||||
|
||||
@param img Source image. See cuda::HOGDescriptor::detect for type limitations.
|
||||
@param win_stride Window stride. It must be a multiple of block stride.
|
||||
@param descriptors 2D array of descriptors.
|
||||
@param descr_format Descriptor storage format:
|
||||
- **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
|
||||
- **DESCR_FORMAT_COL_BY_COL** - Column-major order.
|
||||
|
||||
The function is mainly used to learn the classifier.
|
||||
*/
|
||||
void getDescriptors(const GpuMat& img, Size win_stride,
|
||||
GpuMat& descriptors,
|
||||
int descr_format=DESCR_FORMAT_COL_BY_COL);
|
||||
|
||||
Size win_size;
|
||||
Size block_size;
|
||||
Size block_stride;
|
||||
Size cell_size;
|
||||
int nbins;
|
||||
double win_sigma;
|
||||
double threshold_L2hys;
|
||||
bool gamma_correction;
|
||||
int nlevels;
|
||||
|
||||
protected:
|
||||
void computeBlockHistograms(const GpuMat& img);
|
||||
void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
|
||||
|
||||
double getWinSigma() const;
|
||||
bool checkDetectorSize() const;
|
||||
|
||||
static int numPartsWithin(int size, int part_size, int stride);
|
||||
static Size numPartsWithin(Size size, Size part_size, Size stride);
|
||||
|
||||
// Coefficients of the separating plane
|
||||
float free_coef;
|
||||
GpuMat detector;
|
||||
|
||||
// Results of the last classification step
|
||||
GpuMat labels, labels_buf;
|
||||
Mat labels_host;
|
||||
|
||||
// Results of the last histogram evaluation step
|
||||
GpuMat block_hists, block_hists_buf;
|
||||
|
||||
// Gradient computation results
|
||||
GpuMat grad, qangle, grad_buf, qangle_buf;
|
||||
|
||||
// returns subbuffer with required size, reallocates buffer if necessary.
|
||||
static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
|
||||
static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
|
||||
|
||||
std::vector<GpuMat> image_scales;
|
||||
};
|
||||
|
||||
//////////////////////////// CascadeClassifier ////////////////////////////
|
||||
|
||||
/** @brief Cascade classifier class used for object detection. Supports HAAR and LBP cascades.
|
||||
|
||||
@note
|
||||
- A cascade classifier example can be found at
|
||||
opencv_source_code/samples/gpu/cascadeclassifier.cpp
|
||||
- An NVIDIA API specific cascade classifier example can be found at
|
||||
opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
|
||||
*/
|
||||
class CV_EXPORTS CascadeClassifier_CUDA
|
||||
{
|
||||
public:
|
||||
CascadeClassifier_CUDA();
|
||||
/** @brief Loads the classifier from a file. Cascade type is detected automatically by constructor parameter.
|
||||
|
||||
@param filename Name of the file from which the classifier is loaded. Only the old haar classifier
|
||||
(trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
|
||||
type of OpenCV XML cascade is supported for LBP.
|
||||
*/
|
||||
CascadeClassifier_CUDA(const String& filename);
|
||||
~CascadeClassifier_CUDA();
|
||||
|
||||
/** @brief Checks whether the classifier is loaded or not.
|
||||
*/
|
||||
bool empty() const;
|
||||
/** @brief Loads the classifier from a file. The previous content is destroyed.
|
||||
|
||||
@param filename Name of the file from which the classifier is loaded. Only the old haar classifier
|
||||
(trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
|
||||
type of OpenCV XML cascade is supported for LBP.
|
||||
*/
|
||||
bool load(const String& filename);
|
||||
/** @brief Destroys the loaded classifier.
|
||||
*/
|
||||
void release();
|
||||
|
||||
/** @overload */
|
||||
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
|
||||
/** @brief Detects objects of different sizes in the input image.
|
||||
|
||||
@param image Matrix of type CV_8U containing an image where objects should be detected.
|
||||
@param objectsBuf Buffer to store detected objects (rectangles). If it is empty, it is allocated
|
||||
with the default size. If not empty, the function searches not more than N objects, where
|
||||
N = sizeof(objectsBuf's data)/sizeof(cv::Rect).
|
||||
@param maxObjectSize Maximum possible object size. Objects larger than that are ignored. Used for
|
||||
second signature and supported only for LBP cascades.
|
||||
@param scaleFactor Parameter specifying how much the image size is reduced at each image scale.
|
||||
@param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have
|
||||
to retain it.
|
||||
@param minSize Minimum possible object size. Objects smaller than that are ignored.
|
||||
|
||||
The detected objects are returned as a list of rectangles.
|
||||
|
||||
The function returns the number of detected objects, so you can retrieve them as in the following
|
||||
example:
|
||||
@code
|
||||
cuda::CascadeClassifier_CUDA cascade_gpu(...);
|
||||
|
||||
Mat image_cpu = imread(...);
|
||||
GpuMat image_gpu(image_cpu);
|
||||
|
||||
GpuMat objbuf;
|
||||
int detections_number = cascade_gpu.detectMultiScale( image_gpu,
|
||||
objbuf, 1.2, minNeighbors);
|
||||
|
||||
Mat obj_host;
|
||||
// download only detected number of rectangles
|
||||
objbuf.colRange(0, detections_number).download(obj_host);
|
||||
|
||||
Rect* faces = obj_host.ptr<Rect>();
|
||||
for(int i = 0; i < detections_number; ++i)
|
||||
cv::rectangle(image_cpu, faces[i], Scalar(255));
|
||||
|
||||
imshow("Faces", image_cpu);
|
||||
@endcode
|
||||
@sa CascadeClassifier::detectMultiScale
|
||||
*/
|
||||
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
|
||||
|
||||
bool findLargestObject;
|
||||
bool visualizeInPlace;
|
||||
|
||||
Size getClassifierSize() const;
|
||||
|
||||
private:
|
||||
struct CascadeClassifierImpl;
|
||||
CascadeClassifierImpl* impl;
|
||||
struct HaarCascade;
|
||||
struct LbpCascade;
|
||||
friend class CascadeClassifier_CUDA_LBP;
|
||||
};
|
||||
|
||||
//! @} cuda_objdetect
|
||||
|
||||
//////////////////////////// Labeling ////////////////////////////
|
||||
|
||||
//! @addtogroup cuda
|
||||
|
@ -56,7 +56,6 @@
|
||||
|
||||
#include "opencv2/cuda.hpp"
|
||||
#include "opencv2/calib3d.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
|
||||
#ifdef GTEST_CREATE_SHARED_LIBRARY
|
||||
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -47,7 +47,6 @@
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudawarping.hpp"
|
||||
#include "opencv2/calib3d.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
#include "opencv2/core/utility.hpp"
|
||||
|
@ -60,7 +60,6 @@
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/core/opengl.hpp"
|
||||
#include "opencv2/calib3d.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
|
||||
#include "cvconfig.h"
|
||||
|
||||
|
@ -130,12 +130,6 @@ This function, in contrast to divide, uses a round-down rounding mode.
|
||||
*/
|
||||
CV_EXPORTS void divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes element-wise reciprocal of an array scaled by a constant (dst = src1/src2, where src1 is a scalar)
|
||||
static inline void divide(double src1, InputArray src2, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null())
|
||||
{
|
||||
divide(src1, src2, dst, 1.0, dtype, stream);
|
||||
}
|
||||
|
||||
/** @brief Computes per-element absolute difference of two matrices (or of a matrix and scalar).
|
||||
|
||||
@param src1 First source matrix or scalar.
|
||||
@ -530,116 +524,53 @@ CV_EXPORTS void copyMakeBorder(InputArray src, OutputArray dst, int top, int bot
|
||||
@param src1 Source matrix. Any matrices except 64F are supported.
|
||||
@param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now.
|
||||
@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
|
||||
@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
@sa norm
|
||||
*/
|
||||
CV_EXPORTS double norm(InputArray src1, int normType, InputArray mask, GpuMat& buf);
|
||||
/** @overload
|
||||
uses new buffer, no mask
|
||||
*/
|
||||
static inline double norm(InputArray src, int normType)
|
||||
{
|
||||
GpuMat buf;
|
||||
return norm(src, normType, GpuMat(), buf);
|
||||
}
|
||||
/** @overload
|
||||
no mask
|
||||
*/
|
||||
static inline double norm(InputArray src, int normType, GpuMat& buf)
|
||||
{
|
||||
return norm(src, normType, GpuMat(), buf);
|
||||
}
|
||||
CV_EXPORTS double norm(InputArray src1, int normType, InputArray mask = noArray());
|
||||
/** @overload */
|
||||
CV_EXPORTS void calcNorm(InputArray src, OutputArray dst, int normType, InputArray mask = noArray(), Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Returns the norm of the difference of two matrices.
|
||||
|
||||
@param src1 Source matrix. Any matrices except 64F are supported.
|
||||
@param src2 Second source matrix (if any) with the same size and type as src1.
|
||||
@param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now.
|
||||
@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
@sa norm
|
||||
*/
|
||||
CV_EXPORTS double norm(InputArray src1, InputArray src2, GpuMat& buf, int normType=NORM_L2);
|
||||
/** @overload
|
||||
uses new buffer
|
||||
*/
|
||||
static inline double norm(InputArray src1, InputArray src2, int normType=NORM_L2)
|
||||
{
|
||||
GpuMat buf;
|
||||
return norm(src1, src2, buf, normType);
|
||||
}
|
||||
CV_EXPORTS double norm(InputArray src1, InputArray src2, int normType=NORM_L2);
|
||||
/** @overload */
|
||||
CV_EXPORTS void calcNormDiff(InputArray src1, InputArray src2, OutputArray dst, int normType=NORM_L2, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Returns the sum of matrix elements.
|
||||
|
||||
@param src Source image of any depth except for CV_64F .
|
||||
@param mask optional operation mask; it must have the same size as src and CV_8UC1 type.
|
||||
@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
@sa sum
|
||||
*/
|
||||
CV_EXPORTS Scalar sum(InputArray src, InputArray mask, GpuMat& buf);
|
||||
/** @overload
|
||||
uses new buffer, no mask
|
||||
*/
|
||||
static inline Scalar sum(InputArray src)
|
||||
{
|
||||
GpuMat buf;
|
||||
return sum(src, GpuMat(), buf);
|
||||
}
|
||||
/** @overload
|
||||
no mask
|
||||
*/
|
||||
static inline Scalar sum(InputArray src, GpuMat& buf)
|
||||
{
|
||||
return sum(src, GpuMat(), buf);
|
||||
}
|
||||
CV_EXPORTS Scalar sum(InputArray src, InputArray mask = noArray());
|
||||
/** @overload */
|
||||
CV_EXPORTS void calcSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Returns the sum of absolute values for matrix elements.
|
||||
|
||||
@param src Source image of any depth except for CV_64F .
|
||||
@param mask optional operation mask; it must have the same size as src and CV_8UC1 type.
|
||||
@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
*/
|
||||
CV_EXPORTS Scalar absSum(InputArray src, InputArray mask, GpuMat& buf);
|
||||
/** @overload
|
||||
uses new buffer, no mask
|
||||
*/
|
||||
static inline Scalar absSum(InputArray src)
|
||||
{
|
||||
GpuMat buf;
|
||||
return absSum(src, GpuMat(), buf);
|
||||
}
|
||||
/** @overload
|
||||
no mask
|
||||
*/
|
||||
static inline Scalar absSum(InputArray src, GpuMat& buf)
|
||||
{
|
||||
return absSum(src, GpuMat(), buf);
|
||||
}
|
||||
CV_EXPORTS Scalar absSum(InputArray src, InputArray mask = noArray());
|
||||
/** @overload */
|
||||
CV_EXPORTS void calcAbsSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Returns the squared sum of matrix elements.
|
||||
|
||||
@param src Source image of any depth except for CV_64F .
|
||||
@param mask optional operation mask; it must have the same size as src and CV_8UC1 type.
|
||||
@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
*/
|
||||
CV_EXPORTS Scalar sqrSum(InputArray src, InputArray mask, GpuMat& buf);
|
||||
/** @overload
|
||||
uses new buffer, no mask
|
||||
*/
|
||||
static inline Scalar sqrSum(InputArray src)
|
||||
{
|
||||
GpuMat buf;
|
||||
return sqrSum(src, GpuMat(), buf);
|
||||
}
|
||||
/** @overload
|
||||
no mask
|
||||
*/
|
||||
static inline Scalar sqrSum(InputArray src, GpuMat& buf)
|
||||
{
|
||||
return sqrSum(src, GpuMat(), buf);
|
||||
}
|
||||
CV_EXPORTS Scalar sqrSum(InputArray src, InputArray mask = noArray());
|
||||
/** @overload */
|
||||
CV_EXPORTS void calcSqrSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Finds global minimum and maximum matrix elements and returns their values.
|
||||
|
||||
@ -647,21 +578,14 @@ static inline Scalar sqrSum(InputArray src, GpuMat& buf)
|
||||
@param minVal Pointer to the returned minimum value. Use NULL if not required.
|
||||
@param maxVal Pointer to the returned maximum value. Use NULL if not required.
|
||||
@param mask Optional mask to select a sub-matrix.
|
||||
@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
The function does not work with CV_64F images on GPUs with the compute capability \< 1.3.
|
||||
|
||||
@sa minMaxLoc
|
||||
*/
|
||||
CV_EXPORTS void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask, GpuMat& buf);
|
||||
/** @overload
|
||||
uses new buffer
|
||||
*/
|
||||
static inline void minMax(InputArray src, double* minVal, double* maxVal=0, InputArray mask=noArray())
|
||||
{
|
||||
GpuMat buf;
|
||||
minMax(src, minVal, maxVal, mask, buf);
|
||||
}
|
||||
CV_EXPORTS void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask = noArray());
|
||||
/** @overload */
|
||||
CV_EXPORTS void findMinMax(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Finds global minimum and maximum matrix elements and returns their values with locations.
|
||||
|
||||
@ -671,44 +595,28 @@ static inline void minMax(InputArray src, double* minVal, double* maxVal=0, Inpu
|
||||
@param minLoc Pointer to the returned minimum location. Use NULL if not required.
|
||||
@param maxLoc Pointer to the returned maximum location. Use NULL if not required.
|
||||
@param mask Optional mask to select a sub-matrix.
|
||||
@param valbuf Optional values buffer to avoid extra memory allocations. It is resized
|
||||
automatically.
|
||||
@param locbuf Optional locations buffer to avoid extra memory allocations. It is resized
|
||||
automatically.
|
||||
|
||||
The function does not work with CV_64F images on GPU with the compute capability \< 1.3.
|
||||
|
||||
@sa minMaxLoc
|
||||
*/
|
||||
CV_EXPORTS void minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
|
||||
InputArray mask, GpuMat& valbuf, GpuMat& locbuf);
|
||||
/** @overload
|
||||
uses new buffer
|
||||
*/
|
||||
static inline void minMaxLoc(InputArray src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0,
|
||||
InputArray mask=noArray())
|
||||
{
|
||||
GpuMat valBuf, locBuf;
|
||||
minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
|
||||
}
|
||||
InputArray mask = noArray());
|
||||
/** @overload */
|
||||
CV_EXPORTS void findMinMaxLoc(InputArray src, OutputArray minMaxVals, OutputArray loc,
|
||||
InputArray mask = noArray(), Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Counts non-zero matrix elements.
|
||||
|
||||
@param src Single-channel source image.
|
||||
@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
The function does not work with CV_64F images on GPUs with the compute capability \< 1.3.
|
||||
|
||||
@sa countNonZero
|
||||
*/
|
||||
CV_EXPORTS int countNonZero(InputArray src, GpuMat& buf);
|
||||
/** @overload
|
||||
uses new buffer
|
||||
*/
|
||||
static inline int countNonZero(const GpuMat& src)
|
||||
{
|
||||
GpuMat buf;
|
||||
return countNonZero(src, buf);
|
||||
}
|
||||
CV_EXPORTS int countNonZero(InputArray src);
|
||||
/** @overload */
|
||||
CV_EXPORTS void countNonZero(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Reduces a matrix to a vector.
|
||||
|
||||
@ -743,19 +651,12 @@ CV_EXPORTS void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, i
|
||||
@param mtx Source matrix. CV_8UC1 matrices are supported for now.
|
||||
@param mean Mean value.
|
||||
@param stddev Standard deviation value.
|
||||
@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
@sa meanStdDev
|
||||
*/
|
||||
CV_EXPORTS void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
|
||||
/** @overload
|
||||
uses new buffer
|
||||
*/
|
||||
static inline void meanStdDev(InputArray src, Scalar& mean, Scalar& stddev)
|
||||
{
|
||||
GpuMat buf;
|
||||
meanStdDev(src, mean, stddev, buf);
|
||||
}
|
||||
CV_EXPORTS void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev);
|
||||
/** @overload */
|
||||
CV_EXPORTS void meanStdDev(InputArray mtx, OutputArray dst, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Computes a standard deviation of integral images.
|
||||
|
||||
@ -779,64 +680,32 @@ normalization.
|
||||
@param dtype When negative, the output array has the same type as src; otherwise, it has the same
|
||||
number of channels as src and the depth =CV_MAT_DEPTH(dtype).
|
||||
@param mask Optional operation mask.
|
||||
@param norm_buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
@param cvt_buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
@param stream Stream for the asynchronous version.
|
||||
|
||||
@sa normalize
|
||||
*/
|
||||
CV_EXPORTS void normalize(InputArray src, OutputArray dst, double alpha, double beta,
|
||||
int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf);
|
||||
/** @overload
|
||||
uses new buffers
|
||||
*/
|
||||
static inline void normalize(InputArray src, OutputArray dst, double alpha = 1, double beta = 0,
|
||||
int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray())
|
||||
{
|
||||
GpuMat norm_buf;
|
||||
GpuMat cvt_buf;
|
||||
normalize(src, dst, alpha, beta, norm_type, dtype, mask, norm_buf, cvt_buf);
|
||||
}
|
||||
int norm_type, int dtype, InputArray mask = noArray(),
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Computes an integral image.
|
||||
|
||||
@param src Source image. Only CV_8UC1 images are supported for now.
|
||||
@param sum Integral image containing 32-bit unsigned integer values packed into CV_32SC1 .
|
||||
@param buffer Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
@param stream Stream for the asynchronous version.
|
||||
|
||||
@sa integral
|
||||
*/
|
||||
CV_EXPORTS void integral(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null());
|
||||
static inline void integralBuffered(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null())
|
||||
{
|
||||
integral(src, sum, buffer, stream);
|
||||
}
|
||||
/** @overload
|
||||
uses new buffer
|
||||
*/
|
||||
static inline void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null())
|
||||
{
|
||||
GpuMat buffer;
|
||||
integral(src, sum, buffer, stream);
|
||||
}
|
||||
CV_EXPORTS void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Computes a squared integral image.
|
||||
|
||||
@param src Source image. Only CV_8UC1 images are supported for now.
|
||||
@param sqsum Squared integral image containing 64-bit unsigned integer values packed into
|
||||
CV_64FC1 .
|
||||
@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
CV_EXPORTS void sqrIntegral(InputArray src, OutputArray sqsum, GpuMat& buf, Stream& stream = Stream::Null());
|
||||
/** @overload
|
||||
uses new buffer
|
||||
*/
|
||||
static inline void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null())
|
||||
{
|
||||
GpuMat buffer;
|
||||
sqrIntegral(src, sqsum, buffer, stream);
|
||||
}
|
||||
CV_EXPORTS void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null());
|
||||
|
||||
//! @} cudaarithm_reduce
|
||||
|
||||
|
@ -108,10 +108,9 @@ PERF_TEST_P(Sz_Norm, NormDiff,
|
||||
{
|
||||
const cv::cuda::GpuMat d_src1(src1);
|
||||
const cv::cuda::GpuMat d_src2(src2);
|
||||
cv::cuda::GpuMat d_buf;
|
||||
double gpu_dst;
|
||||
|
||||
TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src1, d_src2, d_buf, normType);
|
||||
TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src1, d_src2, normType);
|
||||
|
||||
SANITY_CHECK(gpu_dst);
|
||||
|
||||
@ -146,10 +145,9 @@ PERF_TEST_P(Sz_Depth_Cn, Sum,
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat d_buf;
|
||||
cv::Scalar gpu_dst;
|
||||
|
||||
TEST_CYCLE() gpu_dst = cv::cuda::sum(d_src, d_buf);
|
||||
TEST_CYCLE() gpu_dst = cv::cuda::sum(d_src);
|
||||
|
||||
SANITY_CHECK(gpu_dst, 1e-5, ERROR_RELATIVE);
|
||||
}
|
||||
@ -183,10 +181,9 @@ PERF_TEST_P(Sz_Depth_Cn, SumAbs,
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat d_buf;
|
||||
cv::Scalar gpu_dst;
|
||||
|
||||
TEST_CYCLE() gpu_dst = cv::cuda::absSum(d_src, d_buf);
|
||||
TEST_CYCLE() gpu_dst = cv::cuda::absSum(d_src);
|
||||
|
||||
SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
|
||||
}
|
||||
@ -216,10 +213,9 @@ PERF_TEST_P(Sz_Depth_Cn, SumSqr,
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat d_buf;
|
||||
cv::Scalar gpu_dst;
|
||||
|
||||
TEST_CYCLE() gpu_dst = cv::cuda::sqrSum(d_src, d_buf);
|
||||
TEST_CYCLE() gpu_dst = cv::cuda::sqrSum(d_src);
|
||||
|
||||
SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
|
||||
}
|
||||
@ -248,10 +244,9 @@ PERF_TEST_P(Sz_Depth, MinMax,
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat d_buf;
|
||||
double gpu_minVal, gpu_maxVal;
|
||||
|
||||
TEST_CYCLE() cv::cuda::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::cuda::GpuMat(), d_buf);
|
||||
TEST_CYCLE() cv::cuda::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::cuda::GpuMat());
|
||||
|
||||
SANITY_CHECK(gpu_minVal, 1e-10);
|
||||
SANITY_CHECK(gpu_maxVal, 1e-10);
|
||||
@ -286,11 +281,10 @@ PERF_TEST_P(Sz_Depth, MinMaxLoc,
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat d_valbuf, d_locbuf;
|
||||
double gpu_minVal, gpu_maxVal;
|
||||
cv::Point gpu_minLoc, gpu_maxLoc;
|
||||
|
||||
TEST_CYCLE() cv::cuda::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc, cv::cuda::GpuMat(), d_valbuf, d_locbuf);
|
||||
TEST_CYCLE() cv::cuda::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc);
|
||||
|
||||
SANITY_CHECK(gpu_minVal, 1e-10);
|
||||
SANITY_CHECK(gpu_maxVal, 1e-10);
|
||||
@ -323,10 +317,9 @@ PERF_TEST_P(Sz_Depth, CountNonZero,
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat d_buf;
|
||||
int gpu_dst = 0;
|
||||
|
||||
TEST_CYCLE() gpu_dst = cv::cuda::countNonZero(d_src, d_buf);
|
||||
TEST_CYCLE() gpu_dst = cv::cuda::countNonZero(d_src);
|
||||
|
||||
SANITY_CHECK(gpu_dst);
|
||||
}
|
||||
@ -414,9 +407,8 @@ PERF_TEST_P(Sz_Depth_NormType, Normalize,
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat dst;
|
||||
cv::cuda::GpuMat d_norm_buf, d_cvt_buf;
|
||||
|
||||
TEST_CYCLE() cv::cuda::normalize(d_src, dst, alpha, beta, norm_type, type, cv::cuda::GpuMat(), d_norm_buf, d_cvt_buf);
|
||||
TEST_CYCLE() cv::cuda::normalize(d_src, dst, alpha, beta, norm_type, type, cv::cuda::GpuMat());
|
||||
|
||||
CUDA_SANITY_CHECK(dst, 1e-6);
|
||||
}
|
||||
@ -445,11 +437,10 @@ PERF_TEST_P(Sz, MeanStdDev,
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat d_buf;
|
||||
cv::Scalar gpu_mean;
|
||||
cv::Scalar gpu_stddev;
|
||||
|
||||
TEST_CYCLE() cv::cuda::meanStdDev(d_src, gpu_mean, gpu_stddev, d_buf);
|
||||
TEST_CYCLE() cv::cuda::meanStdDev(d_src, gpu_mean, gpu_stddev);
|
||||
|
||||
SANITY_CHECK(gpu_mean);
|
||||
SANITY_CHECK(gpu_stddev);
|
||||
@ -481,9 +472,8 @@ PERF_TEST_P(Sz, Integral,
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat dst;
|
||||
cv::cuda::GpuMat d_buf;
|
||||
|
||||
TEST_CYCLE() cv::cuda::integral(d_src, dst, d_buf);
|
||||
TEST_CYCLE() cv::cuda::integral(d_src, dst);
|
||||
|
||||
CUDA_SANITY_CHECK(dst);
|
||||
}
|
||||
@ -511,9 +501,9 @@ PERF_TEST_P(Sz, IntegralSqr,
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat dst, buf;
|
||||
cv::cuda::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst, buf);
|
||||
TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst);
|
||||
|
||||
CUDA_SANITY_CHECK(dst);
|
||||
}
|
||||
|
@ -169,9 +169,9 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
|
||||
#else
|
||||
// CUBLAS works with column-major matrices
|
||||
|
||||
GpuMat src1 = _src1.getGpuMat();
|
||||
GpuMat src2 = _src2.getGpuMat();
|
||||
GpuMat src3 = _src3.getGpuMat();
|
||||
GpuMat src1 = getInputMat(_src1, stream);
|
||||
GpuMat src2 = getInputMat(_src2, stream);
|
||||
GpuMat src3 = getInputMat(_src3, stream);
|
||||
|
||||
CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2 );
|
||||
CV_Assert( src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()) );
|
||||
@ -200,8 +200,7 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
|
||||
CV_Assert( src1Size.width == src2Size.height );
|
||||
CV_Assert( src3.empty() || src3Size == dstSize );
|
||||
|
||||
_dst.create(dstSize, src1.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, dstSize, src1.type(), stream);
|
||||
|
||||
if (beta != 0)
|
||||
{
|
||||
@ -281,6 +280,8 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
|
||||
}
|
||||
|
||||
cublasSafeCall( cublasDestroy_v2(handle) );
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -297,7 +298,7 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags,
|
||||
(void) stream;
|
||||
throw_no_cuda();
|
||||
#else
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 );
|
||||
|
||||
@ -314,13 +315,20 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags,
|
||||
// We don't support real-to-real transform
|
||||
CV_Assert( is_complex_input || is_complex_output );
|
||||
|
||||
GpuMat src_cont = src;
|
||||
|
||||
// Make sure here we work with the continuous input,
|
||||
// as CUFFT can't handle gaps
|
||||
createContinuous(src.rows, src.cols, src.type(), src_cont);
|
||||
if (src_cont.data != src.data)
|
||||
GpuMat src_cont;
|
||||
if (src.isContinuous())
|
||||
{
|
||||
src_cont = src;
|
||||
}
|
||||
else
|
||||
{
|
||||
BufferPool pool(stream);
|
||||
src_cont.allocator = pool.getAllocator();
|
||||
createContinuous(src.rows, src.cols, src.type(), src_cont);
|
||||
src.copyTo(src_cont, stream);
|
||||
}
|
||||
|
||||
Size dft_size_opt = dft_size;
|
||||
if (is_1d_input && !is_row_dft)
|
||||
@ -462,16 +470,15 @@ namespace
|
||||
|
||||
void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream)
|
||||
{
|
||||
GpuMat image = _image.getGpuMat();
|
||||
GpuMat templ = _templ.getGpuMat();
|
||||
GpuMat image = getInputMat(_image, _stream);
|
||||
GpuMat templ = getInputMat(_templ, _stream);
|
||||
|
||||
CV_Assert( image.type() == CV_32FC1 );
|
||||
CV_Assert( templ.type() == CV_32FC1 );
|
||||
|
||||
create(image.size(), templ.size());
|
||||
|
||||
_result.create(result_size, CV_32FC1);
|
||||
GpuMat result = _result.getGpuMat();
|
||||
GpuMat result = getOutputMat(_result, result_size, CV_32FC1, _stream);
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
||||
|
||||
@ -520,6 +527,8 @@ namespace
|
||||
|
||||
cufftSafeCall( cufftDestroy(planR2C) );
|
||||
cufftSafeCall( cufftDestroy(planC2R) );
|
||||
|
||||
syncOutput(result, _result, _stream);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -119,15 +119,17 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str
|
||||
{NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F);
|
||||
CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream));
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
||||
|
@ -50,7 +50,10 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
@ -63,7 +66,7 @@ namespace
|
||||
|
||||
__device__ __forceinline__ D operator ()(T1 a, T2 b) const
|
||||
{
|
||||
return saturate_cast<D>(a * alpha + b * beta + gamma);
|
||||
return cudev::saturate_cast<D>(a * alpha + b * beta + gamma);
|
||||
}
|
||||
};
|
||||
|
||||
@ -555,8 +558,8 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
|
||||
}
|
||||
};
|
||||
|
||||
GpuMat src1 = _src1.getGpuMat();
|
||||
GpuMat src2 = _src2.getGpuMat();
|
||||
GpuMat src1 = getInputMat(_src1, stream);
|
||||
GpuMat src2 = getInputMat(_src2, stream);
|
||||
|
||||
int sdepth1 = src1.depth();
|
||||
int sdepth2 = src2.depth();
|
||||
@ -564,19 +567,18 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
|
||||
ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2);
|
||||
const int cn = src1.channels();
|
||||
|
||||
CV_DbgAssert( src2.size() == src1.size() && src2.channels() == cn );
|
||||
CV_DbgAssert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
|
||||
CV_Assert( src2.size() == src1.size() && src2.channels() == cn );
|
||||
CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
|
||||
|
||||
_dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn));
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src1.size(), CV_MAKE_TYPE(ddepth, cn), stream);
|
||||
|
||||
GpuMat src1_ = src1.reshape(1);
|
||||
GpuMat src2_ = src2.reshape(1);
|
||||
GpuMat dst_ = dst.reshape(1);
|
||||
GpuMat src1_single = src1.reshape(1);
|
||||
GpuMat src2_single = src2.reshape(1);
|
||||
GpuMat dst_single = dst.reshape(1);
|
||||
|
||||
if (sdepth1 > sdepth2)
|
||||
{
|
||||
src1_.swap(src2_);
|
||||
src1_single.swap(src2_single);
|
||||
std::swap(alpha, beta);
|
||||
std::swap(sdepth1, sdepth2);
|
||||
}
|
||||
@ -586,7 +588,9 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
|
||||
if (!func)
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
|
||||
|
||||
func(src1_, alpha, src2_, beta, gamma, dst_, stream);
|
||||
func(src1_single, alpha, src2_single, beta, gamma, dst_single, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -50,7 +50,10 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
|
||||
@ -60,16 +63,15 @@ void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& m
|
||||
|
||||
void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
GpuMat mask = getInputMat(_mask, stream);
|
||||
|
||||
const int depth = src.depth();
|
||||
|
||||
CV_DbgAssert( depth <= CV_32F );
|
||||
CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
@ -125,6 +127,8 @@ void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask,
|
||||
gridTransformUnary(vsrc, vdst, bit_not<uchar>(), singleMaskChannels(globPtr<uchar>(mask), src.channels()), stream);
|
||||
}
|
||||
}
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -50,7 +50,10 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
@ -133,7 +136,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
|
||||
{ copyMakeBorderImpl<float , 1> , 0 /*copyMakeBorderImpl<float , 2>*/, copyMakeBorderImpl<float , 3> , copyMakeBorderImpl<float ,4> }
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
const int depth = src.depth();
|
||||
const int cn = src.channels();
|
||||
@ -141,8 +144,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
|
||||
CV_Assert( depth <= CV_32F && cn <= 4 );
|
||||
CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
|
||||
|
||||
_dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.rows + top + bottom, src.cols + left + right, src.type(), stream);
|
||||
|
||||
const func_t func = funcs[depth][cn - 1];
|
||||
|
||||
@ -150,6 +152,8 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
|
||||
|
||||
func(src, dst, top, left, borderType, value, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -50,47 +50,64 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
int countNonZeroImpl(const GpuMat& _src, GpuMat& _buf)
|
||||
template <typename T, typename D>
|
||||
void countNonZeroImpl(const GpuMat& _src, GpuMat& _dst, Stream& stream)
|
||||
{
|
||||
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
|
||||
GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
|
||||
GpuMat_<D>& dst = (GpuMat_<D>&) _dst;
|
||||
|
||||
gridCountNonZero(src, buf);
|
||||
|
||||
int data;
|
||||
buf.download(cv::Mat(1, 1, buf.type(), &data));
|
||||
|
||||
return data;
|
||||
gridCountNonZero(src, dst, stream);
|
||||
}
|
||||
}
|
||||
|
||||
int cv::cuda::countNonZero(InputArray _src, GpuMat& buf)
|
||||
void cv::cuda::countNonZero(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
typedef int (*func_t)(const GpuMat& _src, GpuMat& _buf);
|
||||
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Stream& stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
countNonZeroImpl<uchar>,
|
||||
countNonZeroImpl<schar>,
|
||||
countNonZeroImpl<ushort>,
|
||||
countNonZeroImpl<short>,
|
||||
countNonZeroImpl<int>,
|
||||
countNonZeroImpl<float>,
|
||||
countNonZeroImpl<double>
|
||||
countNonZeroImpl<uchar, int>,
|
||||
countNonZeroImpl<schar, int>,
|
||||
countNonZeroImpl<ushort, int>,
|
||||
countNonZeroImpl<short, int>,
|
||||
countNonZeroImpl<int, int>,
|
||||
countNonZeroImpl<float, int>,
|
||||
countNonZeroImpl<double, int>,
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
CV_Assert( src.depth() <= CV_64F );
|
||||
CV_Assert( src.channels() == 1 );
|
||||
|
||||
const func_t func = funcs[src.depth()];
|
||||
GpuMat dst = getOutputMat(_dst, 1, 1, CV_32SC1, stream);
|
||||
|
||||
return func(src, buf);
|
||||
const func_t func = funcs[src.depth()];
|
||||
func(src, dst, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
// Host-returning overload of countNonZero: runs the OutputArray/Stream
// overload on the null stream and hands the resulting count back by value.
int cv::cuda::countNonZero(InputArray _src)
{
    Stream& nullStream = Stream::Null();

    // 1x1 CV_32SC1 device buffer obtained from the stream's buffer pool;
    // the stream-based overload writes the count into it.
    BufferPool bufferPool(nullStream);
    GpuMat d_count = bufferPool.getBuffer(1, 1, CV_32SC1);

    countNonZero(_src, d_count, nullStream);

    // Download through a Mat header that wraps the local integer, so the
    // value lands directly in `count`.
    int count;
    Mat h_count(1, 1, CV_32SC1, &count);
    d_count.download(h_count);

    return count;
}
|
||||
|
||||
#endif
|
||||
|
@ -50,51 +50,58 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// integral
|
||||
|
||||
void cv::cuda::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream& stream)
|
||||
void cv::cuda::integral(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
CV_Assert( src.type() == CV_8UC1 );
|
||||
|
||||
GpuMat_<int>& res = (GpuMat_<int>&) buffer;
|
||||
BufferPool pool(stream);
|
||||
GpuMat_<int> res(src.size(), pool.getAllocator());
|
||||
|
||||
gridIntegral(globPtr<uchar>(src), res, stream);
|
||||
|
||||
_dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.rows + 1, src.cols + 1, CV_32SC1, stream);
|
||||
|
||||
dst.setTo(Scalar::all(0), stream);
|
||||
|
||||
GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
|
||||
res.copyTo(inner, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// sqrIntegral
|
||||
|
||||
void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream& stream)
|
||||
void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
CV_Assert( src.type() == CV_8UC1 );
|
||||
|
||||
GpuMat_<double>& res = (GpuMat_<double>&) buf;
|
||||
BufferPool pool(Stream::Null());
|
||||
GpuMat_<double> res(pool.getBuffer(src.size(), CV_64FC1));
|
||||
|
||||
gridIntegral(sqr_(cvt_<int>(globPtr<uchar>(src))), res, stream);
|
||||
|
||||
_dst.create(src.rows + 1, src.cols + 1, CV_64FC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.rows + 1, src.cols + 1, CV_64FC1, stream);
|
||||
|
||||
dst.setTo(Scalar::all(0), stream);
|
||||
|
||||
GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
|
||||
res.copyTo(inner, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -50,8 +50,10 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
@ -165,7 +167,7 @@ namespace
|
||||
|
||||
void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
const int cn = src.channels();
|
||||
const int lut_cn = d_lut.channels();
|
||||
@ -173,8 +175,7 @@ namespace
|
||||
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
|
||||
CV_Assert( lut_cn == 1 || lut_cn == cn );
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
if (lut_cn == 1)
|
||||
{
|
||||
@ -196,6 +197,8 @@ namespace
|
||||
|
||||
dst3.assign(lut_(src3, tbl), stream);
|
||||
}
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -50,7 +50,10 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
@ -92,16 +95,15 @@ void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
absMat<double>
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
const int depth = src.depth();
|
||||
CV_Assert( src.depth() <= CV_64F );
|
||||
|
||||
CV_DbgAssert( depth <= CV_64F );
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
|
||||
|
||||
funcs[depth](src.reshape(1), dst.reshape(1), stream);
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -113,7 +115,7 @@ namespace
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(T x) const
|
||||
{
|
||||
return saturate_cast<T>(x * x);
|
||||
return cudev::saturate_cast<T>(x * x);
|
||||
}
|
||||
};
|
||||
|
||||
@ -138,16 +140,15 @@ void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
sqrMat<double>
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
const int depth = src.depth();
|
||||
CV_Assert( src.depth() <= CV_64F );
|
||||
|
||||
CV_DbgAssert( depth <= CV_64F );
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
|
||||
|
||||
funcs[depth](src.reshape(1), dst.reshape(1), stream);
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -176,16 +177,15 @@ void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
sqrtMat<double>
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
const int depth = src.depth();
|
||||
CV_Assert( src.depth() <= CV_64F );
|
||||
|
||||
CV_DbgAssert( depth <= CV_64F );
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
|
||||
|
||||
funcs[depth](src.reshape(1), dst.reshape(1), stream);
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -198,7 +198,7 @@ namespace
|
||||
__device__ __forceinline__ T operator ()(T x) const
|
||||
{
|
||||
exp_func<T> f;
|
||||
return saturate_cast<T>(f(x));
|
||||
return cudev::saturate_cast<T>(f(x));
|
||||
}
|
||||
};
|
||||
|
||||
@ -223,16 +223,15 @@ void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
expMat<double>
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
const int depth = src.depth();
|
||||
CV_Assert( src.depth() <= CV_64F );
|
||||
|
||||
CV_DbgAssert( depth <= CV_64F );
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
|
||||
|
||||
funcs[depth](src.reshape(1), dst.reshape(1), stream);
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -261,16 +260,15 @@ void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
logMat<double>
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
const int depth = src.depth();
|
||||
CV_Assert( src.depth() <= CV_64F );
|
||||
|
||||
CV_DbgAssert( depth <= CV_64F );
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
|
||||
|
||||
funcs[depth](src.reshape(1), dst.reshape(1), stream);
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -284,7 +282,7 @@ namespace
|
||||
|
||||
__device__ __forceinline__ T operator()(T e) const
|
||||
{
|
||||
return saturate_cast<T>(__powf((float)e, power));
|
||||
return cudev::saturate_cast<T>(__powf((float)e, power));
|
||||
}
|
||||
};
|
||||
template<typename T> struct PowOp<T, true> : unary_function<T, T>
|
||||
@ -293,7 +291,7 @@ namespace
|
||||
|
||||
__device__ __forceinline__ T operator()(T e) const
|
||||
{
|
||||
T res = saturate_cast<T>(__powf((float)e, power));
|
||||
T res = cudev::saturate_cast<T>(__powf((float)e, power));
|
||||
|
||||
if ((e < 0) && (1 & static_cast<int>(power)))
|
||||
res *= -1;
|
||||
@ -344,16 +342,15 @@ void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stre
|
||||
powMat<double>
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
const int depth = src.depth();
|
||||
CV_Assert( src.depth() <= CV_64F );
|
||||
|
||||
CV_DbgAssert(depth <= CV_64F);
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
funcs[src.depth()](src.reshape(1), power, dst.reshape(1), stream);
|
||||
|
||||
funcs[depth](src.reshape(1), power, dst.reshape(1), stream);
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -50,62 +50,140 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
void minMaxImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf, double* minVal, double* maxVal)
|
||||
template <typename T, typename R>
|
||||
void minMaxImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
|
||||
{
|
||||
typedef typename SelectIf<
|
||||
TypesEquals<T, double>::value,
|
||||
double,
|
||||
typename SelectIf<TypesEquals<T, float>::value, float, int>::type
|
||||
>::type work_type;
|
||||
|
||||
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
|
||||
GpuMat_<work_type>& buf = (GpuMat_<work_type>&) _buf;
|
||||
GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
|
||||
|
||||
if (mask.empty())
|
||||
gridFindMinMaxVal(src, buf);
|
||||
gridFindMinMaxVal(src, dst, stream);
|
||||
else
|
||||
gridFindMinMaxVal(src, buf, globPtr<uchar>(mask));
|
||||
gridFindMinMaxVal(src, dst, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
|
||||
work_type data[2];
|
||||
buf.download(cv::Mat(1, 2, buf.type(), data));
|
||||
template <typename T, typename R>
|
||||
void minMaxImpl(const GpuMat& src, const GpuMat& mask, double* minVal, double* maxVal)
|
||||
{
|
||||
BufferPool pool(Stream::Null());
|
||||
GpuMat buf(pool.getBuffer(1, 2, DataType<R>::type));
|
||||
|
||||
if (minVal)
|
||||
*minVal = data[0];
|
||||
minMaxImpl<T, R>(src, mask, buf, Stream::Null());
|
||||
|
||||
R data[2];
|
||||
buf.download(Mat(1, 2, buf.type(), data));
|
||||
|
||||
if (maxVal)
|
||||
*maxVal = data[1];
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask, GpuMat& buf)
|
||||
void cv::cuda::findMinMax(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf, double* minVal, double* maxVal);
|
||||
typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
minMaxImpl<uchar>,
|
||||
minMaxImpl<schar>,
|
||||
minMaxImpl<ushort>,
|
||||
minMaxImpl<short>,
|
||||
minMaxImpl<int>,
|
||||
minMaxImpl<float>,
|
||||
minMaxImpl<double>
|
||||
minMaxImpl<uchar, int>,
|
||||
minMaxImpl<schar, int>,
|
||||
minMaxImpl<ushort, int>,
|
||||
minMaxImpl<short, int>,
|
||||
minMaxImpl<int, int>,
|
||||
minMaxImpl<float, float>,
|
||||
minMaxImpl<double, double>
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
const GpuMat src = getInputMat(_src, stream);
|
||||
const GpuMat mask = getInputMat(_mask, stream);
|
||||
|
||||
CV_Assert( src.channels() == 1 );
|
||||
CV_DbgAssert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
|
||||
CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
|
||||
|
||||
const int src_depth = src.depth();
|
||||
const int dst_depth = src_depth < CV_32F ? CV_32S : src_depth;
|
||||
|
||||
GpuMat dst = getOutputMat(_dst, 1, 2, dst_depth, stream);
|
||||
|
||||
const func_t func = funcs[src.depth()];
|
||||
func(src, mask, dst, stream);
|
||||
|
||||
func(src, mask, buf, minVal, maxVal);
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
// Host-returning minMax: runs findMinMax on the null stream, waits for it,
// and stores the two results through minVal / maxVal.
// Either output pointer may be null, in which case that value is skipped.
void cv::cuda::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask)
{
    Stream& nullStream = Stream::Null();

    HostMem h_result;
    findMinMax(_src, h_result, _mask, nullStream);

    // The result is read on the host below, so wait for the stream first.
    nullStream.waitForCompletion();

    // Convert whatever depth findMinMax produced to double, writing straight
    // into the local array through a wrapping Mat header.
    double minMaxPair[2];
    Mat pairHeader(1, 2, CV_64FC1, &minMaxPair[0]);
    h_result.createMatHeader().convertTo(pairHeader, CV_64F);

    if (minVal)
    {
        *minVal = minMaxPair[0];
    }

    if (maxVal)
    {
        *maxVal = minMaxPair[1];
    }
}
|
||||
|
||||
namespace cv { namespace cuda { namespace internal {
|
||||
|
||||
void findMaxAbs(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream);
|
||||
|
||||
}}}
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T, typename R>
|
||||
void findMaxAbsImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
|
||||
{
|
||||
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
|
||||
GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
|
||||
|
||||
if (mask.empty())
|
||||
gridFindMaxVal(abs_(src), dst, stream);
|
||||
else
|
||||
gridFindMaxVal(abs_(src), dst, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the maximum absolute value of a single-channel matrix.
//   _src    - input; must have exactly one channel and a depth in CV_8U..CV_64F
//   _dst    - receives a 1x1 result; CV_32S for integer sources, otherwise the
//             source depth (CV_32F / CV_64F)
//   _mask   - optional CV_8U mask of the same size as the source; may be empty
//   stream  - stream on which all work is issued
// Raises (via CV_Assert) on a multi-channel source, a mismatching mask, or an
// unsupported depth.
void cv::cuda::internal::findMaxAbs(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);

    // One entry per depth, indexed CV_8U .. CV_64F.
    static const func_t funcs[] =
    {
        findMaxAbsImpl<uchar, int>,
        findMaxAbsImpl<schar, int>,
        findMaxAbsImpl<ushort, int>,
        findMaxAbsImpl<short, int>,
        findMaxAbsImpl<int, int>,
        findMaxAbsImpl<float, float>,
        findMaxAbsImpl<double, double>
    };

    const GpuMat src = getInputMat(_src, stream);
    const GpuMat mask = getInputMat(_mask, stream);

    const int src_depth = src.depth();

    CV_Assert( src.channels() == 1 );
    // The dispatch table only covers depths up to CV_64F; without this check
    // any other depth would index past the end of funcs.
    CV_Assert( src_depth <= CV_64F );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    // Integer sources are reduced into a 32-bit int, floating-point sources
    // keep their own depth (matches the R parameter chosen in funcs).
    const int dst_depth = src_depth < CV_32F ? CV_32S : src_depth;

    GpuMat dst = getOutputMat(_dst, 1, 1, dst_depth, stream);

    const func_t func = funcs[src_depth];
    func(src, mask, dst, stream);

    syncOutput(dst, _dst, stream);
}
|
||||
|
||||
#endif
|
||||
|
@ -50,78 +50,110 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
void minMaxLocImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, double* minVal, double* maxVal, cv::Point* minLoc, cv::Point* maxLoc)
|
||||
template <typename T, typename R>
|
||||
void minMaxLocImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, Stream& stream)
|
||||
{
|
||||
typedef typename SelectIf<
|
||||
TypesEquals<T, double>::value,
|
||||
double,
|
||||
typename SelectIf<TypesEquals<T, float>::value, float, int>::type
|
||||
>::type work_type;
|
||||
|
||||
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
|
||||
GpuMat_<work_type>& valBuf = (GpuMat_<work_type>&) _valBuf;
|
||||
GpuMat_<R>& valBuf = (GpuMat_<R>&) _valBuf;
|
||||
GpuMat_<int>& locBuf = (GpuMat_<int>&) _locBuf;
|
||||
|
||||
if (mask.empty())
|
||||
gridMinMaxLoc(src, valBuf, locBuf);
|
||||
gridMinMaxLoc(src, valBuf, locBuf, stream);
|
||||
else
|
||||
gridMinMaxLoc(src, valBuf, locBuf, globPtr<uchar>(mask));
|
||||
|
||||
cv::Mat_<work_type> h_valBuf;
|
||||
cv::Mat_<int> h_locBuf;
|
||||
|
||||
valBuf.download(h_valBuf);
|
||||
locBuf.download(h_locBuf);
|
||||
|
||||
if (minVal)
|
||||
*minVal = h_valBuf(0, 0);
|
||||
|
||||
if (maxVal)
|
||||
*maxVal = h_valBuf(1, 0);
|
||||
|
||||
if (minLoc)
|
||||
{
|
||||
const int idx = h_locBuf(0, 0);
|
||||
*minLoc = cv::Point(idx % src.cols, idx / src.cols);
|
||||
}
|
||||
|
||||
if (maxLoc)
|
||||
{
|
||||
const int idx = h_locBuf(1, 0);
|
||||
*maxLoc = cv::Point(idx % src.cols, idx / src.cols);
|
||||
}
|
||||
gridMinMaxLoc(src, valBuf, locBuf, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray _mask, GpuMat& valBuf, GpuMat& locBuf)
|
||||
void cv::cuda::findMinMaxLoc(InputArray _src, OutputArray _minMaxVals, OutputArray _loc, InputArray _mask, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, double* minVal, double* maxVal, cv::Point* minLoc, cv::Point* maxLoc);
|
||||
typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, Stream& stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
minMaxLocImpl<uchar>,
|
||||
minMaxLocImpl<schar>,
|
||||
minMaxLocImpl<ushort>,
|
||||
minMaxLocImpl<short>,
|
||||
minMaxLocImpl<int>,
|
||||
minMaxLocImpl<float>,
|
||||
minMaxLocImpl<double>
|
||||
minMaxLocImpl<uchar, int>,
|
||||
minMaxLocImpl<schar, int>,
|
||||
minMaxLocImpl<ushort, int>,
|
||||
minMaxLocImpl<short, int>,
|
||||
minMaxLocImpl<int, int>,
|
||||
minMaxLocImpl<float, float>,
|
||||
minMaxLocImpl<double, double>
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
const GpuMat src = getInputMat(_src, stream);
|
||||
const GpuMat mask = getInputMat(_mask, stream);
|
||||
|
||||
CV_Assert( src.channels() == 1 );
|
||||
CV_DbgAssert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
|
||||
CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
|
||||
|
||||
const func_t func = funcs[src.depth()];
|
||||
const int src_depth = src.depth();
|
||||
|
||||
func(src, mask, valBuf, locBuf, minVal, maxVal, minLoc, maxLoc);
|
||||
BufferPool pool(stream);
|
||||
GpuMat valBuf(pool.getAllocator());
|
||||
GpuMat locBuf(pool.getAllocator());
|
||||
|
||||
const func_t func = funcs[src_depth];
|
||||
func(src, mask, valBuf, locBuf, stream);
|
||||
|
||||
GpuMat minMaxVals = valBuf.colRange(0, 1);
|
||||
GpuMat loc = locBuf.colRange(0, 1);
|
||||
|
||||
if (_minMaxVals.kind() == _InputArray::CUDA_GPU_MAT)
|
||||
{
|
||||
minMaxVals.copyTo(_minMaxVals, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
minMaxVals.download(_minMaxVals, stream);
|
||||
}
|
||||
|
||||
if (_loc.kind() == _InputArray::CUDA_GPU_MAT)
|
||||
{
|
||||
loc.copyTo(_loc, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
loc.download(_loc, stream);
|
||||
}
|
||||
}
|
||||
|
||||
// Host-returning minMaxLoc: runs findMinMaxLoc on the null stream, waits for
// it, then unpacks the values and the flat indices into the caller's outputs.
// Any of the four output pointers may be null; that output is then skipped.
void cv::cuda::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray _mask)
{
    Stream& nullStream = Stream::Null();

    HostMem h_vals, h_locs;
    findMinMaxLoc(_src, h_vals, h_locs, _mask, nullStream);

    // Results are consumed on the host below, so wait for the stream first.
    nullStream.waitForCompletion();

    // Values: convert to double straight into a local array via a Mat header.
    double extrema[2];
    Mat extremaHeader(h_vals.size(), CV_64FC1, &extrema[0]);
    h_vals.createMatHeader().convertTo(extremaHeader, CV_64F);

    // Locations: copied out as 32-bit flat indices.
    int flatIdx[2];
    Mat idxHeader(h_locs.size(), CV_32SC1, &flatIdx[0]);
    h_locs.createMatHeader().copyTo(idxHeader);

    // Turn each flat index into (x, y) using the source width.
    const Size srcSize = _src.size();
    const cv::Point minPoint(flatIdx[0] % srcSize.width, flatIdx[0] / srcSize.width);
    const cv::Point maxPoint(flatIdx[1] % srcSize.width, flatIdx[1] / srcSize.width);

    if (minVal)
    {
        *minVal = extrema[0];
    }

    if (maxVal)
    {
        *maxVal = extrema[1];
    }

    if (minLoc)
    {
        *minLoc = minPoint;
    }

    if (maxLoc)
    {
        *maxLoc = maxPoint;
    }
}
|
||||
|
||||
#endif
|
||||
|
@ -50,7 +50,10 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -120,33 +123,33 @@ void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst
|
||||
{
|
||||
(void) flags;
|
||||
|
||||
GpuMat src1 = _src1.getGpuMat();
|
||||
GpuMat src2 = _src2.getGpuMat();
|
||||
GpuMat src1 = getInputMat(_src1, stream);
|
||||
GpuMat src2 = getInputMat(_src2, stream);
|
||||
|
||||
CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
|
||||
CV_Assert( src1.size() == src2.size() );
|
||||
|
||||
_dst.create(src1.size(), CV_32FC2);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
|
||||
|
||||
if (conjB)
|
||||
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream);
|
||||
else
|
||||
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
|
||||
{
|
||||
(void) flags;
|
||||
|
||||
GpuMat src1 = _src1.getGpuMat();
|
||||
GpuMat src2 = _src2.getGpuMat();
|
||||
GpuMat src1 = getInputMat(_src1, stream);
|
||||
GpuMat src2 = getInputMat(_src2, stream);
|
||||
|
||||
CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2);
|
||||
CV_Assert( src1.size() == src2.size() );
|
||||
|
||||
_dst.create(src1.size(), CV_32FC2);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
|
||||
|
||||
if (conjB)
|
||||
{
|
||||
@ -160,6 +163,8 @@ void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputAr
|
||||
op.scale = scale;
|
||||
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
|
||||
}
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -50,70 +50,140 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
{
|
||||
double normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
|
||||
void normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream)
|
||||
{
|
||||
const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
|
||||
const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
|
||||
GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
|
||||
GpuMat_<int>& dst = (GpuMat_<int>&) _dst;
|
||||
|
||||
gridFindMinMaxVal(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
|
||||
|
||||
int data[2];
|
||||
buf.download(cv::Mat(1, 2, buf.type(), data));
|
||||
|
||||
return data[1];
|
||||
gridFindMaxVal(abs_(cvt_<int>(src1) - cvt_<int>(src2)), dst, stream);
|
||||
}
|
||||
|
||||
double normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
|
||||
void normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream)
|
||||
{
|
||||
const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
|
||||
const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
|
||||
GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
|
||||
GpuMat_<int>& dst = (GpuMat_<int>&) _dst;
|
||||
|
||||
gridCalcSum(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
|
||||
|
||||
int data;
|
||||
buf.download(cv::Mat(1, 1, buf.type(), &data));
|
||||
|
||||
return data;
|
||||
gridCalcSum(abs_(cvt_<int>(src1) - cvt_<int>(src2)), dst, stream);
|
||||
}
|
||||
|
||||
double normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
|
||||
void normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream)
|
||||
{
|
||||
const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
|
||||
const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
|
||||
GpuMat_<double>& buf = (GpuMat_<double>&) _buf;
|
||||
GpuMat_<double>& dst = (GpuMat_<double>&) _dst;
|
||||
|
||||
gridCalcSum(sqr_(cvt_<double>(src1) - cvt_<double>(src2)), buf);
|
||||
BufferPool pool(stream);
|
||||
GpuMat_<double> buf(1, 1, pool.getAllocator());
|
||||
|
||||
double data;
|
||||
buf.download(cv::Mat(1, 1, buf.type(), &data));
|
||||
|
||||
return std::sqrt(data);
|
||||
gridCalcSum(sqr_(cvt_<double>(src1) - cvt_<double>(src2)), buf, stream);
|
||||
gridTransformUnary(buf, dst, sqrt_func<double>(), stream);
|
||||
}
|
||||
}
|
||||
|
||||
double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
|
||||
void cv::cuda::calcNormDiff(InputArray _src1, InputArray _src2, OutputArray _dst, int normType, Stream& stream)
|
||||
{
|
||||
typedef double (*func_t)(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf);
|
||||
typedef void (*func_t)(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0, normDiffInf, normDiffL1, 0, normDiffL2
|
||||
};
|
||||
|
||||
GpuMat src1 = _src1.getGpuMat();
|
||||
GpuMat src2 = _src2.getGpuMat();
|
||||
GpuMat src1 = getInputMat(_src1, stream);
|
||||
GpuMat src2 = getInputMat(_src2, stream);
|
||||
|
||||
CV_Assert( src1.type() == CV_8UC1 );
|
||||
CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() );
|
||||
CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
|
||||
|
||||
return funcs[normType](src1, src2, buf);
|
||||
GpuMat dst = getOutputMat(_dst, 1, 1, normType == NORM_L2 ? CV_64FC1 : CV_32SC1, stream);
|
||||
|
||||
const func_t func = funcs[normType];
|
||||
func(src1, src2, dst, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
// Host-returning difference norm: runs calcNormDiff on the null stream, waits
// for it, and returns the single result converted to double.
double cv::cuda::norm(InputArray _src1, InputArray _src2, int normType)
{
    Stream& nullStream = Stream::Null();

    HostMem h_norm;
    calcNormDiff(_src1, _src2, h_norm, normType, nullStream);

    // The value is read on the host below, so wait for the stream first.
    nullStream.waitForCompletion();

    // Convert to double directly into the local through a wrapping Mat header.
    double result;
    Mat resultHeader(1, 1, CV_64FC1, &result);
    h_norm.createMatHeader().convertTo(resultHeader, CV_64F);

    return result;
}
|
||||
|
||||
namespace cv { namespace cuda { namespace internal {
|
||||
|
||||
void normL2(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream);
|
||||
|
||||
}}}
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T, typename R>
|
||||
void normL2Impl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
|
||||
{
|
||||
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
|
||||
GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
|
||||
|
||||
BufferPool pool(stream);
|
||||
GpuMat_<double> buf(1, 1, pool.getAllocator());
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
gridCalcSum(sqr_(cvt_<double>(src)), buf, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
gridCalcSum(sqr_(cvt_<double>(src)), buf, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
|
||||
gridTransformUnary(buf, dst, sqrt_func<double>(), stream);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the L2 norm of a single-channel matrix on `stream`.
//
// _src   : input array; must have exactly one channel and a depth in the
//          range CV_8U..CV_64F.
// _dst   : receives a 1x1 CV_64FC1 result.
// _mask  : optional mask; when non-empty it must be CV_8U and have the same
//          size as the source.
// stream : stream on which the work is enqueued.
//
// Raises a CV_Assert failure when any of the preconditions above is violated.
void cv::cuda::internal::normL2(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);

    // Dispatch table indexed by src.depth(): one entry per depth CV_8U..CV_64F.
    static const func_t funcs[] =
    {
        normL2Impl<uchar, double>,
        normL2Impl<schar, double>,
        normL2Impl<ushort, double>,
        normL2Impl<short, double>,
        normL2Impl<int, double>,
        normL2Impl<float, double>,
        normL2Impl<double, double>
    };

    const GpuMat src = getInputMat(_src, stream);
    const GpuMat mask = getInputMat(_mask, stream);

    CV_Assert( src.channels() == 1 );
    // The table has exactly CV_64F + 1 entries; reject any other depth instead
    // of reading past the end of the array.
    CV_Assert( src.depth() <= CV_64F );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC1, stream);

    const func_t func = funcs[src.depth()];
    func(src, mask, dst, stream);

    syncOutput(dst, _dst, stream);
}
|
||||
|
||||
#endif
|
||||
|
290
modules/cudaarithm/src/cuda/normalize.cu
Normal file
290
modules/cudaarithm/src/cuda/normalize.cu
Normal file
@ -0,0 +1,290 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
|
||||
#ifndef HAVE_OPENCV_CUDEV
|
||||
|
||||
#error "opencv_cudev is required"
|
||||
|
||||
#else
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T, typename R, typename I>
|
||||
struct ConvertorMinMax : unary_function<T, R>
|
||||
{
|
||||
typedef typename LargerType<T, R>::type larger_type1;
|
||||
typedef typename LargerType<larger_type1, I>::type larger_type2;
|
||||
typedef typename LargerType<larger_type2, float>::type scalar_type;
|
||||
|
||||
scalar_type dmin, dmax;
|
||||
const I* minMaxVals;
|
||||
|
||||
__device__ R operator ()(typename TypeTraits<T>::parameter_type src) const
|
||||
{
|
||||
const scalar_type smin = minMaxVals[0];
|
||||
const scalar_type smax = minMaxVals[1];
|
||||
|
||||
const scalar_type scale = (dmax - dmin) * (smax - smin > numeric_limits<scalar_type>::epsilon() ? 1.0 / (smax - smin) : 0.0);
|
||||
const scalar_type shift = dmin - smin * scale;
|
||||
|
||||
return cudev::saturate_cast<R>(scale * src + shift);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename R, typename I>
|
||||
void normalizeMinMax(const GpuMat& _src, GpuMat& _dst, double a, double b, const GpuMat& mask, Stream& stream)
|
||||
{
|
||||
const GpuMat_<T>& src = (const GpuMat_<T>&)_src;
|
||||
GpuMat_<R>& dst = (GpuMat_<R>&)_dst;
|
||||
|
||||
BufferPool pool(stream);
|
||||
GpuMat_<I> minMaxVals(1, 2, pool.getAllocator());
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
gridFindMinMaxVal(src, minMaxVals, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
gridFindMinMaxVal(src, minMaxVals, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
|
||||
ConvertorMinMax<T, R, I> cvt;
|
||||
cvt.dmin = std::min(a, b);
|
||||
cvt.dmax = std::max(a, b);
|
||||
cvt.minMaxVals = minMaxVals[0];
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
gridTransformUnary(src, dst, cvt, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.setTo(Scalar::all(0), stream);
|
||||
gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename R, typename I, bool normL2>
|
||||
struct ConvertorNorm : unary_function<T, R>
|
||||
{
|
||||
typedef typename LargerType<T, R>::type larger_type1;
|
||||
typedef typename LargerType<larger_type1, I>::type larger_type2;
|
||||
typedef typename LargerType<larger_type2, float>::type scalar_type;
|
||||
|
||||
scalar_type a;
|
||||
const I* normVal;
|
||||
|
||||
__device__ R operator ()(typename TypeTraits<T>::parameter_type src) const
|
||||
{
|
||||
sqrt_func<scalar_type> sqrt;
|
||||
|
||||
scalar_type scale = normL2 ? sqrt(*normVal) : *normVal;
|
||||
scale = scale > numeric_limits<scalar_type>::epsilon() ? a / scale : 0.0;
|
||||
|
||||
return cudev::saturate_cast<R>(scale * src);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename R, typename I>
|
||||
void normalizeNorm(const GpuMat& _src, GpuMat& _dst, double a, int normType, const GpuMat& mask, Stream& stream)
|
||||
{
|
||||
const GpuMat_<T>& src = (const GpuMat_<T>&)_src;
|
||||
GpuMat_<R>& dst = (GpuMat_<R>&)_dst;
|
||||
|
||||
BufferPool pool(stream);
|
||||
GpuMat_<I> normVal(1, 1, pool.getAllocator());
|
||||
|
||||
if (normType == NORM_L1)
|
||||
{
|
||||
if (mask.empty())
|
||||
{
|
||||
gridCalcSum(abs_(cvt_<I>(src)), normVal, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
gridCalcSum(abs_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
}
|
||||
else if (normType == NORM_L2)
|
||||
{
|
||||
if (mask.empty())
|
||||
{
|
||||
gridCalcSum(sqr_(cvt_<I>(src)), normVal, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
gridCalcSum(sqr_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
}
|
||||
else // NORM_INF
|
||||
{
|
||||
if (mask.empty())
|
||||
{
|
||||
gridFindMaxVal(abs_(cvt_<I>(src)), normVal, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
gridFindMaxVal(abs_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
}
|
||||
|
||||
if (normType == NORM_L2)
|
||||
{
|
||||
ConvertorNorm<T, R, I, true> cvt;
|
||||
cvt.a = a;
|
||||
cvt.normVal = normVal[0];
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
gridTransformUnary(src, dst, cvt, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.setTo(Scalar::all(0), stream);
|
||||
gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertorNorm<T, R, I, false> cvt;
|
||||
cvt.a = a;
|
||||
cvt.normVal = normVal[0];
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
gridTransformUnary(src, dst, cvt, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.setTo(Scalar::all(0), stream);
|
||||
gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Normalizes the norm or the value range of a single-channel matrix.
//
// _src     : input array; must have exactly one channel and a depth in the
//            range CV_8U..CV_64F.
// _dst     : output array, same size as the source.
// a, b     : for NORM_MINMAX the output range is [min(a, b), max(a, b)];
//            for the other norm types `a` is the target norm and `b` is unused.
// normType : NORM_INF, NORM_L1, NORM_L2 or NORM_MINMAX.
// dtype    : requested output depth; only the depth bits are used. A negative
//            value selects the depth of the source.
// _mask    : optional mask; when non-empty it must be CV_8U and have the same
//            size as the source.
// stream   : stream on which the work is enqueued.
//
// The computation runs in an intermediate depth (CV_32F for sources up to
// CV_32F, otherwise the source depth). If that differs from the requested
// depth, the result is produced in a temporary buffer and converted.
void cv::cuda::normalize(InputArray _src, OutputArray _dst, double a, double b, int normType, int dtype, InputArray _mask, Stream& stream)
{
    typedef void (*func_minmax_t)(const GpuMat& _src, GpuMat& _dst, double a, double b, const GpuMat& mask, Stream& stream);
    typedef void (*func_norm_t)(const GpuMat& _src, GpuMat& _dst, double a, int normType, const GpuMat& mask, Stream& stream);

    // Both tables are indexed by the source depth (CV_8U..CV_64F).
    static const func_minmax_t funcs_minmax[] =
    {
        normalizeMinMax<uchar, float, float>,
        normalizeMinMax<schar, float, float>,
        normalizeMinMax<ushort, float, float>,
        normalizeMinMax<short, float, float>,
        normalizeMinMax<int, float, float>,
        normalizeMinMax<float, float, float>,
        normalizeMinMax<double, double, double>
    };

    static const func_norm_t funcs_norm[] =
    {
        normalizeNorm<uchar, float, float>,
        normalizeNorm<schar, float, float>,
        normalizeNorm<ushort, float, float>,
        normalizeNorm<short, float, float>,
        normalizeNorm<int, float, float>,
        normalizeNorm<float, float, float>,
        normalizeNorm<double, double, double>
    };

    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_MINMAX );

    const GpuMat src = getInputMat(_src, stream);
    const GpuMat mask = getInputMat(_mask, stream);

    CV_Assert( src.channels() == 1 );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    const int src_depth = src.depth();

    // The dispatch tables have exactly CV_64F + 1 entries; reject any other
    // depth instead of reading past the end of the arrays.
    CV_Assert( src_depth <= CV_64F );

    // A negative dtype means "same depth as the source". Masking it with
    // CV_MAT_DEPTH would otherwise turn e.g. -1 into an unsupported depth.
    dtype = dtype < 0 ? src_depth : CV_MAT_DEPTH(dtype);

    const int tmp_depth = src_depth <= CV_32F ? CV_32F : src_depth;
    const bool writeDirectly = (dtype == tmp_depth);

    GpuMat dst;
    if (writeDirectly)
    {
        // getOutputMat takes care of allocating the destination.
        dst = getOutputMat(_dst, src.size(), tmp_depth, stream);
    }
    else
    {
        // Compute into a temporary of the intermediate depth; converted below.
        BufferPool pool(stream);
        dst = pool.getBuffer(src.size(), tmp_depth);
    }

    if (normType == NORM_MINMAX)
    {
        const func_minmax_t func = funcs_minmax[src_depth];
        func(src, dst, a, b, mask, stream);
    }
    else
    {
        const func_norm_t func = funcs_norm[src_depth];
        func(src, dst, a, normType, mask, stream);
    }

    if (writeDirectly)
    {
        syncOutput(dst, _dst, stream);
    }
    else
    {
        dst.convertTo(_dst, dtype, stream);
    }
}
|
||||
|
||||
#endif
|
@ -50,55 +50,59 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
GpuMat x = _x.getGpuMat();
|
||||
GpuMat y = _y.getGpuMat();
|
||||
GpuMat x = getInputMat(_x, stream);
|
||||
GpuMat y = getInputMat(_y, stream);
|
||||
|
||||
CV_DbgAssert( x.depth() == CV_32F );
|
||||
CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
|
||||
CV_Assert( x.depth() == CV_32F );
|
||||
CV_Assert( y.type() == x.type() && y.size() == x.size() );
|
||||
|
||||
_dst.create(x.size(), CV_32FC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
|
||||
|
||||
GpuMat_<float> xc(x.reshape(1));
|
||||
GpuMat_<float> yc(y.reshape(1));
|
||||
GpuMat_<float> magc(dst.reshape(1));
|
||||
|
||||
gridTransformBinary(xc, yc, magc, magnitude_func<float>(), stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
GpuMat x = _x.getGpuMat();
|
||||
GpuMat y = _y.getGpuMat();
|
||||
GpuMat x = getInputMat(_x, stream);
|
||||
GpuMat y = getInputMat(_y, stream);
|
||||
|
||||
CV_DbgAssert( x.depth() == CV_32F );
|
||||
CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
|
||||
CV_Assert( x.depth() == CV_32F );
|
||||
CV_Assert( y.type() == x.type() && y.size() == x.size() );
|
||||
|
||||
_dst.create(x.size(), CV_32FC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
|
||||
|
||||
GpuMat_<float> xc(x.reshape(1));
|
||||
GpuMat_<float> yc(y.reshape(1));
|
||||
GpuMat_<float> magc(dst.reshape(1));
|
||||
|
||||
gridTransformBinary(xc, yc, magc, magnitude_sqr_func<float>(), stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream)
|
||||
{
|
||||
GpuMat x = _x.getGpuMat();
|
||||
GpuMat y = _y.getGpuMat();
|
||||
GpuMat x = getInputMat(_x, stream);
|
||||
GpuMat y = getInputMat(_y, stream);
|
||||
|
||||
CV_DbgAssert( x.depth() == CV_32F );
|
||||
CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
|
||||
CV_Assert( x.depth() == CV_32F );
|
||||
CV_Assert( y.type() == x.type() && y.size() == x.size() );
|
||||
|
||||
_dst.create(x.size(), CV_32FC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
|
||||
|
||||
GpuMat_<float> xc(x.reshape(1));
|
||||
GpuMat_<float> yc(y.reshape(1));
|
||||
@ -108,21 +112,20 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI
|
||||
gridTransformBinary(xc, yc, anglec, direction_func<float, true>(), stream);
|
||||
else
|
||||
gridTransformBinary(xc, yc, anglec, direction_func<float, false>(), stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
|
||||
{
|
||||
GpuMat x = _x.getGpuMat();
|
||||
GpuMat y = _y.getGpuMat();
|
||||
GpuMat x = getInputMat(_x, stream);
|
||||
GpuMat y = getInputMat(_y, stream);
|
||||
|
||||
CV_DbgAssert( x.depth() == CV_32F );
|
||||
CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
|
||||
CV_Assert( x.depth() == CV_32F );
|
||||
CV_Assert( y.type() == x.type() && y.size() == x.size() );
|
||||
|
||||
_mag.create(x.size(), CV_32FC1);
|
||||
GpuMat mag = _mag.getGpuMat();
|
||||
|
||||
_angle.create(x.size(), CV_32FC1);
|
||||
GpuMat angle = _angle.getGpuMat();
|
||||
GpuMat mag = getOutputMat(_mag, x.size(), CV_32FC1, stream);
|
||||
GpuMat angle = getOutputMat(_angle, x.size(), CV_32FC1, stream);
|
||||
|
||||
GpuMat_<float> xc(x.reshape(1));
|
||||
GpuMat_<float> yc(y.reshape(1));
|
||||
@ -147,6 +150,9 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu
|
||||
binaryTupleAdapter<0, 1>(direction_func<float, false>())),
|
||||
stream);
|
||||
}
|
||||
|
||||
syncOutput(mag, _mag, stream);
|
||||
syncOutput(angle, _angle, stream);
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -173,17 +179,14 @@ namespace
|
||||
|
||||
void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream)
|
||||
{
|
||||
GpuMat mag = _mag.getGpuMat();
|
||||
GpuMat angle = _angle.getGpuMat();
|
||||
GpuMat mag = getInputMat(_mag, _stream);
|
||||
GpuMat angle = getInputMat(_angle, _stream);
|
||||
|
||||
CV_DbgAssert( angle.depth() == CV_32F );
|
||||
CV_DbgAssert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );
|
||||
CV_Assert( angle.depth() == CV_32F );
|
||||
CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );
|
||||
|
||||
_x.create(angle.size(), CV_32FC1);
|
||||
GpuMat x = _x.getGpuMat();
|
||||
|
||||
_y.create(angle.size(), CV_32FC1);
|
||||
GpuMat y = _y.getGpuMat();
|
||||
GpuMat x = getOutputMat(_x, angle.size(), CV_32FC1, _stream);
|
||||
GpuMat y = getOutputMat(_y, angle.size(), CV_32FC1, _stream);
|
||||
|
||||
GpuMat_<float> xc(x.reshape(1));
|
||||
GpuMat_<float> yc(y.reshape(1));
|
||||
@ -204,6 +207,9 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O
|
||||
|
||||
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
syncOutput(x, _x, _stream);
|
||||
syncOutput(y, _y, _stream);
|
||||
|
||||
if (stream == 0)
|
||||
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
@ -50,7 +50,10 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
@ -125,7 +128,7 @@ namespace
|
||||
|
||||
void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
CV_Assert( src.channels() <= 4 );
|
||||
CV_Assert( dim == 0 || dim == 1 );
|
||||
@ -134,8 +137,7 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
|
||||
if (dtype < 0)
|
||||
dtype = src.depth();
|
||||
|
||||
_dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, 1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()), stream);
|
||||
|
||||
if (dim == 0)
|
||||
{
|
||||
@ -292,6 +294,8 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
|
||||
|
||||
func(src, dst, reduceOp, stream);
|
||||
}
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -50,7 +50,10 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -92,20 +95,18 @@ namespace
|
||||
|
||||
void mergeImpl(const GpuMat* src, size_t n, cv::OutputArray _dst, Stream& stream)
|
||||
{
|
||||
CV_DbgAssert( src != 0 );
|
||||
CV_DbgAssert( n > 0 && n <= 4 );
|
||||
CV_Assert( src != 0 );
|
||||
CV_Assert( n > 0 && n <= 4 );
|
||||
|
||||
const int depth = src[0].depth();
|
||||
const cv::Size size = src[0].size();
|
||||
|
||||
#ifdef _DEBUG
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
{
|
||||
CV_Assert( src[i].size() == size );
|
||||
CV_Assert( src[i].depth() == depth );
|
||||
CV_Assert( src[i].channels() == 1 );
|
||||
}
|
||||
#endif
|
||||
|
||||
if (n == 1)
|
||||
{
|
||||
@ -123,8 +124,7 @@ namespace
|
||||
|
||||
const int channels = static_cast<int>(n);
|
||||
|
||||
_dst.create(size, CV_MAKE_TYPE(depth, channels));
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(depth, channels), stream);
|
||||
|
||||
const func_t func = funcs[channels - 2][CV_ELEM_SIZE(depth) / 2];
|
||||
|
||||
@ -132,6 +132,8 @@ namespace
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count or data type");
|
||||
|
||||
func(src, dst, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -203,12 +205,12 @@ namespace
|
||||
{SplitFunc<4, uchar>::call, SplitFunc<4, ushort>::call, SplitFunc<4, int>::call, 0, SplitFunc<4, double>::call}
|
||||
};
|
||||
|
||||
CV_DbgAssert( dst != 0 );
|
||||
CV_Assert( dst != 0 );
|
||||
|
||||
const int depth = src.depth();
|
||||
const int channels = src.channels();
|
||||
|
||||
CV_DbgAssert( channels <= 4 );
|
||||
CV_Assert( channels <= 4 );
|
||||
|
||||
if (channels == 0)
|
||||
return;
|
||||
@ -233,13 +235,13 @@ namespace
|
||||
|
||||
void cv::cuda::split(InputArray _src, GpuMat* dst, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
splitImpl(src, dst, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::split(InputArray _src, std::vector<GpuMat>& dst, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
dst.resize(src.channels());
|
||||
if (src.channels() > 0)
|
||||
splitImpl(src, &dst[0], stream);
|
||||
|
@ -50,126 +50,153 @@
|
||||
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T, typename R, int cn>
|
||||
cv::Scalar sumImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf)
|
||||
void sumImpl(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream)
|
||||
{
|
||||
typedef typename MakeVec<T, cn>::type src_type;
|
||||
typedef typename MakeVec<R, cn>::type res_type;
|
||||
|
||||
const GpuMat_<src_type>& src = (const GpuMat_<src_type>&) _src;
|
||||
GpuMat_<res_type>& buf = (GpuMat_<res_type>&) _buf;
|
||||
GpuMat_<res_type>& dst = (GpuMat_<res_type>&) _dst;
|
||||
|
||||
if (mask.empty())
|
||||
gridCalcSum(src, buf);
|
||||
gridCalcSum(src, dst, stream);
|
||||
else
|
||||
gridCalcSum(src, buf, globPtr<uchar>(mask));
|
||||
|
||||
cv::Scalar_<R> res;
|
||||
cv::Mat res_mat(buf.size(), buf.type(), res.val);
|
||||
buf.download(res_mat);
|
||||
|
||||
return res;
|
||||
gridCalcSum(src, dst, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
|
||||
template <typename T, typename R, int cn>
|
||||
cv::Scalar sumAbsImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf)
|
||||
void sumAbsImpl(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream)
|
||||
{
|
||||
typedef typename MakeVec<T, cn>::type src_type;
|
||||
typedef typename MakeVec<R, cn>::type res_type;
|
||||
|
||||
const GpuMat_<src_type>& src = (const GpuMat_<src_type>&) _src;
|
||||
GpuMat_<res_type>& buf = (GpuMat_<res_type>&) _buf;
|
||||
GpuMat_<res_type>& dst = (GpuMat_<res_type>&) _dst;
|
||||
|
||||
if (mask.empty())
|
||||
gridCalcSum(abs_(cvt_<res_type>(src)), buf);
|
||||
gridCalcSum(abs_(cvt_<res_type>(src)), dst, stream);
|
||||
else
|
||||
gridCalcSum(abs_(cvt_<res_type>(src)), buf, globPtr<uchar>(mask));
|
||||
|
||||
cv::Scalar_<R> res;
|
||||
cv::Mat res_mat(buf.size(), buf.type(), res.val);
|
||||
buf.download(res_mat);
|
||||
|
||||
return res;
|
||||
gridCalcSum(abs_(cvt_<res_type>(src)), dst, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
|
||||
template <typename T, typename R, int cn>
|
||||
cv::Scalar sumSqrImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf)
|
||||
void sumSqrImpl(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream)
|
||||
{
|
||||
typedef typename MakeVec<T, cn>::type src_type;
|
||||
typedef typename MakeVec<R, cn>::type res_type;
|
||||
|
||||
const GpuMat_<src_type>& src = (const GpuMat_<src_type>&) _src;
|
||||
GpuMat_<res_type>& buf = (GpuMat_<res_type>&) _buf;
|
||||
GpuMat_<res_type>& dst = (GpuMat_<res_type>&) _dst;
|
||||
|
||||
if (mask.empty())
|
||||
gridCalcSum(sqr_(cvt_<res_type>(src)), buf);
|
||||
gridCalcSum(sqr_(cvt_<res_type>(src)), dst, stream);
|
||||
else
|
||||
gridCalcSum(sqr_(cvt_<res_type>(src)), buf, globPtr<uchar>(mask));
|
||||
|
||||
cv::Scalar_<R> res;
|
||||
cv::Mat res_mat(buf.size(), buf.type(), res.val);
|
||||
buf.download(res_mat);
|
||||
|
||||
return res;
|
||||
gridCalcSum(sqr_(cvt_<res_type>(src)), dst, globPtr<uchar>(mask), stream);
|
||||
}
|
||||
}
|
||||
|
||||
cv::Scalar cv::cuda::sum(InputArray _src, InputArray _mask, GpuMat& buf)
|
||||
void cv::cuda::calcSum(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
|
||||
{
|
||||
typedef cv::Scalar (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf);
|
||||
typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream);
|
||||
static const func_t funcs[7][4] =
|
||||
{
|
||||
{sumImpl<uchar , uint , 1>, sumImpl<uchar , uint , 2>, sumImpl<uchar , uint , 3>, sumImpl<uchar , uint , 4>},
|
||||
{sumImpl<schar , int , 1>, sumImpl<schar , int , 2>, sumImpl<schar , int , 3>, sumImpl<schar , int , 4>},
|
||||
{sumImpl<ushort, uint , 1>, sumImpl<ushort, uint , 2>, sumImpl<ushort, uint , 3>, sumImpl<ushort, uint , 4>},
|
||||
{sumImpl<short , int , 1>, sumImpl<short , int , 2>, sumImpl<short , int , 3>, sumImpl<short , int , 4>},
|
||||
{sumImpl<int , int , 1>, sumImpl<int , int , 2>, sumImpl<int , int , 3>, sumImpl<int , int , 4>},
|
||||
{sumImpl<float , float , 1>, sumImpl<float , float , 2>, sumImpl<float , float , 3>, sumImpl<float , float , 4>},
|
||||
{sumImpl<uchar , double, 1>, sumImpl<uchar , double, 2>, sumImpl<uchar , double, 3>, sumImpl<uchar , double, 4>},
|
||||
{sumImpl<schar , double, 1>, sumImpl<schar , double, 2>, sumImpl<schar , double, 3>, sumImpl<schar , double, 4>},
|
||||
{sumImpl<ushort, double, 1>, sumImpl<ushort, double, 2>, sumImpl<ushort, double, 3>, sumImpl<ushort, double, 4>},
|
||||
{sumImpl<short , double, 1>, sumImpl<short , double, 2>, sumImpl<short , double, 3>, sumImpl<short , double, 4>},
|
||||
{sumImpl<int , double, 1>, sumImpl<int , double, 2>, sumImpl<int , double, 3>, sumImpl<int , double, 4>},
|
||||
{sumImpl<float , double, 1>, sumImpl<float , double, 2>, sumImpl<float , double, 3>, sumImpl<float , double, 4>},
|
||||
{sumImpl<double, double, 1>, sumImpl<double, double, 2>, sumImpl<double, double, 3>, sumImpl<double, double, 4>}
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
const GpuMat src = getInputMat(_src, stream);
|
||||
const GpuMat mask = getInputMat(_mask, stream);
|
||||
|
||||
CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
|
||||
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
|
||||
|
||||
const func_t func = funcs[src.depth()][src.channels() - 1];
|
||||
const int src_depth = src.depth();
|
||||
const int channels = src.channels();
|
||||
|
||||
return func(src, mask, buf);
|
||||
GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC(channels), stream);
|
||||
|
||||
const func_t func = funcs[src_depth][channels - 1];
|
||||
func(src, dst, mask, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
cv::Scalar cv::cuda::absSum(InputArray _src, InputArray _mask, GpuMat& buf)
|
||||
cv::Scalar cv::cuda::sum(InputArray _src, InputArray _mask)
|
||||
{
|
||||
typedef cv::Scalar (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf);
|
||||
Stream& stream = Stream::Null();
|
||||
|
||||
HostMem dst;
|
||||
calcSum(_src, dst, _mask, stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
cv::Scalar val;
|
||||
dst.createMatHeader().convertTo(cv::Mat(dst.size(), CV_64FC(dst.channels()), val.val), CV_64F);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
void cv::cuda::calcAbsSum(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream);
|
||||
static const func_t funcs[7][4] =
|
||||
{
|
||||
{sumAbsImpl<uchar , uint , 1>, sumAbsImpl<uchar , uint , 2>, sumAbsImpl<uchar , uint , 3>, sumAbsImpl<uchar , uint , 4>},
|
||||
{sumAbsImpl<schar , int , 1>, sumAbsImpl<schar , int , 2>, sumAbsImpl<schar , int , 3>, sumAbsImpl<schar , int , 4>},
|
||||
{sumAbsImpl<ushort, uint , 1>, sumAbsImpl<ushort, uint , 2>, sumAbsImpl<ushort, uint , 3>, sumAbsImpl<ushort, uint , 4>},
|
||||
{sumAbsImpl<short , int , 1>, sumAbsImpl<short , int , 2>, sumAbsImpl<short , int , 3>, sumAbsImpl<short , int , 4>},
|
||||
{sumAbsImpl<int , int , 1>, sumAbsImpl<int , int , 2>, sumAbsImpl<int , int , 3>, sumAbsImpl<int , int , 4>},
|
||||
{sumAbsImpl<float , float , 1>, sumAbsImpl<float , float , 2>, sumAbsImpl<float , float , 3>, sumAbsImpl<float , float , 4>},
|
||||
{sumAbsImpl<uchar , double, 1>, sumAbsImpl<uchar , double, 2>, sumAbsImpl<uchar , double, 3>, sumAbsImpl<uchar , double, 4>},
|
||||
{sumAbsImpl<schar , double, 1>, sumAbsImpl<schar , double, 2>, sumAbsImpl<schar , double, 3>, sumAbsImpl<schar , double, 4>},
|
||||
{sumAbsImpl<ushort, double, 1>, sumAbsImpl<ushort, double, 2>, sumAbsImpl<ushort, double, 3>, sumAbsImpl<ushort, double, 4>},
|
||||
{sumAbsImpl<short , double, 1>, sumAbsImpl<short , double, 2>, sumAbsImpl<short , double, 3>, sumAbsImpl<short , double, 4>},
|
||||
{sumAbsImpl<int , double, 1>, sumAbsImpl<int , double, 2>, sumAbsImpl<int , double, 3>, sumAbsImpl<int , double, 4>},
|
||||
{sumAbsImpl<float , double, 1>, sumAbsImpl<float , double, 2>, sumAbsImpl<float , double, 3>, sumAbsImpl<float , double, 4>},
|
||||
{sumAbsImpl<double, double, 1>, sumAbsImpl<double, double, 2>, sumAbsImpl<double, double, 3>, sumAbsImpl<double, double, 4>}
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
const GpuMat src = getInputMat(_src, stream);
|
||||
const GpuMat mask = getInputMat(_mask, stream);
|
||||
|
||||
CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
|
||||
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
|
||||
|
||||
const func_t func = funcs[src.depth()][src.channels() - 1];
|
||||
const int src_depth = src.depth();
|
||||
const int channels = src.channels();
|
||||
|
||||
return func(src, mask, buf);
|
||||
GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC(channels), stream);
|
||||
|
||||
const func_t func = funcs[src_depth][channels - 1];
|
||||
func(src, dst, mask, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
cv::Scalar cv::cuda::sqrSum(InputArray _src, InputArray _mask, GpuMat& buf)
|
||||
cv::Scalar cv::cuda::absSum(InputArray _src, InputArray _mask)
|
||||
{
|
||||
typedef cv::Scalar (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf);
|
||||
Stream& stream = Stream::Null();
|
||||
|
||||
HostMem dst;
|
||||
calcAbsSum(_src, dst, _mask, stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
cv::Scalar val;
|
||||
dst.createMatHeader().convertTo(cv::Mat(dst.size(), CV_64FC(dst.channels()), val.val), CV_64F);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
void cv::cuda::calcSqrSum(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream);
|
||||
static const func_t funcs[7][4] =
|
||||
{
|
||||
{sumSqrImpl<uchar , double, 1>, sumSqrImpl<uchar , double, 2>, sumSqrImpl<uchar , double, 3>, sumSqrImpl<uchar , double, 4>},
|
||||
@ -181,14 +208,35 @@ cv::Scalar cv::cuda::sqrSum(InputArray _src, InputArray _mask, GpuMat& buf)
|
||||
{sumSqrImpl<double, double, 1>, sumSqrImpl<double, double, 2>, sumSqrImpl<double, double, 3>, sumSqrImpl<double, double, 4>}
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
const GpuMat src = getInputMat(_src, stream);
|
||||
const GpuMat mask = getInputMat(_mask, stream);
|
||||
|
||||
CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
|
||||
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
|
||||
|
||||
const func_t func = funcs[src.depth()][src.channels() - 1];
|
||||
const int src_depth = src.depth();
|
||||
const int channels = src.channels();
|
||||
|
||||
return func(src, mask, buf);
|
||||
GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC(channels), stream);
|
||||
|
||||
const func_t func = funcs[src_depth][channels - 1];
|
||||
func(src, dst, mask, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
cv::Scalar cv::cuda::sqrSum(InputArray _src, InputArray _mask)
|
||||
{
|
||||
Stream& stream = Stream::Null();
|
||||
|
||||
HostMem dst;
|
||||
calcSqrSum(_src, dst, _mask, stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
cv::Scalar val;
|
||||
dst.createMatHeader().convertTo(cv::Mat(dst.size(), CV_64FC(dst.channels()), val.val), CV_64F);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -52,6 +52,8 @@
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
namespace
|
||||
@ -95,15 +97,14 @@ namespace
|
||||
|
||||
double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, double maxVal, int type, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
const int depth = src.depth();
|
||||
|
||||
CV_DbgAssert( src.channels() == 1 && depth <= CV_64F );
|
||||
CV_DbgAssert( type <= 4 /*THRESH_TOZERO_INV*/ );
|
||||
CV_Assert( src.channels() == 1 && depth <= CV_64F );
|
||||
CV_Assert( type <= 4 /*THRESH_TOZERO_INV*/ );
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
if (depth == CV_32F && type == 2 /*THRESH_TRUNC*/)
|
||||
{
|
||||
@ -142,6 +143,8 @@ double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, dou
|
||||
funcs[depth](src, dst, thresh, maxVal, type, stream);
|
||||
}
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
|
||||
return thresh;
|
||||
}
|
||||
|
||||
|
@ -52,18 +52,19 @@
|
||||
#include "opencv2/cudev.hpp"
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cudev;
|
||||
|
||||
void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
const size_t elemSize = src.elemSize();
|
||||
|
||||
CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 );
|
||||
|
||||
_dst.create( src.cols, src.rows, src.type() );
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream);
|
||||
|
||||
if (elemSize == 1)
|
||||
{
|
||||
@ -87,6 +88,8 @@ void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream);
|
||||
}
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -107,11 +107,11 @@ namespace
|
||||
|
||||
GpuMat src1;
|
||||
if (!isScalar1)
|
||||
src1 = _src1.getGpuMat();
|
||||
src1 = getInputMat(_src1, stream);
|
||||
|
||||
GpuMat src2;
|
||||
if (!isScalar2)
|
||||
src2 = _src2.getGpuMat();
|
||||
src2 = getInputMat(_src2, stream);
|
||||
|
||||
Mat scalar;
|
||||
if (isScalar1)
|
||||
@ -126,7 +126,7 @@ namespace
|
||||
scalar.convertTo(Mat_<double>(scalar.rows, scalar.cols, &val[0]), CV_64F);
|
||||
}
|
||||
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
GpuMat mask = getInputMat(_mask, stream);
|
||||
|
||||
const int sdepth = src1.empty() ? src2.depth() : src1.depth();
|
||||
const int cn = src1.empty() ? src2.channels() : src1.channels();
|
||||
@ -147,8 +147,7 @@ namespace
|
||||
CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double");
|
||||
}
|
||||
|
||||
_dst.create(size, CV_MAKE_TYPE(ddepth, cn));
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(ddepth, cn), stream);
|
||||
|
||||
if (isScalar1)
|
||||
mat_scalar_func(src2, val, true, dst, mask, scale, stream, op);
|
||||
@ -156,6 +155,8 @@ namespace
|
||||
mat_scalar_func(src1, val, false, dst, mask, scale, stream, op);
|
||||
else
|
||||
mat_mat_func(src1, src2, dst, mask, scale, stream, op);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
}
|
||||
|
||||
@ -196,27 +197,29 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do
|
||||
{
|
||||
if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
|
||||
{
|
||||
GpuMat src1 = _src1.getGpuMat();
|
||||
GpuMat src2 = _src2.getGpuMat();
|
||||
GpuMat src1 = getInputMat(_src1, stream);
|
||||
GpuMat src2 = getInputMat(_src2, stream);
|
||||
|
||||
CV_Assert( src1.size() == src2.size() );
|
||||
|
||||
_dst.create(src1.size(), src1.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
|
||||
|
||||
mulMat_8uc4_32f(src1, src2, dst, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
|
||||
{
|
||||
GpuMat src1 = _src1.getGpuMat();
|
||||
GpuMat src2 = _src2.getGpuMat();
|
||||
GpuMat src1 = getInputMat(_src1, stream);
|
||||
GpuMat src2 = getInputMat(_src2, stream);
|
||||
|
||||
CV_Assert( src1.size() == src2.size() );
|
||||
|
||||
_dst.create(src1.size(), src1.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
|
||||
|
||||
mulMat_16sc4_32f(src1, src2, dst, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -237,27 +240,29 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
|
||||
{
|
||||
if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
|
||||
{
|
||||
GpuMat src1 = _src1.getGpuMat();
|
||||
GpuMat src2 = _src2.getGpuMat();
|
||||
GpuMat src1 = getInputMat(_src1, stream);
|
||||
GpuMat src2 = getInputMat(_src2, stream);
|
||||
|
||||
CV_Assert( src1.size() == src2.size() );
|
||||
|
||||
_dst.create(src1.size(), src1.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
|
||||
|
||||
divMat_8uc4_32f(src1, src2, dst, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
|
||||
{
|
||||
GpuMat src1 = _src1.getGpuMat();
|
||||
GpuMat src2 = _src2.getGpuMat();
|
||||
GpuMat src1 = getInputMat(_src1, stream);
|
||||
GpuMat src2 = getInputMat(_src2, stream);
|
||||
|
||||
CV_Assert( src1.size() == src2.size() );
|
||||
|
||||
_dst.create(src1.size(), src1.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
|
||||
|
||||
divMat_16sc4_32f(src1, src2, dst, stream);
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -389,15 +394,16 @@ void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
|
||||
{NppShift<CV_32S, 1, nppiRShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiRShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiRShiftC_32s_C4R>::call},
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
CV_Assert( src.depth() < CV_32F );
|
||||
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
|
||||
@ -412,15 +418,16 @@ void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
|
||||
{NppShift<CV_32S, 1, nppiLShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiLShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiLShiftC_32s_C4R>::call},
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S );
|
||||
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
|
||||
|
||||
funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -475,22 +482,24 @@ namespace
|
||||
|
||||
void cv::cuda::magnitude(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
_dst.create(src.size(), CV_32FC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);
|
||||
|
||||
npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
_dst.create(src.size(), CV_32FC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);
|
||||
|
||||
npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
|
||||
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -47,110 +47,106 @@ using namespace cv::cuda;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
double cv::cuda::norm(InputArray, int, InputArray, GpuMat&) { throw_no_cuda(); return 0.0; }
|
||||
double cv::cuda::norm(InputArray, InputArray, GpuMat&, int) { throw_no_cuda(); return 0.0; }
|
||||
double cv::cuda::norm(InputArray, int, InputArray) { throw_no_cuda(); return 0.0; }
|
||||
void cv::cuda::calcNorm(InputArray, OutputArray, int, InputArray, Stream&) { throw_no_cuda(); }
|
||||
double cv::cuda::norm(InputArray, InputArray, int) { throw_no_cuda(); return 0.0; }
|
||||
void cv::cuda::calcNormDiff(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
|
||||
|
||||
Scalar cv::cuda::sum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::cuda::absSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::cuda::sqrSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::cuda::sum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); }
|
||||
void cv::cuda::calcSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
|
||||
Scalar cv::cuda::absSum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); }
|
||||
void cv::cuda::calcAbsSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
|
||||
Scalar cv::cuda::sqrSum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); }
|
||||
void cv::cuda::calcSqrSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::cuda::minMax(InputArray, double*, double*, InputArray, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::minMax(InputArray, double*, double*, InputArray) { throw_no_cuda(); }
|
||||
void cv::cuda::findMinMax(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray) { throw_no_cuda(); }
|
||||
void cv::cuda::findMinMaxLoc(InputArray, OutputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
int cv::cuda::countNonZero(InputArray, GpuMat&) { throw_no_cuda(); return 0; }
|
||||
int cv::cuda::countNonZero(InputArray) { throw_no_cuda(); return 0; }
|
||||
void cv::cuda::countNonZero(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::cuda::reduce(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&) { throw_no_cuda(); }
|
||||
void cv::cuda::meanStdDev(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::cuda::rectStdDev(InputArray, InputArray, OutputArray, Rect, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::cuda::normalize(InputArray, OutputArray, double, double, int, int, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::normalize(InputArray, OutputArray, double, double, int, int, InputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::cuda::integral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::sqrIntegral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::integral(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::sqrIntegral(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
#else
|
||||
|
||||
namespace
|
||||
{
|
||||
class DeviceBuffer
|
||||
{
|
||||
public:
|
||||
explicit DeviceBuffer(int count_ = 1) : count(count_)
|
||||
{
|
||||
cudaSafeCall( cudaMalloc(&pdev, count * sizeof(double)) );
|
||||
}
|
||||
~DeviceBuffer()
|
||||
{
|
||||
cudaSafeCall( cudaFree(pdev) );
|
||||
}
|
||||
|
||||
operator double*() {return pdev;}
|
||||
|
||||
void download(double* hptr)
|
||||
{
|
||||
double hbuf;
|
||||
cudaSafeCall( cudaMemcpy(&hbuf, pdev, sizeof(double), cudaMemcpyDeviceToHost) );
|
||||
*hptr = hbuf;
|
||||
}
|
||||
void download(double** hptrs)
|
||||
{
|
||||
AutoBuffer<double, 2 * sizeof(double)> hbuf(count);
|
||||
cudaSafeCall( cudaMemcpy((void*)hbuf, pdev, count * sizeof(double), cudaMemcpyDeviceToHost) );
|
||||
for (int i = 0; i < count; ++i)
|
||||
*hptrs[i] = hbuf[i];
|
||||
}
|
||||
|
||||
private:
|
||||
double* pdev;
|
||||
int count;
|
||||
};
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// norm
|
||||
|
||||
double cv::cuda::norm(InputArray _src, int normType, InputArray _mask, GpuMat& buf)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
namespace cv { namespace cuda { namespace internal {
|
||||
|
||||
void normL2(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream);
|
||||
|
||||
void findMaxAbs(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream);
|
||||
|
||||
}}}
|
||||
|
||||
void cv::cuda::calcNorm(InputArray _src, OutputArray dst, int normType, InputArray mask, Stream& stream)
|
||||
{
|
||||
CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
|
||||
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size() && src.channels() == 1) );
|
||||
|
||||
GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
GpuMat src_single_channel = src.reshape(1);
|
||||
|
||||
if (normType == NORM_L1)
|
||||
return cuda::absSum(src_single_channel, mask, buf)[0];
|
||||
{
|
||||
calcAbsSum(src_single_channel, dst, mask, stream);
|
||||
}
|
||||
else if (normType == NORM_L2)
|
||||
{
|
||||
internal::normL2(src_single_channel, dst, mask, stream);
|
||||
}
|
||||
else // NORM_INF
|
||||
{
|
||||
internal::findMaxAbs(src_single_channel, dst, mask, stream);
|
||||
}
|
||||
}
|
||||
|
||||
if (normType == NORM_L2)
|
||||
return std::sqrt(cuda::sqrSum(src_single_channel, mask, buf)[0]);
|
||||
double cv::cuda::norm(InputArray _src, int normType, InputArray _mask)
|
||||
{
|
||||
Stream& stream = Stream::Null();
|
||||
|
||||
// NORM_INF
|
||||
double min_val, max_val;
|
||||
cuda::minMax(src_single_channel, &min_val, &max_val, mask, buf);
|
||||
return std::max(std::abs(min_val), std::abs(max_val));
|
||||
HostMem dst;
|
||||
calcNorm(_src, dst, normType, _mask, stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
double val;
|
||||
dst.createMatHeader().convertTo(Mat(1, 1, CV_64FC1, &val), CV_64F);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// meanStdDev
|
||||
|
||||
void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat& buf)
|
||||
void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
if (!deviceSupports(FEATURE_SET_COMPUTE_13))
|
||||
CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");
|
||||
|
||||
const GpuMat src = getInputMat(_src, stream);
|
||||
|
||||
CV_Assert( src.type() == CV_8UC1 );
|
||||
|
||||
if (!deviceSupports(FEATURE_SET_COMPUTE_13))
|
||||
CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");
|
||||
GpuMat dst = getOutputMat(_dst, 1, 2, CV_64FC1, stream);
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
DeviceBuffer dbuf(2);
|
||||
|
||||
int bufSize;
|
||||
#if (CUDA_VERSION <= 4020)
|
||||
nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
|
||||
@ -158,14 +154,30 @@ void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat&
|
||||
nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
|
||||
#endif
|
||||
|
||||
ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
|
||||
BufferPool pool(stream);
|
||||
GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1);
|
||||
|
||||
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
|
||||
NppStreamHandler h(StreamAccessor::getStream(stream));
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dst.ptr<Npp64f>(), dst.ptr<Npp64f>() + 1) );
|
||||
|
||||
double* ptrs[2] = {mean.val, stddev.val};
|
||||
dbuf.download(ptrs);
|
||||
syncOutput(dst, _dst, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev)
|
||||
{
|
||||
Stream& stream = Stream::Null();
|
||||
|
||||
HostMem dst;
|
||||
meanStdDev(_src, dst, stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
double vals[2];
|
||||
dst.createMatHeader().copyTo(Mat(1, 2, CV_64FC1, &vals[0]));
|
||||
|
||||
mean = Scalar(vals[0]);
|
||||
stddev = Scalar(vals[1]);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -173,13 +185,12 @@ void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat&
|
||||
|
||||
void cv::cuda::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Rect rect, Stream& _stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat sqr = _sqr.getGpuMat();
|
||||
GpuMat src = getInputMat(_src, _stream);
|
||||
GpuMat sqr = getInputMat(_sqr, _stream);
|
||||
|
||||
CV_Assert( src.type() == CV_32SC1 && sqr.type() == CV_64FC1 );
|
||||
|
||||
_dst.create(src.size(), CV_32FC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, _stream);
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
@ -200,45 +211,8 @@ void cv::cuda::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Re
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// normalize
|
||||
|
||||
void cv::cuda::normalize(InputArray _src, OutputArray dst, double a, double b, int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
double scale = 1, shift = 0;
|
||||
|
||||
if (norm_type == NORM_MINMAX)
|
||||
{
|
||||
double smin = 0, smax = 0;
|
||||
double dmin = std::min(a, b), dmax = std::max(a, b);
|
||||
cuda::minMax(src, &smin, &smax, mask, norm_buf);
|
||||
scale = (dmax - dmin) * (smax - smin > std::numeric_limits<double>::epsilon() ? 1.0 / (smax - smin) : 0.0);
|
||||
shift = dmin - smin * scale;
|
||||
}
|
||||
else if (norm_type == NORM_L2 || norm_type == NORM_L1 || norm_type == NORM_INF)
|
||||
{
|
||||
scale = cuda::norm(src, norm_type, mask, norm_buf);
|
||||
scale = scale > std::numeric_limits<double>::epsilon() ? a / scale : 0.0;
|
||||
shift = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Error(cv::Error::StsBadArg, "Unknown/unsupported norm type");
|
||||
}
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
src.convertTo(dst, dtype, scale, shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
src.convertTo(cvt_buf, dtype, scale, shift);
|
||||
cvt_buf.copyTo(dst, mask);
|
||||
}
|
||||
syncOutput(dst, _dst, _stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1329,7 +1329,7 @@ CUDA_TEST_P(Divide_Scalar_First, Accuracy)
|
||||
try
|
||||
{
|
||||
cv::cuda::GpuMat dst;
|
||||
cv::cuda::divide(scale, loadMat(mat), dst, depth.second);
|
||||
cv::cuda::divide(scale, loadMat(mat), dst, 1.0, depth.second);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
@ -1339,7 +1339,7 @@ CUDA_TEST_P(Divide_Scalar_First, Accuracy)
|
||||
else
|
||||
{
|
||||
cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
|
||||
cv::cuda::divide(scale, loadMat(mat, useRoi), dst, depth.second);
|
||||
cv::cuda::divide(scale, loadMat(mat, useRoi), dst, 1.0, depth.second);
|
||||
|
||||
cv::Mat dst_gold;
|
||||
cv::divide(scale, mat, dst_gold, depth.second);
|
||||
|
@ -74,8 +74,27 @@ CUDA_TEST_P(Norm, Accuracy)
|
||||
cv::Mat src = randomMat(size, depth);
|
||||
cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
|
||||
|
||||
cv::cuda::GpuMat d_buf;
|
||||
double val = cv::cuda::norm(loadMat(src, useRoi), normCode, loadMat(mask, useRoi), d_buf);
|
||||
double val = cv::cuda::norm(loadMat(src, useRoi), normCode, loadMat(mask, useRoi));
|
||||
|
||||
double val_gold = cv::norm(src, normCode, mask);
|
||||
|
||||
EXPECT_NEAR(val_gold, val, depth < CV_32F ? 0.0 : 1.0);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(Norm, Async)
|
||||
{
|
||||
cv::Mat src = randomMat(size, depth);
|
||||
cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
|
||||
|
||||
cv::cuda::Stream stream;
|
||||
|
||||
cv::cuda::HostMem dst;
|
||||
cv::cuda::calcNorm(loadMat(src, useRoi), dst, normCode, loadMat(mask, useRoi), stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
double val;
|
||||
dst.createMatHeader().convertTo(cv::Mat(1, 1, CV_64FC1, &val), CV_64F);
|
||||
|
||||
double val_gold = cv::norm(src, normCode, mask);
|
||||
|
||||
@ -127,6 +146,27 @@ CUDA_TEST_P(NormDiff, Accuracy)
|
||||
EXPECT_NEAR(val_gold, val, 0.0);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(NormDiff, Async)
|
||||
{
|
||||
cv::Mat src1 = randomMat(size, CV_8UC1);
|
||||
cv::Mat src2 = randomMat(size, CV_8UC1);
|
||||
|
||||
cv::cuda::Stream stream;
|
||||
|
||||
cv::cuda::HostMem dst;
|
||||
cv::cuda::calcNormDiff(loadMat(src1, useRoi), loadMat(src2, useRoi), dst, normCode, stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
double val;
|
||||
const cv::Mat val_mat(1, 1, CV_64FC1, &val);
|
||||
dst.createMatHeader().convertTo(val_mat, CV_64F);
|
||||
|
||||
double val_gold = cv::norm(src1, src2, normCode);
|
||||
|
||||
EXPECT_NEAR(val_gold, val, 0.0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, NormDiff, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
DIFFERENT_SIZES,
|
||||
@ -247,6 +287,24 @@ CUDA_TEST_P(Sum, Simple)
|
||||
EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(Sum, Simple_Async)
|
||||
{
|
||||
cv::cuda::Stream stream;
|
||||
|
||||
cv::cuda::HostMem dst;
|
||||
cv::cuda::calcSum(loadMat(src, useRoi), dst, cv::noArray(), stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
cv::Scalar val;
|
||||
cv::Mat val_mat(dst.size(), CV_64FC(dst.channels()), val.val);
|
||||
dst.createMatHeader().convertTo(val_mat, CV_64F);
|
||||
|
||||
cv::Scalar val_gold = cv::sum(src);
|
||||
|
||||
EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(Sum, Abs)
|
||||
{
|
||||
cv::Scalar val = cv::cuda::absSum(loadMat(src, useRoi));
|
||||
@ -256,6 +314,24 @@ CUDA_TEST_P(Sum, Abs)
|
||||
EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(Sum, Abs_Async)
|
||||
{
|
||||
cv::cuda::Stream stream;
|
||||
|
||||
cv::cuda::HostMem dst;
|
||||
cv::cuda::calcAbsSum(loadMat(src, useRoi), dst, cv::noArray(), stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
cv::Scalar val;
|
||||
cv::Mat val_mat(dst.size(), CV_64FC(dst.channels()), val.val);
|
||||
dst.createMatHeader().convertTo(val_mat, CV_64F);
|
||||
|
||||
cv::Scalar val_gold = absSumGold(src);
|
||||
|
||||
EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(Sum, Sqr)
|
||||
{
|
||||
cv::Scalar val = cv::cuda::sqrSum(loadMat(src, useRoi));
|
||||
@ -265,6 +341,24 @@ CUDA_TEST_P(Sum, Sqr)
|
||||
EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(Sum, Sqr_Async)
|
||||
{
|
||||
cv::cuda::Stream stream;
|
||||
|
||||
cv::cuda::HostMem dst;
|
||||
cv::cuda::calcSqrSum(loadMat(src, useRoi), dst, cv::noArray(), stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
cv::Scalar val;
|
||||
cv::Mat val_mat(dst.size(), CV_64FC(dst.channels()), val.val);
|
||||
dst.createMatHeader().convertTo(val_mat, CV_64F);
|
||||
|
||||
cv::Scalar val_gold = sqrSumGold(src);
|
||||
|
||||
EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Sum, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
DIFFERENT_SIZES,
|
||||
@ -321,6 +415,28 @@ CUDA_TEST_P(MinMax, WithoutMask)
|
||||
}
|
||||
}
|
||||
|
||||
CUDA_TEST_P(MinMax, Async)
|
||||
{
|
||||
cv::Mat src = randomMat(size, depth);
|
||||
|
||||
cv::cuda::Stream stream;
|
||||
|
||||
cv::cuda::HostMem dst;
|
||||
cv::cuda::findMinMax(loadMat(src, useRoi), dst, cv::noArray(), stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
double vals[2];
|
||||
const cv::Mat vals_mat(1, 2, CV_64FC1, &vals[0]);
|
||||
dst.createMatHeader().convertTo(vals_mat, CV_64F);
|
||||
|
||||
double minVal_gold, maxVal_gold;
|
||||
minMaxLocGold(src, &minVal_gold, &maxVal_gold);
|
||||
|
||||
EXPECT_DOUBLE_EQ(minVal_gold, vals[0]);
|
||||
EXPECT_DOUBLE_EQ(maxVal_gold, vals[1]);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(MinMax, WithMask)
|
||||
{
|
||||
cv::Mat src = randomMat(size, depth);
|
||||
@ -471,6 +587,41 @@ CUDA_TEST_P(MinMaxLoc, WithoutMask)
|
||||
}
|
||||
}
|
||||
|
||||
CUDA_TEST_P(MinMaxLoc, Async)
|
||||
{
|
||||
cv::Mat src = randomMat(size, depth);
|
||||
|
||||
cv::cuda::Stream stream;
|
||||
|
||||
cv::cuda::HostMem minMaxVals, locVals;
|
||||
cv::cuda::findMinMaxLoc(loadMat(src, useRoi), minMaxVals, locVals, cv::noArray(), stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
double vals[2];
|
||||
const cv::Mat vals_mat(2, 1, CV_64FC1, &vals[0]);
|
||||
minMaxVals.createMatHeader().convertTo(vals_mat, CV_64F);
|
||||
|
||||
int locs[2];
|
||||
const cv::Mat locs_mat(2, 1, CV_32SC1, &locs[0]);
|
||||
locVals.createMatHeader().copyTo(locs_mat);
|
||||
|
||||
cv::Point locs2D[] = {
|
||||
cv::Point(locs[0] % src.cols, locs[0] / src.cols),
|
||||
cv::Point(locs[1] % src.cols, locs[1] / src.cols),
|
||||
};
|
||||
|
||||
double minVal_gold, maxVal_gold;
|
||||
cv::Point minLoc_gold, maxLoc_gold;
|
||||
minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold);
|
||||
|
||||
EXPECT_DOUBLE_EQ(minVal_gold, vals[0]);
|
||||
EXPECT_DOUBLE_EQ(maxVal_gold, vals[1]);
|
||||
|
||||
expectEqual(src, minLoc_gold, locs2D[0]);
|
||||
expectEqual(src, maxLoc_gold, locs2D[1]);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(MinMaxLoc, WithMask)
|
||||
{
|
||||
cv::Mat src = randomMat(size, depth);
|
||||
@ -564,6 +715,7 @@ PARAM_TEST_CASE(CountNonZero, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
|
||||
int depth;
|
||||
bool useRoi;
|
||||
|
||||
cv::Mat src;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
@ -573,15 +725,14 @@ PARAM_TEST_CASE(CountNonZero, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
|
||||
useRoi = GET_PARAM(3);
|
||||
|
||||
cv::cuda::setDevice(devInfo.deviceID());
|
||||
|
||||
cv::Mat srcBase = randomMat(size, CV_8U, 0.0, 1.5);
|
||||
srcBase.convertTo(src, depth);
|
||||
}
|
||||
};
|
||||
|
||||
CUDA_TEST_P(CountNonZero, Accuracy)
|
||||
{
|
||||
cv::Mat srcBase = randomMat(size, CV_8U, 0.0, 1.5);
|
||||
cv::Mat src;
|
||||
srcBase.convertTo(src, depth);
|
||||
|
||||
if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
|
||||
{
|
||||
try
|
||||
@ -603,6 +754,24 @@ CUDA_TEST_P(CountNonZero, Accuracy)
|
||||
}
|
||||
}
|
||||
|
||||
CUDA_TEST_P(CountNonZero, Async)
|
||||
{
|
||||
cv::cuda::Stream stream;
|
||||
|
||||
cv::cuda::HostMem dst;
|
||||
cv::cuda::countNonZero(loadMat(src, useRoi), dst, stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
int val;
|
||||
const cv::Mat val_mat(1, 1, CV_32SC1, &val);
|
||||
dst.createMatHeader().copyTo(val_mat);
|
||||
|
||||
int val_gold = cv::countNonZero(src);
|
||||
|
||||
ASSERT_EQ(val_gold, val);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CountNonZero, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
DIFFERENT_SIZES,
|
||||
@ -750,7 +919,7 @@ CUDA_TEST_P(Normalize, WithMask)
|
||||
dst_gold.setTo(cv::Scalar::all(0));
|
||||
cv::normalize(src, dst_gold, alpha, beta, norm_type, type, mask);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, type < CV_32F ? 1.0 : 1e-4);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Normalize, testing::Combine(
|
||||
@ -811,6 +980,28 @@ CUDA_TEST_P(MeanStdDev, Accuracy)
|
||||
}
|
||||
}
|
||||
|
||||
CUDA_TEST_P(MeanStdDev, Async)
|
||||
{
|
||||
cv::Mat src = randomMat(size, CV_8UC1);
|
||||
|
||||
cv::cuda::Stream stream;
|
||||
|
||||
cv::cuda::HostMem dst;
|
||||
cv::cuda::meanStdDev(loadMat(src, useRoi), dst, stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
double vals[2];
|
||||
dst.createMatHeader().copyTo(cv::Mat(1, 2, CV_64FC1, &vals[0]));
|
||||
|
||||
cv::Scalar mean_gold;
|
||||
cv::Scalar stddev_gold;
|
||||
cv::meanStdDev(src, mean_gold, stddev_gold);
|
||||
|
||||
EXPECT_SCALAR_NEAR(mean_gold, cv::Scalar(vals[0]), 1e-5);
|
||||
EXPECT_SCALAR_NEAR(stddev_gold, cv::Scalar(vals[1]), 1e-5);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MeanStdDev, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
DIFFERENT_SIZES,
|
||||
|
@ -266,7 +266,7 @@ namespace
|
||||
{
|
||||
int bgfgClassification(const GpuMat& prevFrame, const GpuMat& curFrame,
|
||||
const GpuMat& Ftd, const GpuMat& Fbd,
|
||||
GpuMat& foreground, GpuMat& countBuf,
|
||||
GpuMat& foreground,
|
||||
const FGDParams& params, int out_cn)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground,
|
||||
@ -298,7 +298,7 @@ namespace
|
||||
deltaC, deltaCC, params.alpha2,
|
||||
params.N1c, params.N1cc, 0);
|
||||
|
||||
int count = cuda::countNonZero(foreground, countBuf);
|
||||
int count = cuda::countNonZero(foreground);
|
||||
|
||||
cuda::multiply(foreground, Scalar::all(255), foreground);
|
||||
|
||||
@ -605,8 +605,6 @@ namespace
|
||||
GpuMat hist_;
|
||||
GpuMat histBuf_;
|
||||
|
||||
GpuMat countBuf_;
|
||||
|
||||
GpuMat buf_;
|
||||
GpuMat filterBrd_;
|
||||
|
||||
@ -649,7 +647,7 @@ namespace
|
||||
changeDetection(prevFrame_, curFrame, Ftd_, hist_, histBuf_);
|
||||
changeDetection(background_, curFrame, Fbd_, hist_, histBuf_);
|
||||
|
||||
int FG_pixels_count = bgfgClassification(prevFrame_, curFrame, Ftd_, Fbd_, foreground_, countBuf_, params_, 4);
|
||||
int FG_pixels_count = bgfgClassification(prevFrame_, curFrame, Ftd_, Fbd_, foreground_, params_, 4);
|
||||
|
||||
#ifdef HAVE_OPENCV_CUDAFILTERS
|
||||
if (params_.perform_morphing > 0)
|
||||
|
@ -48,6 +48,7 @@
|
||||
#endif
|
||||
|
||||
#include "opencv2/core/cuda.hpp"
|
||||
#include "opencv2/features2d.hpp"
|
||||
#include "opencv2/cudafilters.hpp"
|
||||
|
||||
/**
|
||||
@ -62,262 +63,396 @@ namespace cv { namespace cuda {
|
||||
//! @addtogroup cudafeatures2d
|
||||
//! @{
|
||||
|
||||
/** @brief Brute-force descriptor matcher.
|
||||
//
|
||||
// DescriptorMatcher
|
||||
//
|
||||
|
||||
For each descriptor in the first set, this matcher finds the closest descriptor in the second set
|
||||
by trying each one. This descriptor matcher supports masking permissible matches between descriptor
|
||||
sets.
|
||||
/** @brief Abstract base class for matching keypoint descriptors.
|
||||
|
||||
The class BFMatcher_CUDA has an interface similar to the class DescriptorMatcher. It has two groups
|
||||
of match methods: for matching descriptors of one image with another image or with an image set.
|
||||
Also, all functions have an alternative to save results either to the GPU memory or to the CPU
|
||||
memory.
|
||||
|
||||
@sa DescriptorMatcher, BFMatcher
|
||||
It has two groups of match methods: for matching descriptors of an image with another image or with
|
||||
an image set.
|
||||
*/
|
||||
class CV_EXPORTS BFMatcher_CUDA
|
||||
class CV_EXPORTS DescriptorMatcher : public cv::Algorithm
|
||||
{
|
||||
public:
|
||||
explicit BFMatcher_CUDA(int norm = cv::NORM_L2);
|
||||
//
|
||||
// Factories
|
||||
//
|
||||
|
||||
//! Add descriptors to train descriptor collection
|
||||
void add(const std::vector<GpuMat>& descCollection);
|
||||
/** @brief Brute-force descriptor matcher.
|
||||
|
||||
//! Get train descriptors collection
|
||||
const std::vector<GpuMat>& getTrainDescriptors() const;
|
||||
For each descriptor in the first set, this matcher finds the closest descriptor in the second set
|
||||
by trying each one. This descriptor matcher supports masking permissible matches of descriptor
|
||||
sets.
|
||||
|
||||
//! Clear train descriptors collection
|
||||
void clear();
|
||||
@param normType One of NORM_L1, NORM_L2, NORM_HAMMING. L1 and L2 norms are
|
||||
preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and
|
||||
BRIEF.
|
||||
*/
|
||||
static Ptr<DescriptorMatcher> createBFMatcher(int normType = cv::NORM_L2);
|
||||
|
||||
//! Return true if there are no train descriptors in the collection
|
||||
bool empty() const;
|
||||
//
|
||||
// Utility
|
||||
//
|
||||
|
||||
//! Return true if the matcher supports mask in match methods
|
||||
bool isMaskSupported() const;
|
||||
/** @brief Returns true if the descriptor matcher supports masking permissible matches.
|
||||
*/
|
||||
virtual bool isMaskSupported() const = 0;
|
||||
|
||||
//! Find one best match for each query descriptor
|
||||
void matchSingle(const GpuMat& query, const GpuMat& train,
|
||||
GpuMat& trainIdx, GpuMat& distance,
|
||||
const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
|
||||
//
|
||||
// Descriptor collection
|
||||
//
|
||||
|
||||
//! Download trainIdx and distance and convert it to CPU vector with DMatch
|
||||
static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
|
||||
//! Convert trainIdx and distance to vector with DMatch
|
||||
static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);
|
||||
/** @brief Adds descriptors to train a descriptor collection.
|
||||
|
||||
//! Find one best match for each query descriptor
|
||||
void match(const GpuMat& query, const GpuMat& train, std::vector<DMatch>& matches, const GpuMat& mask = GpuMat());
|
||||
If the collection is not empty, the new descriptors are added to existing train descriptors.
|
||||
|
||||
//! Make gpu collection of trains and masks in suitable format for matchCollection function
|
||||
void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
|
||||
@param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same
|
||||
train image.
|
||||
*/
|
||||
virtual void add(const std::vector<GpuMat>& descriptors) = 0;
|
||||
|
||||
//! Find one best match from train collection for each query descriptor
|
||||
void matchCollection(const GpuMat& query, const GpuMat& trainCollection,
|
||||
GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
|
||||
const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null());
|
||||
/** @brief Returns a constant link to the train descriptor collection.
|
||||
*/
|
||||
virtual const std::vector<GpuMat>& getTrainDescriptors() const = 0;
|
||||
|
||||
//! Download trainIdx, imgIdx and distance and convert it to vector with DMatch
|
||||
static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches);
|
||||
//! Convert trainIdx, imgIdx and distance to vector with DMatch
|
||||
static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches);
|
||||
/** @brief Clears the train descriptor collection.
|
||||
*/
|
||||
virtual void clear() = 0;
|
||||
|
||||
//! Find one best match from train collection for each query descriptor.
|
||||
void match(const GpuMat& query, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
|
||||
/** @brief Returns true if there are no train descriptors in the collection.
|
||||
*/
|
||||
virtual bool empty() const = 0;
|
||||
|
||||
//! Find k best matches for each query descriptor (in increasing order of distances)
|
||||
void knnMatchSingle(const GpuMat& query, const GpuMat& train,
|
||||
GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
|
||||
const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
|
||||
/** @brief Trains a descriptor matcher.
|
||||
|
||||
//! Download trainIdx and distance and convert it to vector with DMatch
|
||||
//! compactResult is used when mask is not empty. If compactResult is false matches
|
||||
//! vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
//! matches vector will not contain matches for fully masked out query descriptors.
|
||||
static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
//! Convert trainIdx and distance to vector with DMatch
|
||||
static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
Trains a descriptor matcher (for example, the flann index). In all methods to match, the method
|
||||
train() is run every time before matching.
|
||||
*/
|
||||
virtual void train() = 0;
|
||||
|
||||
//! Find k best matches for each query descriptor (in increasing order of distances).
|
||||
//! compactResult is used when mask is not empty. If compactResult is false matches
|
||||
//! vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
//! matches vector will not contain matches for fully masked out query descriptors.
|
||||
void knnMatch(const GpuMat& query, const GpuMat& train,
|
||||
std::vector< std::vector<DMatch> >& matches, int k, const GpuMat& mask = GpuMat(),
|
||||
bool compactResult = false);
|
||||
//
|
||||
// 1 to 1 match
|
||||
//
|
||||
|
||||
//! Find k best matches from train collection for each query descriptor (in increasing order of distances)
|
||||
void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
|
||||
GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
|
||||
const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null());
|
||||
/** @brief Finds the best match for each descriptor from a query set (blocking version).
|
||||
|
||||
//! Download trainIdx and distance and convert it to vector with DMatch
|
||||
//! compactResult is used when mask is not empty. If compactResult is false matches
|
||||
//! vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
//! matches vector will not contain matches for fully masked out query descriptors.
|
||||
//! @see BFMatcher_CUDA::knnMatchDownload
|
||||
static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
//! Convert trainIdx and distance to vector with DMatch
|
||||
//! @see BFMatcher_CUDA::knnMatchConvert
|
||||
static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
@param queryDescriptors Query set of descriptors.
|
||||
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
|
||||
collection stored in the class object.
|
||||
@param matches Matches. If a query descriptor is masked out in mask, no match is added for this
|
||||
descriptor. So, matches size may be smaller than the query descriptors count.
|
||||
@param mask Mask specifying permissible matches between an input query and train matrices of
|
||||
descriptors.
|
||||
|
||||
//! Find k best matches for each query descriptor (in increasing order of distances).
|
||||
//! compactResult is used when mask is not empty. If compactResult is false matches
|
||||
//! vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
//! matches vector will not contain matches for fully masked out query descriptors.
|
||||
void knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
|
||||
const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
|
||||
In the first variant of this method, the train descriptors are passed as an input argument. In the
|
||||
second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is
|
||||
used. Optional mask (or masks) can be passed to specify which query and training descriptors can be
|
||||
matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if
|
||||
mask.at\<uchar\>(i,j) is non-zero.
|
||||
*/
|
||||
virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
std::vector<DMatch>& matches,
|
||||
InputArray mask = noArray()) = 0;
|
||||
|
||||
//! Find best matches for each query descriptor which have distance less than maxDistance.
|
||||
//! nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
|
||||
//! Note: nMatches can be greater than trainIdx.cols - this means that the matcher didn't find all matches,
|
||||
//! because it didn't have enough memory.
|
||||
//! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
|
||||
//! otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches
|
||||
//! Matches are not sorted.
|
||||
void radiusMatchSingle(const GpuMat& query, const GpuMat& train,
|
||||
GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
|
||||
const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
|
||||
/** @overload
|
||||
*/
|
||||
virtual void match(InputArray queryDescriptors,
|
||||
std::vector<DMatch>& matches,
|
||||
const std::vector<GpuMat>& masks = std::vector<GpuMat>()) = 0;
|
||||
|
||||
//! Download trainIdx, nMatches and distance and convert it to vector with DMatch.
|
||||
//! matches will be sorted in increasing order of distances.
|
||||
//! compactResult is used when mask is not empty. If compactResult is false matches
|
||||
//! vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
//! matches vector will not contain matches for fully masked out query descriptors.
|
||||
static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
//! Convert trainIdx, nMatches and distance to vector with DMatch.
|
||||
static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
/** @brief Finds the best match for each descriptor from a query set (asynchronous version).
|
||||
|
||||
//! Find best matches for each query descriptor which have distance less than maxDistance
|
||||
//! (in increasing order of distances).
|
||||
void radiusMatch(const GpuMat& query, const GpuMat& train,
|
||||
std::vector< std::vector<DMatch> >& matches, float maxDistance,
|
||||
const GpuMat& mask = GpuMat(), bool compactResult = false);
|
||||
@param queryDescriptors Query set of descriptors.
|
||||
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
|
||||
collection stored in the class object.
|
||||
@param matches Matches array stored in GPU memory. Internal representation is not defined.
|
||||
Use DescriptorMatcher::matchConvert method to retrieve results in standard representation.
|
||||
@param mask Mask specifying permissible matches between an input query and train matrices of
|
||||
descriptors.
|
||||
@param stream CUDA stream.
|
||||
|
||||
//! Find best matches for each query descriptor which have distance less than maxDistance.
|
||||
//! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
|
||||
//! otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches
|
||||
//! Matches are not sorted.
|
||||
void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
|
||||
const std::vector<GpuMat>& masks = std::vector<GpuMat>(), Stream& stream = Stream::Null());
|
||||
In the first variant of this method, the train descriptors are passed as an input argument. In the
|
||||
second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is
|
||||
used. Optional mask (or masks) can be passed to specify which query and training descriptors can be
|
||||
matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if
|
||||
mask.at\<uchar\>(i,j) is non-zero.
|
||||
*/
|
||||
virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
OutputArray matches,
|
||||
InputArray mask = noArray(),
|
||||
Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
//! Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
|
||||
//! matches will be sorted in increasing order of distances.
|
||||
//! compactResult is used when mask is not empty. If compactResult is false matches
|
||||
//! vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
//! matches vector will not contain matches for fully masked out query descriptors.
|
||||
static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
//! Convert trainIdx, nMatches and distance to vector with DMatch.
|
||||
static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
/** @overload
|
||||
*/
|
||||
virtual void matchAsync(InputArray queryDescriptors,
|
||||
OutputArray matches,
|
||||
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
|
||||
Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
//! Find best matches from train collection for each query descriptor which have distance less than
|
||||
//! maxDistance (in increasing order of distances).
|
||||
void radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
|
||||
const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
|
||||
/** @brief Converts matches array from internal representation to standard matches vector.
|
||||
|
||||
int norm;
|
||||
The method is supposed to be used with DescriptorMatcher::matchAsync to get final result.
|
||||
Call this method only after DescriptorMatcher::matchAsync is completed (i.e. after synchronization).
|
||||
|
||||
private:
|
||||
std::vector<GpuMat> trainDescCollection;
|
||||
@param gpu_matches Matches, returned from DescriptorMatcher::matchAsync.
|
||||
@param matches Vector of DMatch objects.
|
||||
*/
|
||||
virtual void matchConvert(InputArray gpu_matches,
|
||||
std::vector<DMatch>& matches) = 0;
|
||||
|
||||
//
|
||||
// knn match
|
||||
//
|
||||
|
||||
/** @brief Finds the k best matches for each descriptor from a query set (blocking version).
|
||||
|
||||
@param queryDescriptors Query set of descriptors.
|
||||
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
|
||||
collection stored in the class object.
|
||||
@param matches Matches. Each matches[i] is k or less matches for the same query descriptor.
|
||||
@param k Count of best matches found per each query descriptor or less if a query descriptor has
|
||||
less than k possible matches in total.
|
||||
@param mask Mask specifying permissible matches between an input query and train matrices of
|
||||
descriptors.
|
||||
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
|
||||
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
|
||||
the matches vector does not contain matches for fully masked-out query descriptors.
|
||||
|
||||
These extended variants of DescriptorMatcher::match methods find several best matches for each query
|
||||
descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::match
|
||||
for the details about query and train descriptors.
|
||||
*/
|
||||
virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
std::vector<std::vector<DMatch> >& matches,
|
||||
int k,
|
||||
InputArray mask = noArray(),
|
||||
bool compactResult = false) = 0;
|
||||
|
||||
/** @overload
|
||||
*/
|
||||
virtual void knnMatch(InputArray queryDescriptors,
|
||||
std::vector<std::vector<DMatch> >& matches,
|
||||
int k,
|
||||
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
|
||||
bool compactResult = false) = 0;
|
||||
|
||||
/** @brief Finds the k best matches for each descriptor from a query set (asynchronous version).
|
||||
|
||||
@param queryDescriptors Query set of descriptors.
|
||||
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
|
||||
collection stored in the class object.
|
||||
@param matches Matches array stored in GPU memory. Internal representation is not defined.
|
||||
Use DescriptorMatcher::knnMatchConvert method to retrieve results in standard representation.
|
||||
@param k Count of best matches found per each query descriptor or less if a query descriptor has
|
||||
less than k possible matches in total.
|
||||
@param mask Mask specifying permissible matches between an input query and train matrices of
|
||||
descriptors.
|
||||
@param stream CUDA stream.
|
||||
|
||||
These extended variants of DescriptorMatcher::matchAsync methods find several best matches for each query
|
||||
descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::matchAsync
|
||||
for the details about query and train descriptors.
|
||||
*/
|
||||
virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
OutputArray matches,
|
||||
int k,
|
||||
InputArray mask = noArray(),
|
||||
Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
/** @overload
|
||||
*/
|
||||
virtual void knnMatchAsync(InputArray queryDescriptors,
|
||||
OutputArray matches,
|
||||
int k,
|
||||
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
|
||||
Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
/** @brief Converts matches array from internal representation to standard matches vector.
|
||||
|
||||
The method is supposed to be used with DescriptorMatcher::knnMatchAsync to get final result.
|
||||
Call this method only after DescriptorMatcher::knnMatchAsync is completed (i.e. after synchronization).
|
||||
|
||||
@param gpu_matches Matches, returned from DescriptorMatcher::knnMatchAsync.
|
||||
@param matches Vector of DMatch objects.
|
||||
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
|
||||
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
|
||||
the matches vector does not contain matches for fully masked-out query descriptors.
|
||||
*/
|
||||
virtual void knnMatchConvert(InputArray gpu_matches,
|
||||
std::vector< std::vector<DMatch> >& matches,
|
||||
bool compactResult = false) = 0;
|
||||
|
||||
//
|
||||
// radius match
|
||||
//
|
||||
|
||||
/** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (blocking version).
|
||||
|
||||
@param queryDescriptors Query set of descriptors.
|
||||
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
|
||||
collection stored in the class object.
|
||||
@param matches Found matches.
|
||||
@param maxDistance Threshold for the distance between matched descriptors. Distance means here
|
||||
metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured
|
||||
in Pixels)!
|
||||
@param mask Mask specifying permissible matches between an input query and train matrices of
|
||||
descriptors.
|
||||
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
|
||||
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
|
||||
the matches vector does not contain matches for fully masked-out query descriptors.
|
||||
|
||||
For each query descriptor, the methods find such training descriptors that the distance between the
|
||||
query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are
|
||||
returned in the distance increasing order.
|
||||
*/
|
||||
virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
std::vector<std::vector<DMatch> >& matches,
|
||||
float maxDistance,
|
||||
InputArray mask = noArray(),
|
||||
bool compactResult = false) = 0;
|
||||
|
||||
/** @overload
|
||||
*/
|
||||
virtual void radiusMatch(InputArray queryDescriptors,
|
||||
std::vector<std::vector<DMatch> >& matches,
|
||||
float maxDistance,
|
||||
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
|
||||
bool compactResult = false) = 0;
|
||||
|
||||
/** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (asynchronous version).
|
||||
|
||||
@param queryDescriptors Query set of descriptors.
|
||||
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
|
||||
collection stored in the class object.
|
||||
@param matches Matches array stored in GPU memory. Internal representation is not defined.
|
||||
Use DescriptorMatcher::radiusMatchConvert method to retrieve results in standard representation.
|
||||
@param maxDistance Threshold for the distance between matched descriptors. Distance means here
|
||||
metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured
|
||||
in Pixels)!
|
||||
@param mask Mask specifying permissible matches between an input query and train matrices of
|
||||
descriptors.
|
||||
@param stream CUDA stream.
|
||||
|
||||
For each query descriptor, the methods find such training descriptors that the distance between the
|
||||
query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are
|
||||
returned in the distance increasing order.
|
||||
*/
|
||||
virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
OutputArray matches,
|
||||
float maxDistance,
|
||||
InputArray mask = noArray(),
|
||||
Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
/** @overload
|
||||
*/
|
||||
virtual void radiusMatchAsync(InputArray queryDescriptors,
|
||||
OutputArray matches,
|
||||
float maxDistance,
|
||||
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
|
||||
Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
/** @brief Converts matches array from internal representation to standard matches vector.
|
||||
|
||||
The method is supposed to be used with DescriptorMatcher::radiusMatchAsync to get final result.
|
||||
Call this method only after DescriptorMatcher::radiusMatchAsync is completed (i.e. after synchronization).
|
||||
|
||||
@param gpu_matches Matches, returned from DescriptorMatcher::radiusMatchAsync.
|
||||
@param matches Vector of DMatch objects.
|
||||
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
|
||||
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
|
||||
the matches vector does not contain matches for fully masked-out query descriptors.
|
||||
*/
|
||||
virtual void radiusMatchConvert(InputArray gpu_matches,
|
||||
std::vector< std::vector<DMatch> >& matches,
|
||||
bool compactResult = false) = 0;
|
||||
};
|
||||
|
||||
/** @brief Class used for corner detection using the FAST algorithm. :
|
||||
//
|
||||
// Feature2DAsync
|
||||
//
|
||||
|
||||
/** @brief Abstract base class for CUDA asynchronous 2D image feature detectors and descriptor extractors.
|
||||
*/
|
||||
class CV_EXPORTS FAST_CUDA
|
||||
class CV_EXPORTS Feature2DAsync
|
||||
{
|
||||
public:
|
||||
virtual ~Feature2DAsync();
|
||||
|
||||
/** @brief Detects keypoints in an image.
|
||||
|
||||
@param image Image.
|
||||
@param keypoints The detected keypoints.
|
||||
@param mask Mask specifying where to look for keypoints (optional). It must be a 8-bit integer
|
||||
matrix with non-zero values in the region of interest.
|
||||
@param stream CUDA stream.
|
||||
*/
|
||||
virtual void detectAsync(InputArray image,
|
||||
OutputArray keypoints,
|
||||
InputArray mask = noArray(),
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Computes the descriptors for a set of keypoints detected in an image.
|
||||
|
||||
@param image Image.
|
||||
@param keypoints Input collection of keypoints.
|
||||
@param descriptors Computed descriptors. Row j is the descriptor for j-th keypoint.
|
||||
@param stream CUDA stream.
|
||||
*/
|
||||
virtual void computeAsync(InputArray image,
|
||||
OutputArray keypoints,
|
||||
OutputArray descriptors,
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
/** Detects keypoints and computes the descriptors. */
|
||||
virtual void detectAndComputeAsync(InputArray image,
|
||||
InputArray mask,
|
||||
OutputArray keypoints,
|
||||
OutputArray descriptors,
|
||||
bool useProvidedKeypoints = false,
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
/** Converts keypoints array from internal representation to standard vector. */
|
||||
virtual void convert(InputArray gpu_keypoints,
|
||||
std::vector<KeyPoint>& keypoints) = 0;
|
||||
};
|
||||
|
||||
//
|
||||
// FastFeatureDetector
|
||||
//
|
||||
|
||||
/** @brief Wrapping class for feature detection using the FAST method.
|
||||
*/
|
||||
class CV_EXPORTS FastFeatureDetector : public cv::FastFeatureDetector, public Feature2DAsync
|
||||
{
|
||||
public:
|
||||
enum
|
||||
{
|
||||
LOCATION_ROW = 0,
|
||||
RESPONSE_ROW,
|
||||
ROWS_COUNT
|
||||
ROWS_COUNT,
|
||||
|
||||
FEATURE_SIZE = 7
|
||||
};
|
||||
|
||||
//! all features have same size
|
||||
static const int FEATURE_SIZE = 7;
|
||||
static Ptr<FastFeatureDetector> create(int threshold=10,
|
||||
bool nonmaxSuppression=true,
|
||||
int type=FastFeatureDetector::TYPE_9_16,
|
||||
int max_npoints = 5000);
|
||||
|
||||
/** @brief Constructor.
|
||||
|
||||
@param threshold Threshold on difference between intensity of the central pixel and pixels on a
|
||||
circle around this pixel.
|
||||
@param nonmaxSuppression If it is true, non-maximum suppression is applied to detected corners
|
||||
(keypoints).
|
||||
@param keypointsRatio Inner buffer size for keypoints store is determined as (keypointsRatio \*
|
||||
image_width \* image_height).
|
||||
*/
|
||||
explicit FAST_CUDA(int threshold, bool nonmaxSuppression = true, double keypointsRatio = 0.05);
|
||||
|
||||
/** @brief Finds the keypoints using FAST detector.
|
||||
|
||||
@param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
|
||||
supported.
|
||||
@param mask Optional input mask that marks the regions where we should detect features.
|
||||
@param keypoints The output vector of keypoints. Can be stored both in CPU and GPU memory. For GPU
|
||||
memory:
|
||||
- keypoints.ptr\<Vec2s\>(LOCATION_ROW)[i] will contain location of i'th point
|
||||
- keypoints.ptr\<float\>(RESPONSE_ROW)[i] will contain response of i'th point (if non-maximum
|
||||
suppression is applied)
|
||||
*/
|
||||
void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
|
||||
/** @overload */
|
||||
void operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
|
||||
|
||||
/** @brief Download keypoints from GPU to CPU memory.
|
||||
*/
|
||||
static void downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
|
||||
|
||||
/** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
|
||||
*/
|
||||
static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
|
||||
|
||||
/** @brief Releases inner buffer memory.
|
||||
*/
|
||||
void release();
|
||||
|
||||
bool nonmaxSuppression;
|
||||
|
||||
int threshold;
|
||||
|
||||
//! max keypoints = keypointsRatio * img.size().area()
|
||||
double keypointsRatio;
|
||||
|
||||
/** @brief Find keypoints and compute their response if nonmaxSuppression is true.
|
||||
|
||||
@param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
|
||||
supported.
|
||||
@param mask Optional input mask that marks the regions where we should detect features.
|
||||
|
||||
The function returns count of detected keypoints.
|
||||
*/
|
||||
int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask);
|
||||
|
||||
/** @brief Gets final array of keypoints.
|
||||
|
||||
@param keypoints The output vector of keypoints.
|
||||
|
||||
The function performs non-max suppression if needed and returns final count of keypoints.
|
||||
*/
|
||||
int getKeyPoints(GpuMat& keypoints);
|
||||
|
||||
private:
|
||||
GpuMat kpLoc_;
|
||||
int count_;
|
||||
|
||||
GpuMat score_;
|
||||
|
||||
GpuMat d_keypoints_;
|
||||
virtual void setMaxNumPoints(int max_npoints) = 0;
|
||||
virtual int getMaxNumPoints() const = 0;
|
||||
};
|
||||
|
||||
/** @brief Class for extracting ORB features and descriptors from an image. :
|
||||
//
|
||||
// ORB
|
||||
//
|
||||
|
||||
/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor
|
||||
*
|
||||
* @sa cv::ORB
|
||||
*/
|
||||
class CV_EXPORTS ORB_CUDA
|
||||
class CV_EXPORTS ORB : public cv::ORB, public Feature2DAsync
|
||||
{
|
||||
public:
|
||||
enum
|
||||
@ -331,113 +466,20 @@ public:
|
||||
ROWS_COUNT
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
DEFAULT_FAST_THRESHOLD = 20
|
||||
};
|
||||
|
||||
/** @brief Constructor.
|
||||
|
||||
@param nFeatures The number of desired features.
|
||||
@param scaleFactor Coefficient by which we divide the dimensions from one scale pyramid level to
|
||||
the next.
|
||||
@param nLevels The number of levels in the scale pyramid.
|
||||
@param edgeThreshold How far from the boundary the points should be.
|
||||
@param firstLevel The level at which the image is given. If 1, that means we will also look at the
|
||||
image scaleFactor times bigger.
|
||||
@param WTA_K
|
||||
@param scoreType
|
||||
@param patchSize
|
||||
*/
|
||||
explicit ORB_CUDA(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
|
||||
int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
|
||||
|
||||
/** @overload */
|
||||
void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
|
||||
/** @overload */
|
||||
void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
|
||||
|
||||
/** @brief Detects keypoints and computes descriptors for them.
|
||||
|
||||
@param image Input 8-bit grayscale image.
|
||||
@param mask Optional input mask that marks the regions where we should detect features.
|
||||
@param keypoints The input/output vector of keypoints. Can be stored both in CPU and GPU memory.
|
||||
For GPU memory:
|
||||
- keypoints.ptr\<float\>(X_ROW)[i] contains x coordinate of the i'th feature.
|
||||
- keypoints.ptr\<float\>(Y_ROW)[i] contains y coordinate of the i'th feature.
|
||||
- keypoints.ptr\<float\>(RESPONSE_ROW)[i] contains the response of the i'th feature.
|
||||
- keypoints.ptr\<float\>(ANGLE_ROW)[i] contains orientation of the i'th feature.
|
||||
- keypoints.ptr\<float\>(OCTAVE_ROW)[i] contains the octave of the i'th feature.
|
||||
- keypoints.ptr\<float\>(SIZE_ROW)[i] contains the size of the i'th feature.
|
||||
@param descriptors Computed descriptors. if blurForDescriptor is true, image will be blurred
|
||||
before descriptors calculation.
|
||||
*/
|
||||
void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors);
|
||||
/** @overload */
|
||||
void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors);
|
||||
|
||||
/** @brief Download keypoints from GPU to CPU memory.
|
||||
*/
|
||||
static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
|
||||
/** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
|
||||
*/
|
||||
static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
|
||||
|
||||
//! returns the descriptor size in bytes
|
||||
inline int descriptorSize() const { return kBytes; }
|
||||
|
||||
inline void setFastParams(int threshold, bool nonmaxSuppression = true)
|
||||
{
|
||||
fastDetector_.threshold = threshold;
|
||||
fastDetector_.nonmaxSuppression = nonmaxSuppression;
|
||||
}
|
||||
|
||||
/** @brief Releases inner buffer memory.
|
||||
*/
|
||||
void release();
|
||||
static Ptr<ORB> create(int nfeatures=500,
|
||||
float scaleFactor=1.2f,
|
||||
int nlevels=8,
|
||||
int edgeThreshold=31,
|
||||
int firstLevel=0,
|
||||
int WTA_K=2,
|
||||
int scoreType=ORB::HARRIS_SCORE,
|
||||
int patchSize=31,
|
||||
int fastThreshold=20,
|
||||
bool blurForDescriptor=false);
|
||||
|
||||
//! if true, image will be blurred before descriptors calculation
|
||||
bool blurForDescriptor;
|
||||
|
||||
private:
|
||||
enum { kBytes = 32 };
|
||||
|
||||
void buildScalePyramids(const GpuMat& image, const GpuMat& mask);
|
||||
|
||||
void computeKeyPointsPyramid();
|
||||
|
||||
void computeDescriptors(GpuMat& descriptors);
|
||||
|
||||
void mergeKeyPoints(GpuMat& keypoints);
|
||||
|
||||
int nFeatures_;
|
||||
float scaleFactor_;
|
||||
int nLevels_;
|
||||
int edgeThreshold_;
|
||||
int firstLevel_;
|
||||
int WTA_K_;
|
||||
int scoreType_;
|
||||
int patchSize_;
|
||||
|
||||
//! The number of desired features per scale
|
||||
std::vector<size_t> n_features_per_level_;
|
||||
|
||||
//! Points to compute BRIEF descriptors from
|
||||
GpuMat pattern_;
|
||||
|
||||
std::vector<GpuMat> imagePyr_;
|
||||
std::vector<GpuMat> maskPyr_;
|
||||
|
||||
GpuMat buf_;
|
||||
|
||||
std::vector<GpuMat> keyPointsPyr_;
|
||||
std::vector<int> keyPointsCount_;
|
||||
|
||||
FAST_CUDA fastDetector_;
|
||||
|
||||
Ptr<cuda::Filter> blurFilter;
|
||||
|
||||
GpuMat d_keypoints_;
|
||||
virtual void setBlurForDescriptor(bool blurForDescriptor) = 0;
|
||||
virtual bool getBlurForDescriptor() const = 0;
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
@ -64,15 +64,18 @@ PERF_TEST_P(Image_Threshold_NonMaxSuppression, FAST,
|
||||
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
cv::cuda::FAST_CUDA d_fast(threshold, nonMaxSuppersion, 0.5);
|
||||
cv::Ptr<cv::cuda::FastFeatureDetector> d_fast =
|
||||
cv::cuda::FastFeatureDetector::create(threshold, nonMaxSuppersion,
|
||||
cv::FastFeatureDetector::TYPE_9_16,
|
||||
0.5 * img.size().area());
|
||||
|
||||
const cv::cuda::GpuMat d_img(img);
|
||||
cv::cuda::GpuMat d_keypoints;
|
||||
|
||||
TEST_CYCLE() d_fast(d_img, cv::cuda::GpuMat(), d_keypoints);
|
||||
TEST_CYCLE() d_fast->detectAsync(d_img, d_keypoints);
|
||||
|
||||
std::vector<cv::KeyPoint> gpu_keypoints;
|
||||
d_fast.downloadKeypoints(d_keypoints, gpu_keypoints);
|
||||
d_fast->convert(d_keypoints, gpu_keypoints);
|
||||
|
||||
sortKeyPoints(gpu_keypoints);
|
||||
|
||||
@ -106,15 +109,15 @@ PERF_TEST_P(Image_NFeatures, ORB,
|
||||
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
cv::cuda::ORB_CUDA d_orb(nFeatures);
|
||||
cv::Ptr<cv::cuda::ORB> d_orb = cv::cuda::ORB::create(nFeatures);
|
||||
|
||||
const cv::cuda::GpuMat d_img(img);
|
||||
cv::cuda::GpuMat d_keypoints, d_descriptors;
|
||||
|
||||
TEST_CYCLE() d_orb(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
|
||||
TEST_CYCLE() d_orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);
|
||||
|
||||
std::vector<cv::KeyPoint> gpu_keypoints;
|
||||
d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
|
||||
d_orb->convert(d_keypoints, gpu_keypoints);
|
||||
|
||||
cv::Mat gpu_descriptors(d_descriptors);
|
||||
|
||||
@ -164,16 +167,16 @@ PERF_TEST_P(DescSize_Norm, BFMatch,
|
||||
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA d_matcher(normType);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);
|
||||
|
||||
const cv::cuda::GpuMat d_query(query);
|
||||
const cv::cuda::GpuMat d_train(train);
|
||||
cv::cuda::GpuMat d_trainIdx, d_distance;
|
||||
cv::cuda::GpuMat d_matches;
|
||||
|
||||
TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
|
||||
TEST_CYCLE() d_matcher->matchAsync(d_query, d_train, d_matches);
|
||||
|
||||
std::vector<cv::DMatch> gpu_matches;
|
||||
d_matcher.matchDownload(d_trainIdx, d_distance, gpu_matches);
|
||||
d_matcher->matchConvert(d_matches, gpu_matches);
|
||||
|
||||
SANITY_CHECK_MATCHES(gpu_matches);
|
||||
}
|
||||
@ -223,16 +226,16 @@ PERF_TEST_P(DescSize_K_Norm, BFKnnMatch,
|
||||
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA d_matcher(normType);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);
|
||||
|
||||
const cv::cuda::GpuMat d_query(query);
|
||||
const cv::cuda::GpuMat d_train(train);
|
||||
cv::cuda::GpuMat d_trainIdx, d_distance, d_allDist;
|
||||
cv::cuda::GpuMat d_matches;
|
||||
|
||||
TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
|
||||
TEST_CYCLE() d_matcher->knnMatchAsync(d_query, d_train, d_matches, k);
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matchesTbl;
|
||||
d_matcher.knnMatchDownload(d_trainIdx, d_distance, matchesTbl);
|
||||
d_matcher->knnMatchConvert(d_matches, matchesTbl);
|
||||
|
||||
std::vector<cv::DMatch> gpu_matches;
|
||||
toOneRowMatches(matchesTbl, gpu_matches);
|
||||
@ -277,16 +280,16 @@ PERF_TEST_P(DescSize_Norm, BFRadiusMatch,
|
||||
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA d_matcher(normType);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);
|
||||
|
||||
const cv::cuda::GpuMat d_query(query);
|
||||
const cv::cuda::GpuMat d_train(train);
|
||||
cv::cuda::GpuMat d_trainIdx, d_nMatches, d_distance;
|
||||
cv::cuda::GpuMat d_matches;
|
||||
|
||||
TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance);
|
||||
TEST_CYCLE() d_matcher->radiusMatchAsync(d_query, d_train, d_matches, maxDistance);
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matchesTbl;
|
||||
d_matcher.radiusMatchDownload(d_trainIdx, d_distance, d_nMatches, matchesTbl);
|
||||
d_matcher->radiusMatchConvert(d_matches, matchesTbl);
|
||||
|
||||
std::vector<cv::DMatch> gpu_matches;
|
||||
toOneRowMatches(matchesTbl, gpu_matches);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -279,7 +279,7 @@ namespace cv { namespace cuda { namespace device
|
||||
#endif
|
||||
}
|
||||
|
||||
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold)
|
||||
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream)
|
||||
{
|
||||
void* counter_ptr;
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
|
||||
@ -290,29 +290,29 @@ namespace cv { namespace cuda { namespace device
|
||||
grid.x = divUp(img.cols - 6, block.x);
|
||||
grid.y = divUp(img.rows - 6, block.y);
|
||||
|
||||
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
|
||||
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );
|
||||
|
||||
if (score.data)
|
||||
{
|
||||
if (mask.data)
|
||||
calcKeypoints<true><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
|
||||
calcKeypoints<true><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
|
||||
else
|
||||
calcKeypoints<true><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
|
||||
calcKeypoints<true><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mask.data)
|
||||
calcKeypoints<false><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
|
||||
calcKeypoints<false><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
|
||||
else
|
||||
calcKeypoints<false><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
|
||||
calcKeypoints<false><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
|
||||
}
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
unsigned int count;
|
||||
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );
|
||||
|
||||
cudaSafeCall( cudaStreamSynchronize(stream) );
|
||||
|
||||
return count;
|
||||
}
|
||||
@ -356,7 +356,7 @@ namespace cv { namespace cuda { namespace device
|
||||
#endif
|
||||
}
|
||||
|
||||
int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response)
|
||||
int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream)
|
||||
{
|
||||
void* counter_ptr;
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
|
||||
@ -366,15 +366,15 @@ namespace cv { namespace cuda { namespace device
|
||||
dim3 grid;
|
||||
grid.x = divUp(count, block.x);
|
||||
|
||||
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
|
||||
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );
|
||||
|
||||
nonmaxSuppression<<<grid, block>>>(kpLoc, count, score, loc, response);
|
||||
nonmaxSuppression<<<grid, block, 0, stream>>>(kpLoc, count, score, loc, response);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
unsigned int new_count;
|
||||
cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpyAsync(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );
|
||||
|
||||
cudaSafeCall( cudaStreamSynchronize(stream) );
|
||||
|
||||
return new_count;
|
||||
}
|
||||
|
@ -47,124 +47,162 @@ using namespace cv::cuda;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
cv::cuda::FAST_CUDA::FAST_CUDA(int, bool, double) { throw_no_cuda(); }
|
||||
void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
|
||||
void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
|
||||
void cv::cuda::FAST_CUDA::convertKeypoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
|
||||
void cv::cuda::FAST_CUDA::release() { throw_no_cuda(); }
|
||||
int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat&, const GpuMat&) { throw_no_cuda(); return 0; }
|
||||
int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat&) { throw_no_cuda(); return 0; }
|
||||
Ptr<cv::cuda::FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int, bool, int, int) { throw_no_cuda(); return Ptr<cv::cuda::FastFeatureDetector>(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
cv::cuda::FAST_CUDA::FAST_CUDA(int _threshold, bool _nonmaxSuppression, double _keypointsRatio) :
|
||||
nonmaxSuppression(_nonmaxSuppression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
|
||||
{
|
||||
}
|
||||
|
||||
void cv::cuda::FAST_CUDA::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
|
||||
{
|
||||
if (image.empty())
|
||||
return;
|
||||
|
||||
(*this)(image, mask, d_keypoints_);
|
||||
downloadKeypoints(d_keypoints_, keypoints);
|
||||
}
|
||||
|
||||
void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
|
||||
{
|
||||
if (d_keypoints.empty())
|
||||
return;
|
||||
|
||||
Mat h_keypoints(d_keypoints);
|
||||
convertKeypoints(h_keypoints, keypoints);
|
||||
}
|
||||
|
||||
void cv::cuda::FAST_CUDA::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
|
||||
{
|
||||
if (h_keypoints.empty())
|
||||
return;
|
||||
|
||||
CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4);
|
||||
|
||||
int npoints = h_keypoints.cols;
|
||||
|
||||
keypoints.resize(npoints);
|
||||
|
||||
const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
|
||||
const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
|
||||
|
||||
for (int i = 0; i < npoints; ++i)
|
||||
{
|
||||
KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
|
||||
keypoints[i] = kp;
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::FAST_CUDA::operator ()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
|
||||
{
|
||||
calcKeyPointsLocation(img, mask);
|
||||
keypoints.cols = getKeyPoints(keypoints);
|
||||
}
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace fast
|
||||
{
|
||||
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold);
|
||||
int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response);
|
||||
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream);
|
||||
int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask)
|
||||
namespace
|
||||
{
|
||||
using namespace cv::cuda::device::fast;
|
||||
|
||||
CV_Assert(img.type() == CV_8UC1);
|
||||
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
|
||||
|
||||
int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());
|
||||
|
||||
ensureSizeIsEnough(1, maxKeypoints, CV_16SC2, kpLoc_);
|
||||
|
||||
if (nonmaxSuppression)
|
||||
class FAST_Impl : public cv::cuda::FastFeatureDetector
|
||||
{
|
||||
public:
|
||||
FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints);
|
||||
|
||||
virtual void detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask);
|
||||
virtual void detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream);
|
||||
|
||||
virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);
|
||||
|
||||
virtual void setThreshold(int threshold) { threshold_ = threshold; }
|
||||
virtual int getThreshold() const { return threshold_; }
|
||||
|
||||
virtual void setNonmaxSuppression(bool f) { nonmaxSuppression_ = f; }
|
||||
virtual bool getNonmaxSuppression() const { return nonmaxSuppression_; }
|
||||
|
||||
virtual void setMaxNumPoints(int max_npoints) { max_npoints_ = max_npoints; }
|
||||
virtual int getMaxNumPoints() const { return max_npoints_; }
|
||||
|
||||
virtual void setType(int type) { CV_Assert( type == TYPE_9_16 ); }
|
||||
virtual int getType() const { return TYPE_9_16; }
|
||||
|
||||
private:
|
||||
int threshold_;
|
||||
bool nonmaxSuppression_;
|
||||
int max_npoints_;
|
||||
};
|
||||
|
||||
FAST_Impl::FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints) :
|
||||
threshold_(threshold), nonmaxSuppression_(nonmaxSuppression), max_npoints_(max_npoints)
|
||||
{
|
||||
ensureSizeIsEnough(img.size(), CV_32SC1, score_);
|
||||
score_.setTo(Scalar::all(0));
|
||||
}
|
||||
|
||||
count_ = calcKeypoints_gpu(img, mask, kpLoc_.ptr<short2>(), maxKeypoints, nonmaxSuppression ? score_ : PtrStepSzi(), threshold);
|
||||
count_ = std::min(count_, maxKeypoints);
|
||||
void FAST_Impl::detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask)
|
||||
{
|
||||
if (_image.empty())
|
||||
{
|
||||
keypoints.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
return count_;
|
||||
BufferPool pool(Stream::Null());
|
||||
GpuMat d_keypoints = pool.getBuffer(ROWS_COUNT, max_npoints_, CV_16SC2);
|
||||
|
||||
detectAsync(_image, d_keypoints, _mask, Stream::Null());
|
||||
convert(d_keypoints, keypoints);
|
||||
}
|
||||
|
||||
void FAST_Impl::detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream)
|
||||
{
|
||||
using namespace cv::cuda::device::fast;
|
||||
|
||||
const GpuMat img = _image.getGpuMat();
|
||||
const GpuMat mask = _mask.getGpuMat();
|
||||
|
||||
CV_Assert( img.type() == CV_8UC1 );
|
||||
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()) );
|
||||
|
||||
BufferPool pool(stream);
|
||||
|
||||
GpuMat kpLoc = pool.getBuffer(1, max_npoints_, CV_16SC2);
|
||||
|
||||
GpuMat score;
|
||||
if (nonmaxSuppression_)
|
||||
{
|
||||
score = pool.getBuffer(img.size(), CV_32SC1);
|
||||
score.setTo(Scalar::all(0), stream);
|
||||
}
|
||||
|
||||
int count = calcKeypoints_gpu(img, mask, kpLoc.ptr<short2>(), max_npoints_, score, threshold_, StreamAccessor::getStream(stream));
|
||||
count = std::min(count, max_npoints_);
|
||||
|
||||
if (count == 0)
|
||||
{
|
||||
_keypoints.release();
|
||||
return;
|
||||
}
|
||||
|
||||
ensureSizeIsEnough(ROWS_COUNT, count, CV_32FC1, _keypoints);
|
||||
GpuMat& keypoints = _keypoints.getGpuMatRef();
|
||||
|
||||
if (nonmaxSuppression_)
|
||||
{
|
||||
count = nonmaxSuppression_gpu(kpLoc.ptr<short2>(), count, score, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW), StreamAccessor::getStream(stream));
|
||||
if (count == 0)
|
||||
{
|
||||
keypoints.release();
|
||||
}
|
||||
else
|
||||
{
|
||||
keypoints.cols = count;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
GpuMat locRow(1, count, kpLoc.type(), keypoints.ptr(0));
|
||||
kpLoc.colRange(0, count).copyTo(locRow, stream);
|
||||
keypoints.row(1).setTo(Scalar::all(0), stream);
|
||||
}
|
||||
}
|
||||
|
||||
void FAST_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
|
||||
{
|
||||
if (_gpu_keypoints.empty())
|
||||
{
|
||||
keypoints.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
Mat h_keypoints;
|
||||
if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
|
||||
{
|
||||
_gpu_keypoints.getGpuMat().download(h_keypoints);
|
||||
}
|
||||
else
|
||||
{
|
||||
h_keypoints = _gpu_keypoints.getMat();
|
||||
}
|
||||
|
||||
CV_Assert( h_keypoints.rows == ROWS_COUNT );
|
||||
CV_Assert( h_keypoints.elemSize() == 4 );
|
||||
|
||||
const int npoints = h_keypoints.cols;
|
||||
|
||||
keypoints.resize(npoints);
|
||||
|
||||
const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
|
||||
const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
|
||||
|
||||
for (int i = 0; i < npoints; ++i)
|
||||
{
|
||||
KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
|
||||
keypoints[i] = kp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat& keypoints)
|
||||
Ptr<cv::cuda::FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int threshold, bool nonmaxSuppression, int type, int max_npoints)
|
||||
{
|
||||
using namespace cv::cuda::device::fast;
|
||||
|
||||
if (count_ == 0)
|
||||
return 0;
|
||||
|
||||
ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);
|
||||
|
||||
if (nonmaxSuppression)
|
||||
return nonmaxSuppression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));
|
||||
|
||||
GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
|
||||
kpLoc_.colRange(0, count_).copyTo(locRow);
|
||||
keypoints.row(1).setTo(Scalar::all(0));
|
||||
|
||||
return count_;
|
||||
}
|
||||
|
||||
void cv::cuda::FAST_CUDA::release()
|
||||
{
|
||||
kpLoc_.release();
|
||||
score_.release();
|
||||
|
||||
d_keypoints_.release();
|
||||
CV_Assert( type == TYPE_9_16 );
|
||||
return makePtr<FAST_Impl>(threshold, nonmaxSuppression, max_npoints);
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
||||
|
85
modules/cudafeatures2d/src/feature2d_async.cpp
Normal file
85
modules/cudafeatures2d/src/feature2d_async.cpp
Normal file
@ -0,0 +1,85 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
cv::cuda::Feature2DAsync::~Feature2DAsync()
|
||||
{
|
||||
}
|
||||
|
||||
void cv::cuda::Feature2DAsync::detectAsync(InputArray image,
|
||||
OutputArray keypoints,
|
||||
InputArray mask,
|
||||
Stream& stream)
|
||||
{
|
||||
if (image.empty())
|
||||
{
|
||||
keypoints.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
detectAndComputeAsync(image, mask, keypoints, noArray(), false, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::Feature2DAsync::computeAsync(InputArray image,
|
||||
OutputArray keypoints,
|
||||
OutputArray descriptors,
|
||||
Stream& stream)
|
||||
{
|
||||
if (image.empty())
|
||||
{
|
||||
descriptors.release();
|
||||
return;
|
||||
}
|
||||
|
||||
detectAndComputeAsync(image, noArray(), keypoints, descriptors, true, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::Feature2DAsync::detectAndComputeAsync(InputArray /*image*/,
|
||||
InputArray /*mask*/,
|
||||
OutputArray /*keypoints*/,
|
||||
OutputArray /*descriptors*/,
|
||||
bool /*useProvidedKeypoints*/,
|
||||
Stream& /*stream*/)
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, "");
|
||||
}
|
@ -47,18 +47,7 @@ using namespace cv::cuda;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
cv::cuda::ORB_CUDA::ORB_CUDA(int, float, int, int, int, int, int, int) : fastDetector_(20) { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::release() { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat&, const GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::computeKeyPointsPyramid() { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat&) { throw_no_cuda(); }
|
||||
Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int, float, int, int, int, int, int, int, int, bool) { throw_no_cuda(); return Ptr<cv::cuda::ORB>(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
@ -346,7 +335,100 @@ namespace
|
||||
-1,-6, 0,-11/*mean (0.127148), correlation (0.547401)*/
|
||||
};
|
||||
|
||||
void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
|
||||
class ORB_Impl : public cv::cuda::ORB
|
||||
{
|
||||
public:
|
||||
ORB_Impl(int nfeatures,
|
||||
float scaleFactor,
|
||||
int nlevels,
|
||||
int edgeThreshold,
|
||||
int firstLevel,
|
||||
int WTA_K,
|
||||
int scoreType,
|
||||
int patchSize,
|
||||
int fastThreshold,
|
||||
bool blurForDescriptor);
|
||||
|
||||
virtual void detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints);
|
||||
virtual void detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream);
|
||||
|
||||
virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);
|
||||
|
||||
virtual int descriptorSize() const { return kBytes; }
|
||||
virtual int descriptorType() const { return CV_8U; }
|
||||
virtual int defaultNorm() const { return NORM_HAMMING; }
|
||||
|
||||
virtual void setMaxFeatures(int maxFeatures) { nFeatures_ = maxFeatures; }
|
||||
virtual int getMaxFeatures() const { return nFeatures_; }
|
||||
|
||||
virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
|
||||
virtual double getScaleFactor() const { return scaleFactor_; }
|
||||
|
||||
virtual void setNLevels(int nlevels) { nLevels_ = nlevels; }
|
||||
virtual int getNLevels() const { return nLevels_; }
|
||||
|
||||
virtual void setEdgeThreshold(int edgeThreshold) { edgeThreshold_ = edgeThreshold; }
|
||||
virtual int getEdgeThreshold() const { return edgeThreshold_; }
|
||||
|
||||
virtual void setFirstLevel(int firstLevel) { firstLevel_ = firstLevel; }
|
||||
virtual int getFirstLevel() const { return firstLevel_; }
|
||||
|
||||
virtual void setWTA_K(int wta_k) { WTA_K_ = wta_k; }
|
||||
virtual int getWTA_K() const { return WTA_K_; }
|
||||
|
||||
virtual void setScoreType(int scoreType) { scoreType_ = scoreType; }
|
||||
virtual int getScoreType() const { return scoreType_; }
|
||||
|
||||
virtual void setPatchSize(int patchSize) { patchSize_ = patchSize; }
|
||||
virtual int getPatchSize() const { return patchSize_; }
|
||||
|
||||
virtual void setFastThreshold(int fastThreshold) { fastThreshold_ = fastThreshold; }
|
||||
virtual int getFastThreshold() const { return fastThreshold_; }
|
||||
|
||||
virtual void setBlurForDescriptor(bool blurForDescriptor) { blurForDescriptor_ = blurForDescriptor; }
|
||||
virtual bool getBlurForDescriptor() const { return blurForDescriptor_; }
|
||||
|
||||
private:
|
||||
int nFeatures_;
|
||||
float scaleFactor_;
|
||||
int nLevels_;
|
||||
int edgeThreshold_;
|
||||
int firstLevel_;
|
||||
int WTA_K_;
|
||||
int scoreType_;
|
||||
int patchSize_;
|
||||
int fastThreshold_;
|
||||
bool blurForDescriptor_;
|
||||
|
||||
private:
|
||||
void buildScalePyramids(InputArray _image, InputArray _mask);
|
||||
void computeKeyPointsPyramid();
|
||||
void computeDescriptors(OutputArray _descriptors);
|
||||
void mergeKeyPoints(OutputArray _keypoints);
|
||||
|
||||
private:
|
||||
Ptr<cv::cuda::FastFeatureDetector> fastDetector_;
|
||||
|
||||
//! The number of desired features per scale
|
||||
std::vector<size_t> n_features_per_level_;
|
||||
|
||||
//! Points to compute BRIEF descriptors from
|
||||
GpuMat pattern_;
|
||||
|
||||
std::vector<GpuMat> imagePyr_;
|
||||
std::vector<GpuMat> maskPyr_;
|
||||
|
||||
GpuMat buf_;
|
||||
|
||||
std::vector<GpuMat> keyPointsPyr_;
|
||||
std::vector<int> keyPointsCount_;
|
||||
|
||||
Ptr<cuda::Filter> blurFilter_;
|
||||
|
||||
GpuMat d_keypoints_;
|
||||
};
|
||||
|
||||
static void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
|
||||
{
|
||||
RNG rng(0x12345678);
|
||||
|
||||
@ -381,7 +463,7 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
void makeRandomPattern(int patchSize, Point* pattern, int npoints)
|
||||
static void makeRandomPattern(int patchSize, Point* pattern, int npoints)
|
||||
{
|
||||
// we always start with a fixed seed,
|
||||
// to make patterns the same on each run
|
||||
@ -393,155 +475,189 @@ namespace
|
||||
pattern[i].y = rng.uniform(-patchSize / 2, patchSize / 2 + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cv::cuda::ORB_CUDA::ORB_CUDA(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
|
||||
nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K),
|
||||
scoreType_(scoreType), patchSize_(patchSize),
|
||||
fastDetector_(DEFAULT_FAST_THRESHOLD)
|
||||
{
|
||||
CV_Assert(patchSize_ >= 2);
|
||||
|
||||
// fill the extractors and descriptors for the corresponding scales
|
||||
float factor = 1.0f / scaleFactor_;
|
||||
float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));
|
||||
|
||||
n_features_per_level_.resize(nLevels_);
|
||||
size_t sum_n_features = 0;
|
||||
for (int level = 0; level < nLevels_ - 1; ++level)
|
||||
ORB_Impl::ORB_Impl(int nFeatures,
|
||||
float scaleFactor,
|
||||
int nLevels,
|
||||
int edgeThreshold,
|
||||
int firstLevel,
|
||||
int WTA_K,
|
||||
int scoreType,
|
||||
int patchSize,
|
||||
int fastThreshold,
|
||||
bool blurForDescriptor) :
|
||||
nFeatures_(nFeatures),
|
||||
scaleFactor_(scaleFactor),
|
||||
nLevels_(nLevels),
|
||||
edgeThreshold_(edgeThreshold),
|
||||
firstLevel_(firstLevel),
|
||||
WTA_K_(WTA_K),
|
||||
scoreType_(scoreType),
|
||||
patchSize_(patchSize),
|
||||
fastThreshold_(fastThreshold),
|
||||
blurForDescriptor_(blurForDescriptor)
|
||||
{
|
||||
n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
|
||||
sum_n_features += n_features_per_level_[level];
|
||||
n_desired_features_per_scale *= factor;
|
||||
}
|
||||
n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;
|
||||
CV_Assert( patchSize_ >= 2 );
|
||||
CV_Assert( WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4 );
|
||||
|
||||
// pre-compute the end of a row in a circular patch
|
||||
int half_patch_size = patchSize_ / 2;
|
||||
std::vector<int> u_max(half_patch_size + 2);
|
||||
for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
|
||||
u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
|
||||
fastDetector_ = cuda::FastFeatureDetector::create(fastThreshold_);
|
||||
|
||||
// Make sure we are symmetric
|
||||
for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
|
||||
{
|
||||
while (u_max[v_0] == u_max[v_0 + 1])
|
||||
++v_0;
|
||||
u_max[v] = v_0;
|
||||
++v_0;
|
||||
}
|
||||
CV_Assert(u_max.size() < 32);
|
||||
cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
|
||||
// fill the extractors and descriptors for the corresponding scales
|
||||
float factor = 1.0f / scaleFactor_;
|
||||
float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));
|
||||
|
||||
// Calc pattern
|
||||
const int npoints = 512;
|
||||
Point pattern_buf[npoints];
|
||||
const Point* pattern0 = (const Point*)bit_pattern_31_;
|
||||
if (patchSize_ != 31)
|
||||
{
|
||||
pattern0 = pattern_buf;
|
||||
makeRandomPattern(patchSize_, pattern_buf, npoints);
|
||||
}
|
||||
|
||||
CV_Assert(WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4);
|
||||
|
||||
Mat h_pattern;
|
||||
|
||||
if (WTA_K_ == 2)
|
||||
{
|
||||
h_pattern.create(2, npoints, CV_32SC1);
|
||||
|
||||
int* pattern_x_ptr = h_pattern.ptr<int>(0);
|
||||
int* pattern_y_ptr = h_pattern.ptr<int>(1);
|
||||
|
||||
for (int i = 0; i < npoints; ++i)
|
||||
n_features_per_level_.resize(nLevels_);
|
||||
size_t sum_n_features = 0;
|
||||
for (int level = 0; level < nLevels_ - 1; ++level)
|
||||
{
|
||||
pattern_x_ptr[i] = pattern0[i].x;
|
||||
pattern_y_ptr[i] = pattern0[i].y;
|
||||
n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
|
||||
sum_n_features += n_features_per_level_[level];
|
||||
n_desired_features_per_scale *= factor;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int ntuples = descriptorSize() * 4;
|
||||
initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
|
||||
}
|
||||
n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;
|
||||
|
||||
pattern_.upload(h_pattern);
|
||||
|
||||
blurFilter = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
|
||||
|
||||
blurForDescriptor = false;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
inline float getScale(float scaleFactor, int firstLevel, int level)
|
||||
{
|
||||
return pow(scaleFactor, level - firstLevel);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat& image, const GpuMat& mask)
|
||||
{
|
||||
CV_Assert(image.type() == CV_8UC1);
|
||||
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
|
||||
|
||||
imagePyr_.resize(nLevels_);
|
||||
maskPyr_.resize(nLevels_);
|
||||
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
{
|
||||
float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);
|
||||
|
||||
Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));
|
||||
|
||||
ensureSizeIsEnough(sz, image.type(), imagePyr_[level]);
|
||||
ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]);
|
||||
maskPyr_[level].setTo(Scalar::all(255));
|
||||
|
||||
// Compute the resized image
|
||||
if (level != firstLevel_)
|
||||
// pre-compute the end of a row in a circular patch
|
||||
int half_patch_size = patchSize_ / 2;
|
||||
std::vector<int> u_max(half_patch_size + 2);
|
||||
for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
|
||||
{
|
||||
if (level < firstLevel_)
|
||||
{
|
||||
cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
|
||||
u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
|
||||
}
|
||||
|
||||
if (!mask.empty())
|
||||
cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
|
||||
}
|
||||
else
|
||||
{
|
||||
cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
|
||||
// Make sure we are symmetric
|
||||
for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
|
||||
{
|
||||
while (u_max[v_0] == u_max[v_0 + 1])
|
||||
++v_0;
|
||||
u_max[v] = v_0;
|
||||
++v_0;
|
||||
}
|
||||
CV_Assert( u_max.size() < 32 );
|
||||
cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
|
||||
|
||||
if (!mask.empty())
|
||||
{
|
||||
cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
|
||||
cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
|
||||
}
|
||||
// Calc pattern
|
||||
const int npoints = 512;
|
||||
Point pattern_buf[npoints];
|
||||
const Point* pattern0 = (const Point*)bit_pattern_31_;
|
||||
if (patchSize_ != 31)
|
||||
{
|
||||
pattern0 = pattern_buf;
|
||||
makeRandomPattern(patchSize_, pattern_buf, npoints);
|
||||
}
|
||||
|
||||
Mat h_pattern;
|
||||
if (WTA_K_ == 2)
|
||||
{
|
||||
h_pattern.create(2, npoints, CV_32SC1);
|
||||
|
||||
int* pattern_x_ptr = h_pattern.ptr<int>(0);
|
||||
int* pattern_y_ptr = h_pattern.ptr<int>(1);
|
||||
|
||||
for (int i = 0; i < npoints; ++i)
|
||||
{
|
||||
pattern_x_ptr[i] = pattern0[i].x;
|
||||
pattern_y_ptr[i] = pattern0[i].y;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
image.copyTo(imagePyr_[level]);
|
||||
|
||||
if (!mask.empty())
|
||||
mask.copyTo(maskPyr_[level]);
|
||||
int ntuples = descriptorSize() * 4;
|
||||
initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
|
||||
}
|
||||
|
||||
// Filter keypoints by image border
|
||||
ensureSizeIsEnough(sz, CV_8UC1, buf_);
|
||||
buf_.setTo(Scalar::all(0));
|
||||
Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
|
||||
buf_(inner).setTo(Scalar::all(255));
|
||||
pattern_.upload(h_pattern);
|
||||
|
||||
cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
|
||||
blurFilter_ = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
//takes keypoints and culls them by the response
|
||||
void cull(GpuMat& keypoints, int& count, int n_points)
|
||||
// Blocking entry point. Runs detectAndComputeAsync() on the null stream with
// d_keypoints_ as the keypoint output, then hands that matrix to convert() to
// fill the caller's std::vector<KeyPoint>.
//
// _image / _mask        forwarded unchanged to detectAndComputeAsync().
// keypoints             receives the converted keypoints.
// _descriptors          forwarded unchanged to detectAndComputeAsync().
// useProvidedKeypoints  must be false; anything else trips the assertion.
void ORB_Impl::detectAndCompute(InputArray _image,
                                InputArray _mask,
                                std::vector<KeyPoint>& keypoints,
                                OutputArray _descriptors,
                                bool useProvidedKeypoints)
{
    CV_Assert( useProvidedKeypoints == false );

    Stream& nullStream = Stream::Null();
    detectAndComputeAsync(_image, _mask, d_keypoints_, _descriptors, false, nullStream);

    convert(d_keypoints_, keypoints);
}
|
||||
|
||||
// Runs the ORB pipeline in a fixed order: build the image/mask pyramids,
// detect keypoints on every level, compute descriptors when the caller asked
// for them, and finally merge the per-level keypoints into _keypoints.
//
// useProvidedKeypoints must be false; anything else trips the assertion.
// NOTE(review): `stream` is not referenced anywhere in this body -- confirm
// whether the helper calls are meant to be issued on it.
void ORB_Impl::detectAndComputeAsync(InputArray _image,
                                     InputArray _mask,
                                     OutputArray _keypoints,
                                     OutputArray _descriptors,
                                     bool useProvidedKeypoints,
                                     Stream& stream)
{
    CV_Assert( useProvidedKeypoints == false );

    buildScalePyramids(_image, _mask);
    computeKeyPointsPyramid();

    // Descriptors are optional: skip the work when no output was requested.
    const bool wantDescriptors = _descriptors.needed();
    if (wantDescriptors)
        computeDescriptors(_descriptors);

    mergeKeyPoints(_keypoints);
}
|
||||
|
||||
// Returns scaleFactor raised to the power (level - firstLevel), i.e. the scale
// of pyramid level `level` relative to level `firstLevel`. The result is 1 when
// the two levels coincide.
static float getScale(float scaleFactor, int firstLevel, int level)
{
    const int levelOffset = level - firstLevel;
    return pow(scaleFactor, levelOffset);
}
|
||||
|
||||
// Fills imagePyr_ and maskPyr_ with one entry per pyramid level.
//
// For each level the target size is the input size divided by
// getScale(scaleFactor_, firstLevel_, level). The level equal to firstLevel_
// receives a straight copy of the input; levels below it are resized from the
// input; levels above it are resized from the previous pyramid level. Every
// level mask is finally AND-ed with a border mask that is zero within
// edgeThreshold_ pixels of each side.
//
// _image must be CV_8UC1; _mask must be empty or CV_8UC1 with the image size.
// NOTE(review): the border rectangle gets a negative width/height when a level
// is smaller than 2 * edgeThreshold_ -- confirm how such levels should behave.
void ORB_Impl::buildScalePyramids(InputArray _image, InputArray _mask)
{
    const GpuMat image = _image.getGpuMat();
    const GpuMat mask = _mask.getGpuMat();

    CV_Assert( image.type() == CV_8UC1 );
    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) );

    imagePyr_.resize(nLevels_);
    maskPyr_.resize(nLevels_);

    const bool hasMask = !mask.empty();

    for (int level = 0; level < nLevels_; ++level)
    {
        GpuMat& levelImage = imagePyr_[level];
        GpuMat& levelMask = maskPyr_[level];

        float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);
        Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));

        ensureSizeIsEnough(sz, image.type(), levelImage);
        ensureSizeIsEnough(sz, CV_8UC1, levelMask);

        // Start fully enabled; a user mask (if any) overwrites this below.
        levelMask.setTo(Scalar::all(255));

        if (level == firstLevel_)
        {
            // Native-resolution level: plain copies.
            image.copyTo(levelImage);

            if (hasMask)
                mask.copyTo(levelMask);
        }
        else if (level < firstLevel_)
        {
            // Levels before firstLevel_ are resampled directly from the input.
            cuda::resize(image, levelImage, sz, 0, 0, INTER_LINEAR);

            if (hasMask)
                cuda::resize(mask, levelMask, sz, 0, 0, INTER_LINEAR);
        }
        else
        {
            // Levels after firstLevel_ are resampled from the previous level.
            cuda::resize(imagePyr_[level - 1], levelImage, sz, 0, 0, INTER_LINEAR);

            if (hasMask)
            {
                cuda::resize(maskPyr_[level - 1], levelMask, sz, 0, 0, INTER_LINEAR);
                // Zero every mask value that is not above 254 after interpolation.
                cuda::threshold(levelMask, levelMask, 254, 0, THRESH_TOZERO);
            }
        }

        // Border mask: 255 inside the inner rectangle, 0 in the margin.
        ensureSizeIsEnough(sz, CV_8UC1, buf_);
        buf_.setTo(Scalar::all(0));
        Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
        buf_(inner).setTo(Scalar::all(255));

        cuda::bitwise_and(levelMask, buf_, levelMask);
    }
}
|
||||
|
||||
// takes keypoints and culls them by the response
|
||||
static void cull(GpuMat& keypoints, int& count, int n_points)
|
||||
{
|
||||
using namespace cv::cuda::device::orb;
|
||||
|
||||
@ -554,222 +670,199 @@ namespace
|
||||
return;
|
||||
}
|
||||
|
||||
count = cull_gpu(keypoints.ptr<int>(FAST_CUDA::LOCATION_ROW), keypoints.ptr<float>(FAST_CUDA::RESPONSE_ROW), count, n_points);
|
||||
count = cull_gpu(keypoints.ptr<int>(cuda::FastFeatureDetector::LOCATION_ROW), keypoints.ptr<float>(cuda::FastFeatureDetector::RESPONSE_ROW), count, n_points);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::computeKeyPointsPyramid()
|
||||
{
|
||||
using namespace cv::cuda::device::orb;
|
||||
|
||||
int half_patch_size = patchSize_ / 2;
|
||||
|
||||
keyPointsPyr_.resize(nLevels_);
|
||||
keyPointsCount_.resize(nLevels_);
|
||||
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
void ORB_Impl::computeKeyPointsPyramid()
|
||||
{
|
||||
keyPointsCount_[level] = fastDetector_.calcKeyPointsLocation(imagePyr_[level], maskPyr_[level]);
|
||||
using namespace cv::cuda::device::orb;
|
||||
|
||||
if (keyPointsCount_[level] == 0)
|
||||
continue;
|
||||
int half_patch_size = patchSize_ / 2;
|
||||
|
||||
ensureSizeIsEnough(3, keyPointsCount_[level], CV_32FC1, keyPointsPyr_[level]);
|
||||
keyPointsPyr_.resize(nLevels_);
|
||||
keyPointsCount_.resize(nLevels_);
|
||||
|
||||
GpuMat fastKpRange = keyPointsPyr_[level].rowRange(0, 2);
|
||||
keyPointsCount_[level] = fastDetector_.getKeyPoints(fastKpRange);
|
||||
fastDetector_->setThreshold(fastThreshold_);
|
||||
|
||||
if (keyPointsCount_[level] == 0)
|
||||
continue;
|
||||
|
||||
int n_features = static_cast<int>(n_features_per_level_[level]);
|
||||
|
||||
if (scoreType_ == ORB::HARRIS_SCORE)
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
{
|
||||
// Keep more points than necessary as FAST does not give amazing corners
|
||||
cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features);
|
||||
fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area());
|
||||
|
||||
// Compute the Harris cornerness (better scoring than FAST)
|
||||
HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(1), keyPointsCount_[level], 7, HARRIS_K, 0);
|
||||
GpuMat fastKpRange;
|
||||
fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null());
|
||||
|
||||
keyPointsCount_[level] = fastKpRange.cols;
|
||||
|
||||
if (keyPointsCount_[level] == 0)
|
||||
continue;
|
||||
|
||||
ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]);
|
||||
fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2));
|
||||
|
||||
const int n_features = static_cast<int>(n_features_per_level_[level]);
|
||||
|
||||
if (scoreType_ == ORB::HARRIS_SCORE)
|
||||
{
|
||||
// Keep more points than necessary as FAST does not give amazing corners
|
||||
cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features);
|
||||
|
||||
// Compute the Harris cornerness (better scoring than FAST)
|
||||
HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(1), keyPointsCount_[level], 7, HARRIS_K, 0);
|
||||
}
|
||||
|
||||
//cull to the final desired level, using the new Harris scores or the original FAST scores.
|
||||
cull(keyPointsPyr_[level], keyPointsCount_[level], n_features);
|
||||
|
||||
// Compute orientation
|
||||
IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2), keyPointsCount_[level], half_patch_size, 0);
|
||||
}
|
||||
|
||||
//cull to the final desired level, using the new Harris scores or the original FAST scores.
|
||||
cull(keyPointsPyr_[level], keyPointsCount_[level], n_features);
|
||||
|
||||
// Compute orientation
|
||||
IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2), keyPointsCount_[level], half_patch_size, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat& descriptors)
|
||||
{
|
||||
using namespace cv::cuda::device::orb;
|
||||
|
||||
int nAllkeypoints = 0;
|
||||
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
nAllkeypoints += keyPointsCount_[level];
|
||||
|
||||
if (nAllkeypoints == 0)
|
||||
{
|
||||
descriptors.release();
|
||||
return;
|
||||
}
|
||||
|
||||
ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, descriptors);
|
||||
|
||||
int offset = 0;
|
||||
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
void ORB_Impl::computeDescriptors(OutputArray _descriptors)
|
||||
{
|
||||
if (keyPointsCount_[level] == 0)
|
||||
continue;
|
||||
using namespace cv::cuda::device::orb;
|
||||
|
||||
GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);
|
||||
int nAllkeypoints = 0;
|
||||
|
||||
if (blurForDescriptor)
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
nAllkeypoints += keyPointsCount_[level];
|
||||
|
||||
if (nAllkeypoints == 0)
|
||||
{
|
||||
// preprocess the resized image
|
||||
ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
|
||||
blurFilter->apply(imagePyr_[level], buf_);
|
||||
_descriptors.release();
|
||||
return;
|
||||
}
|
||||
|
||||
computeOrbDescriptor_gpu(blurForDescriptor ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
|
||||
keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1), descRange, descriptorSize(), WTA_K_, 0);
|
||||
ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, _descriptors);
|
||||
GpuMat descriptors = _descriptors.getGpuMat();
|
||||
|
||||
offset += keyPointsCount_[level];
|
||||
int offset = 0;
|
||||
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
{
|
||||
if (keyPointsCount_[level] == 0)
|
||||
continue;
|
||||
|
||||
GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);
|
||||
|
||||
if (blurForDescriptor_)
|
||||
{
|
||||
// preprocess the resized image
|
||||
ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
|
||||
blurFilter_->apply(imagePyr_[level], buf_);
|
||||
}
|
||||
|
||||
computeOrbDescriptor_gpu(blurForDescriptor_ ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
|
||||
keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1), descRange, descriptorSize(), WTA_K_, 0);
|
||||
|
||||
offset += keyPointsCount_[level];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat& keypoints)
|
||||
{
|
||||
using namespace cv::cuda::device::orb;
|
||||
|
||||
int nAllkeypoints = 0;
|
||||
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
nAllkeypoints += keyPointsCount_[level];
|
||||
|
||||
if (nAllkeypoints == 0)
|
||||
void ORB_Impl::mergeKeyPoints(OutputArray _keypoints)
|
||||
{
|
||||
keypoints.release();
|
||||
return;
|
||||
using namespace cv::cuda::device::orb;
|
||||
|
||||
int nAllkeypoints = 0;
|
||||
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
nAllkeypoints += keyPointsCount_[level];
|
||||
|
||||
if (nAllkeypoints == 0)
|
||||
{
|
||||
_keypoints.release();
|
||||
return;
|
||||
}
|
||||
|
||||
ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, _keypoints);
|
||||
GpuMat& keypoints = _keypoints.getGpuMatRef();
|
||||
|
||||
int offset = 0;
|
||||
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
{
|
||||
if (keyPointsCount_[level] == 0)
|
||||
continue;
|
||||
|
||||
float sf = getScale(scaleFactor_, firstLevel_, level);
|
||||
|
||||
GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]);
|
||||
|
||||
float locScale = level != firstLevel_ ? sf : 1.0f;
|
||||
|
||||
mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);
|
||||
|
||||
GpuMat range = keyPointsRange.rowRange(2, 4);
|
||||
keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);
|
||||
|
||||
keyPointsRange.row(4).setTo(Scalar::all(level));
|
||||
keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf));
|
||||
|
||||
offset += keyPointsCount_[level];
|
||||
}
|
||||
}
|
||||
|
||||
ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, keypoints);
|
||||
|
||||
int offset = 0;
|
||||
|
||||
for (int level = 0; level < nLevels_; ++level)
|
||||
void ORB_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
|
||||
{
|
||||
if (keyPointsCount_[level] == 0)
|
||||
continue;
|
||||
if (_gpu_keypoints.empty())
|
||||
{
|
||||
keypoints.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
float sf = getScale(scaleFactor_, firstLevel_, level);
|
||||
Mat h_keypoints;
|
||||
if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
|
||||
{
|
||||
_gpu_keypoints.getGpuMat().download(h_keypoints);
|
||||
}
|
||||
else
|
||||
{
|
||||
h_keypoints = _gpu_keypoints.getMat();
|
||||
}
|
||||
|
||||
GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]);
|
||||
CV_Assert( h_keypoints.rows == ROWS_COUNT );
|
||||
CV_Assert( h_keypoints.type() == CV_32FC1 );
|
||||
|
||||
float locScale = level != firstLevel_ ? sf : 1.0f;
|
||||
const int npoints = h_keypoints.cols;
|
||||
|
||||
mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);
|
||||
keypoints.resize(npoints);
|
||||
|
||||
GpuMat range = keyPointsRange.rowRange(2, 4);
|
||||
keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);
|
||||
const float* x_ptr = h_keypoints.ptr<float>(X_ROW);
|
||||
const float* y_ptr = h_keypoints.ptr<float>(Y_ROW);
|
||||
const float* response_ptr = h_keypoints.ptr<float>(RESPONSE_ROW);
|
||||
const float* angle_ptr = h_keypoints.ptr<float>(ANGLE_ROW);
|
||||
const float* octave_ptr = h_keypoints.ptr<float>(OCTAVE_ROW);
|
||||
const float* size_ptr = h_keypoints.ptr<float>(SIZE_ROW);
|
||||
|
||||
keyPointsRange.row(4).setTo(Scalar::all(level));
|
||||
keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf));
|
||||
for (int i = 0; i < npoints; ++i)
|
||||
{
|
||||
KeyPoint kp;
|
||||
|
||||
offset += keyPointsCount_[level];
|
||||
kp.pt.x = x_ptr[i];
|
||||
kp.pt.y = y_ptr[i];
|
||||
kp.response = response_ptr[i];
|
||||
kp.angle = angle_ptr[i];
|
||||
kp.octave = static_cast<int>(octave_ptr[i]);
|
||||
kp.size = size_ptr[i];
|
||||
|
||||
keypoints[i] = kp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat &d_keypoints, std::vector<KeyPoint>& keypoints)
|
||||
Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int nfeatures,
|
||||
float scaleFactor,
|
||||
int nlevels,
|
||||
int edgeThreshold,
|
||||
int firstLevel,
|
||||
int WTA_K,
|
||||
int scoreType,
|
||||
int patchSize,
|
||||
int fastThreshold,
|
||||
bool blurForDescriptor)
|
||||
{
|
||||
if (d_keypoints.empty())
|
||||
{
|
||||
keypoints.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
Mat h_keypoints(d_keypoints);
|
||||
|
||||
convertKeyPoints(h_keypoints, keypoints);
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat &d_keypoints, std::vector<KeyPoint>& keypoints)
|
||||
{
|
||||
if (d_keypoints.empty())
|
||||
{
|
||||
keypoints.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
CV_Assert(d_keypoints.type() == CV_32FC1 && d_keypoints.rows == ROWS_COUNT);
|
||||
|
||||
const float* x_ptr = d_keypoints.ptr<float>(X_ROW);
|
||||
const float* y_ptr = d_keypoints.ptr<float>(Y_ROW);
|
||||
const float* response_ptr = d_keypoints.ptr<float>(RESPONSE_ROW);
|
||||
const float* angle_ptr = d_keypoints.ptr<float>(ANGLE_ROW);
|
||||
const float* octave_ptr = d_keypoints.ptr<float>(OCTAVE_ROW);
|
||||
const float* size_ptr = d_keypoints.ptr<float>(SIZE_ROW);
|
||||
|
||||
keypoints.resize(d_keypoints.cols);
|
||||
|
||||
for (int i = 0; i < d_keypoints.cols; ++i)
|
||||
{
|
||||
KeyPoint kp;
|
||||
|
||||
kp.pt.x = x_ptr[i];
|
||||
kp.pt.y = y_ptr[i];
|
||||
kp.response = response_ptr[i];
|
||||
kp.angle = angle_ptr[i];
|
||||
kp.octave = static_cast<int>(octave_ptr[i]);
|
||||
kp.size = size_ptr[i];
|
||||
|
||||
keypoints[i] = kp;
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints)
|
||||
{
|
||||
buildScalePyramids(image, mask);
|
||||
computeKeyPointsPyramid();
|
||||
mergeKeyPoints(keypoints);
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors)
|
||||
{
|
||||
buildScalePyramids(image, mask);
|
||||
computeKeyPointsPyramid();
|
||||
computeDescriptors(descriptors);
|
||||
mergeKeyPoints(keypoints);
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
|
||||
{
|
||||
(*this)(image, mask, d_keypoints_);
|
||||
downloadKeyPoints(d_keypoints_, keypoints);
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors)
|
||||
{
|
||||
(*this)(image, mask, d_keypoints_, descriptors);
|
||||
downloadKeyPoints(d_keypoints_, keypoints);
|
||||
}
|
||||
|
||||
void cv::cuda::ORB_CUDA::release()
|
||||
{
|
||||
imagePyr_.clear();
|
||||
maskPyr_.clear();
|
||||
|
||||
buf_.release();
|
||||
|
||||
keyPointsPyr_.clear();
|
||||
|
||||
fastDetector_.release();
|
||||
|
||||
d_keypoints_.release();
|
||||
return makePtr<ORB_Impl>(nfeatures, scaleFactor, nlevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize, fastThreshold, blurForDescriptor);
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
||||
|
@ -76,15 +76,14 @@ CUDA_TEST_P(FAST, Accuracy)
|
||||
cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(image.empty());
|
||||
|
||||
cv::cuda::FAST_CUDA fast(threshold);
|
||||
fast.nonmaxSuppression = nonmaxSuppression;
|
||||
cv::Ptr<cv::cuda::FastFeatureDetector> fast = cv::cuda::FastFeatureDetector::create(threshold, nonmaxSuppression);
|
||||
|
||||
if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
|
||||
{
|
||||
try
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
|
||||
fast->detect(loadMat(image), keypoints);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
@ -94,7 +93,7 @@ CUDA_TEST_P(FAST, Accuracy)
|
||||
else
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
|
||||
fast->detect(loadMat(image), keypoints);
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints_gold;
|
||||
cv::FAST(image, keypoints_gold, threshold, nonmaxSuppression);
|
||||
@ -123,7 +122,7 @@ namespace
|
||||
IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, bool)
|
||||
}
|
||||
|
||||
CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE)
|
||||
CV_ENUM(ORB_ScoreType, cv::ORB::HARRIS_SCORE, cv::ORB::FAST_SCORE)
|
||||
|
||||
PARAM_TEST_CASE(ORB, cv::cuda::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
|
||||
{
|
||||
@ -163,8 +162,9 @@ CUDA_TEST_P(ORB, Accuracy)
|
||||
cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
|
||||
mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
|
||||
|
||||
cv::cuda::ORB_CUDA orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
|
||||
orb.blurForDescriptor = blurForDescriptor;
|
||||
cv::Ptr<cv::cuda::ORB> orb =
|
||||
cv::cuda::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel,
|
||||
WTA_K, scoreType, patchSize, 20, blurForDescriptor);
|
||||
|
||||
if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
|
||||
{
|
||||
@ -172,7 +172,7 @@ CUDA_TEST_P(ORB, Accuracy)
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
cv::cuda::GpuMat descriptors;
|
||||
orb(loadMat(image), loadMat(mask), keypoints, descriptors);
|
||||
orb->detectAndComputeAsync(loadMat(image), loadMat(mask), keypoints, descriptors);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
@ -183,7 +183,7 @@ CUDA_TEST_P(ORB, Accuracy)
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
cv::cuda::GpuMat descriptors;
|
||||
orb(loadMat(image), loadMat(mask), keypoints, descriptors);
|
||||
orb->detectAndCompute(loadMat(image), loadMat(mask), keypoints, descriptors);
|
||||
|
||||
cv::Ptr<cv::ORB> orb_gold = cv::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
|
||||
|
||||
@ -208,7 +208,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Features2D, ORB, testing::Combine(
|
||||
testing::Values(ORB_ScaleFactor(1.2f)),
|
||||
testing::Values(ORB_LevelsCount(4), ORB_LevelsCount(8)),
|
||||
testing::Values(ORB_EdgeThreshold(31)),
|
||||
testing::Values(ORB_firstLevel(0), ORB_firstLevel(2)),
|
||||
testing::Values(ORB_firstLevel(0)),
|
||||
testing::Values(ORB_WTA_K(2), ORB_WTA_K(3), ORB_WTA_K(4)),
|
||||
testing::Values(ORB_ScoreType(cv::ORB::HARRIS_SCORE)),
|
||||
testing::Values(ORB_PatchSize(31), ORB_PatchSize(29)),
|
||||
@ -285,7 +285,8 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::cuda::DeviceInfo, NormCode, DescriptorSiz
|
||||
|
||||
CUDA_TEST_P(BruteForceMatcher, Match_Single)
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA matcher(normCode);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
|
||||
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
|
||||
|
||||
cv::cuda::GpuMat mask;
|
||||
if (useMask)
|
||||
@ -295,7 +296,7 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single)
|
||||
}
|
||||
|
||||
std::vector<cv::DMatch> matches;
|
||||
matcher.match(loadMat(query), loadMat(train), matches, mask);
|
||||
matcher->match(loadMat(query), loadMat(train), matches, mask);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
@ -312,13 +313,14 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single)
|
||||
|
||||
CUDA_TEST_P(BruteForceMatcher, Match_Collection)
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA matcher(normCode);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
|
||||
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
|
||||
|
||||
cv::cuda::GpuMat d_train(train);
|
||||
|
||||
// make add() twice to test such case
|
||||
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
|
||||
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
|
||||
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
|
||||
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
|
||||
|
||||
// prepare masks (make first nearest match illegal)
|
||||
std::vector<cv::cuda::GpuMat> masks(2);
|
||||
@ -331,9 +333,9 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection)
|
||||
|
||||
std::vector<cv::DMatch> matches;
|
||||
if (useMask)
|
||||
matcher.match(cv::cuda::GpuMat(query), matches, masks);
|
||||
matcher->match(cv::cuda::GpuMat(query), matches, masks);
|
||||
else
|
||||
matcher.match(cv::cuda::GpuMat(query), matches);
|
||||
matcher->match(cv::cuda::GpuMat(query), matches);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
@ -366,7 +368,8 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection)
|
||||
|
||||
CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA matcher(normCode);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
|
||||
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
|
||||
|
||||
const int knn = 2;
|
||||
|
||||
@ -378,7 +381,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
|
||||
}
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
|
||||
matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
@ -405,7 +408,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
|
||||
|
||||
CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA matcher(normCode);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
|
||||
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
|
||||
|
||||
const int knn = 3;
|
||||
|
||||
@ -417,7 +421,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
|
||||
}
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
|
||||
matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
@ -444,15 +448,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
|
||||
|
||||
CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA matcher(normCode);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
|
||||
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
|
||||
|
||||
const int knn = 2;
|
||||
|
||||
cv::cuda::GpuMat d_train(train);
|
||||
|
||||
// make add() twice to test such case
|
||||
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
|
||||
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
|
||||
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
|
||||
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
|
||||
|
||||
// prepare masks (make first nearest match illegal)
|
||||
std::vector<cv::cuda::GpuMat> masks(2);
|
||||
@ -466,9 +471,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
|
||||
if (useMask)
|
||||
matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
|
||||
matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
|
||||
else
|
||||
matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
|
||||
matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
@ -506,15 +511,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
|
||||
|
||||
CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA matcher(normCode);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
|
||||
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
|
||||
|
||||
const int knn = 3;
|
||||
|
||||
cv::cuda::GpuMat d_train(train);
|
||||
|
||||
// make add() twice to test such case
|
||||
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
|
||||
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
|
||||
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
|
||||
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
|
||||
|
||||
// prepare masks (make first nearest match illegal)
|
||||
std::vector<cv::cuda::GpuMat> masks(2);
|
||||
@ -528,9 +534,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
|
||||
if (useMask)
|
||||
matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
|
||||
matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
|
||||
else
|
||||
matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
|
||||
matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
@ -568,7 +574,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
|
||||
|
||||
CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA matcher(normCode);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
|
||||
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
|
||||
|
||||
const float radius = 1.f / countFactor;
|
||||
|
||||
@ -577,7 +584,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
|
||||
try
|
||||
{
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
|
||||
matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
@ -594,7 +601,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
|
||||
}
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius, mask);
|
||||
matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius, mask);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
@ -617,7 +624,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
|
||||
|
||||
CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
|
||||
{
|
||||
cv::cuda::BFMatcher_CUDA matcher(normCode);
|
||||
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
|
||||
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
|
||||
|
||||
const int n = 3;
|
||||
const float radius = 1.f / countFactor * n;
|
||||
@ -625,8 +633,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
|
||||
cv::cuda::GpuMat d_train(train);
|
||||
|
||||
// make add() twice to test such case
|
||||
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
|
||||
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
|
||||
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
|
||||
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
|
||||
|
||||
// prepare masks (make first nearest match illegal)
|
||||
std::vector<cv::cuda::GpuMat> masks(2);
|
||||
@ -642,7 +650,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
|
||||
try
|
||||
{
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
|
||||
matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
@ -654,9 +662,9 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
|
||||
if (useMask)
|
||||
matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
|
||||
matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
|
||||
else
|
||||
matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius);
|
||||
matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
|
@ -542,7 +542,7 @@ namespace
|
||||
anchor_ = Point(iters_, iters_);
|
||||
iters_ = 1;
|
||||
}
|
||||
else if (iters_ > 1 && countNonZero(kernel) == (int) kernel.total())
|
||||
else if (iters_ > 1 && cv::countNonZero(kernel) == (int) kernel.total())
|
||||
{
|
||||
anchor_ = Point(anchor_.x * iters_, anchor_.y * iters_);
|
||||
kernel = getStructuringElement(MORPH_RECT,
|
||||
|
@ -81,7 +81,6 @@ namespace
|
||||
GpuMat Dy_;
|
||||
GpuMat buf_;
|
||||
GpuMat eig_;
|
||||
GpuMat minMaxbuf_;
|
||||
GpuMat tmpCorners_;
|
||||
};
|
||||
|
||||
@ -112,7 +111,7 @@ namespace
|
||||
cornerCriteria_->compute(image, eig_);
|
||||
|
||||
double maxVal = 0;
|
||||
cuda::minMax(eig_, 0, &maxVal, noArray(), minMaxbuf_);
|
||||
cuda::minMax(eig_, 0, &maxVal);
|
||||
|
||||
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
|
||||
|
||||
|
@ -271,7 +271,6 @@ namespace
|
||||
private:
|
||||
Match_CCORR_8U match_CCORR_;
|
||||
GpuMat image_sqsums_;
|
||||
GpuMat intBuffer_;
|
||||
};
|
||||
|
||||
void Match_CCORR_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
|
||||
@ -288,7 +287,7 @@ namespace
|
||||
match_CCORR_.match(image, templ, _result, stream);
|
||||
GpuMat result = _result.getGpuMat();
|
||||
|
||||
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
|
||||
cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
|
||||
|
||||
double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
|
||||
|
||||
@ -335,7 +334,6 @@ namespace
|
||||
|
||||
private:
|
||||
GpuMat image_sqsums_;
|
||||
GpuMat intBuffer_;
|
||||
Match_CCORR_8U match_CCORR_;
|
||||
};
|
||||
|
||||
@ -359,7 +357,7 @@ namespace
|
||||
return;
|
||||
}
|
||||
|
||||
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
|
||||
cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
|
||||
|
||||
double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
|
||||
|
||||
@ -383,7 +381,6 @@ namespace
|
||||
|
||||
private:
|
||||
GpuMat image_sqsums_;
|
||||
GpuMat intBuffer_;
|
||||
Match_CCORR_8U match_CCORR_;
|
||||
};
|
||||
|
||||
@ -398,7 +395,7 @@ namespace
|
||||
CV_Assert( image.type() == templ.type() );
|
||||
CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
|
||||
|
||||
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
|
||||
cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
|
||||
|
||||
double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
|
||||
|
||||
@ -421,7 +418,6 @@ namespace
|
||||
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
|
||||
|
||||
private:
|
||||
GpuMat intBuffer_;
|
||||
std::vector<GpuMat> images_;
|
||||
std::vector<GpuMat> image_sums_;
|
||||
Match_CCORR_8U match_CCORR_;
|
||||
@ -444,7 +440,7 @@ namespace
|
||||
if (image.channels() == 1)
|
||||
{
|
||||
image_sums_.resize(1);
|
||||
cuda::integral(image, image_sums_[0], intBuffer_, stream);
|
||||
cuda::integral(image, image_sums_[0], stream);
|
||||
|
||||
int templ_sum = (int) cuda::sum(templ)[0];
|
||||
|
||||
@ -456,7 +452,7 @@ namespace
|
||||
|
||||
image_sums_.resize(images_.size());
|
||||
for (int i = 0; i < image.channels(); ++i)
|
||||
cuda::integral(images_[i], image_sums_[i], intBuffer_, stream);
|
||||
cuda::integral(images_[i], image_sums_[i], stream);
|
||||
|
||||
Scalar templ_sum = cuda::sum(templ);
|
||||
|
||||
@ -501,7 +497,6 @@ namespace
|
||||
private:
|
||||
GpuMat imagef_, templf_;
|
||||
Match_CCORR_32F match_CCORR_32F_;
|
||||
GpuMat intBuffer_;
|
||||
std::vector<GpuMat> images_;
|
||||
std::vector<GpuMat> image_sums_;
|
||||
std::vector<GpuMat> image_sqsums_;
|
||||
@ -527,10 +522,10 @@ namespace
|
||||
if (image.channels() == 1)
|
||||
{
|
||||
image_sums_.resize(1);
|
||||
cuda::integral(image, image_sums_[0], intBuffer_, stream);
|
||||
cuda::integral(image, image_sums_[0], stream);
|
||||
|
||||
image_sqsums_.resize(1);
|
||||
cuda::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream);
|
||||
cuda::sqrIntegral(image, image_sqsums_[0], stream);
|
||||
|
||||
int templ_sum = (int) cuda::sum(templ)[0];
|
||||
double templ_sqsum = cuda::sqrSum(templ)[0];
|
||||
@ -547,8 +542,8 @@ namespace
|
||||
image_sqsums_.resize(images_.size());
|
||||
for (int i = 0; i < image.channels(); ++i)
|
||||
{
|
||||
cuda::integral(images_[i], image_sums_[i], intBuffer_, stream);
|
||||
cuda::sqrIntegral(images_[i], image_sqsums_[i], intBuffer_, stream);
|
||||
cuda::integral(images_[i], image_sums_[i], stream);
|
||||
cuda::sqrIntegral(images_[i], image_sqsums_[i], stream);
|
||||
}
|
||||
|
||||
Scalar templ_sum = cuda::sum(templ);
|
||||
|
@ -43,6 +43,7 @@
|
||||
#ifndef __OPENCV_CUDALEGACY_HPP__
|
||||
#define __OPENCV_CUDALEGACY_HPP__
|
||||
|
||||
#include "opencv2/core/cuda.hpp"
|
||||
#include "opencv2/cudalegacy/NCV.hpp"
|
||||
#include "opencv2/cudalegacy/NPP_staging.hpp"
|
||||
#include "opencv2/cudalegacy/NCVPyramid.hpp"
|
||||
@ -56,4 +57,16 @@
|
||||
@}
|
||||
*/
|
||||
|
||||
namespace cv { namespace cuda {
|
||||
|
||||
/** @brief Abstract interface of an image pyramid.

Instances are obtained from createImagePyramid.
 */
class CV_EXPORTS ImagePyramid : public Algorithm
{
public:
    /** @brief Produces an image of the requested size from the pyramid.

    @param outImg Destination image.
    @param outRoi Requested size of the destination image.
    @param stream Stream on which the operation is issued.
     */
    virtual void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const = 0;
};
|
||||
|
||||
CV_EXPORTS Ptr<ImagePyramid> createImagePyramid(InputArray img, int nLayers = -1, Stream& stream = Stream::Null());
|
||||
|
||||
}}
|
||||
|
||||
#endif /* __OPENCV_CUDALEGACY_HPP__ */
|
||||
|
147
modules/cudalegacy/src/image_pyramid.cpp
Normal file
147
modules/cudalegacy/src/image_pyramid.cpp
Normal file
@ -0,0 +1,147 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
// Stub compiled when HAVE_CUDA is not defined or CUDA_DISABLER is defined: all arguments are
// ignored, throw_no_cuda() is called, and an empty pointer is returned if that call comes back.
Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray, int, Stream&) { throw_no_cuda(); return Ptr<ImagePyramid>(); }
|
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
namespace
|
||||
{
|
||||
class ImagePyramidImpl : public ImagePyramid
|
||||
{
|
||||
public:
|
||||
ImagePyramidImpl(InputArray img, int nLayers, Stream& stream);
|
||||
|
||||
void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const;
|
||||
|
||||
private:
|
||||
GpuMat layer0_;
|
||||
std::vector<GpuMat> pyramid_;
|
||||
int nLayers_;
|
||||
};
|
||||
|
||||
// Builds the pyramid. The source image is copied into level 0, then every
// following level is produced by downsampling the previous one by two until
// either the requested number of levels is reached or a halved dimension
// becomes zero. All work is issued on the given stream.
ImagePyramidImpl::ImagePyramidImpl(InputArray _img, int numLayers, Stream& stream)
{
    GpuMat img = _img.getGpuMat();

    CV_Assert( img.depth() <= CV_32F && img.channels() <= 4 );

    img.copyTo(layer0_, stream);

    // Level 0 always exists.
    nLayers_ = 1;

    // A non-positive request means "as deep as possible": 255 is only an upper
    // bound, the loop below stops by itself once a halved dimension reaches zero.
    const int requestedLayers = (numLayers > 0) ? numLayers : 255;

    pyramid_.resize(requestedLayers);

    Size prevSize = img.size();

    for (int level = 0; level + 1 < requestedLayers; ++level)
    {
        const Size halfSize(prevSize.width / 2, prevSize.height / 2);

        if (halfSize.width == 0 || halfSize.height == 0)
            break;

        GpuMat& dstLayer = pyramid_[level];
        ensureSizeIsEnough(halfSize, img.type(), dstLayer);
        ++nLayers_;

        // The first downsampled level is computed from the stored copy of the source.
        const GpuMat& srcLayer = (level == 0) ? layer0_ : pyramid_[level - 1];

        cv::cuda::device::pyramid::downsampleX2(srcLayer, dstLayer, img.depth(), img.channels(), StreamAccessor::getStream(stream));

        prevSize = halfSize;
    }
}
|
||||
|
||||
// Writes an image of size outRoi, derived from the stored pyramid levels, into _outImg.
//
//  - outRoi must be positive and must not exceed the size of level 0 (asserted).
//  - If outRoi equals the size of level 0 or of any stored level, that level is
//    copied unchanged.
//  - Otherwise the result is interpolated from the last level visited before the
//    first level that fits entirely inside outRoi (or from the smallest stored
//    level when none fits).
//
// All device work is issued on the given stream.
void ImagePyramidImpl::getLayer(OutputArray _outImg, Size outRoi, Stream& stream) const
{
    CV_Assert( outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0 );

    ensureSizeIsEnough(outRoi, layer0_.type(), _outImg);
    GpuMat outImg = _outImg.getGpuMat();

    if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
    {
        layer0_.copyTo(outImg, stream);
        return;
    }

    // Source for the interpolation; updated while walking down the pyramid.
    GpuMat lastLayer = layer0_;

    for (int i = 0; i < nLayers_ - 1; ++i)
    {
        const GpuMat& curLayer = pyramid_[i];

        if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
        {
            // Exact match: the stored level is the answer. Returning here keeps the
            // copy from being overwritten by the interpolation below.
            curLayer.copyTo(outImg, stream);
            return;
        }

        // This level already fits inside the requested size, so the previous
        // (larger) level is the one to interpolate from.
        if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
            break;

        lastLayer = curLayer;
    }

    cv::cuda::device::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
}
|
||||
}
|
||||
|
||||
// Factory for the CUDA build: constructs an ImagePyramidImpl from the given
// image, level count and stream, and returns it through the ImagePyramid interface.
Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray img, int nLayers, Stream& stream)
{
    Ptr<ImagePyramid> pyramid(new ImagePyramidImpl(img, nLayers, stream));
    return pyramid;
}
|
||||
|
||||
#endif
|
9
modules/cudaobjdetect/CMakeLists.txt
Normal file
9
modules/cudaobjdetect/CMakeLists.txt
Normal file
@ -0,0 +1,9 @@
|
||||
if(IOS OR (NOT HAVE_CUDA AND NOT BUILD_CUDA_STUBS))
|
||||
ocv_module_disable(cudaobjdetect)
|
||||
endif()
|
||||
|
||||
set(the_description "CUDA-accelerated Object Detection")
|
||||
|
||||
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)
|
||||
|
||||
ocv_define_module(cudaobjdetect opencv_objdetect opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)
|
288
modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
Normal file
288
modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
Normal file
@ -0,0 +1,288 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDAOBJDETECT_HPP__
|
||||
#define __OPENCV_CUDAOBJDETECT_HPP__
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error cudaobjdetect.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
#include "opencv2/core/cuda.hpp"
|
||||
|
||||
/**
|
||||
@addtogroup cuda
|
||||
@{
|
||||
@defgroup cudaobjdetect Object Detection
|
||||
@}
|
||||
*/
|
||||
|
||||
namespace cv { namespace cuda {
|
||||
|
||||
//! @addtogroup cudaobjdetect
|
||||
//! @{
|
||||
|
||||
//
|
||||
// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector
|
||||
//
|
||||
|
||||
/** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.
|
||||
|
||||
@note
|
||||
- An example applying the HOG descriptor for people detection can be found at
|
||||
opencv_source_code/samples/cpp/peopledetect.cpp
|
||||
- A CUDA example applying the HOG descriptor for people detection can be found at
|
||||
opencv_source_code/samples/gpu/hog.cpp
|
||||
- (Python) An example applying the HOG descriptor for people detection can be found at
|
||||
opencv_source_code/samples/python2/peopledetect.py
|
||||
*/
|
||||
class CV_EXPORTS HOG : public Algorithm
|
||||
{
|
||||
public:
|
||||
enum
|
||||
{
|
||||
DESCR_FORMAT_ROW_BY_ROW,
|
||||
DESCR_FORMAT_COL_BY_COL
|
||||
};
|
||||
|
||||
/** @brief Creates the HOG descriptor and detector.
|
||||
|
||||
@param win_size Detection window size. Align to block size and block stride.
|
||||
@param block_size Block size in pixels. Align to cell size. Only (16,16) is supported for now.
|
||||
@param block_stride Block stride. It must be a multiple of cell size.
|
||||
@param cell_size Cell size. Only (8, 8) is supported for now.
|
||||
@param nbins Number of bins. Only 9 bins per cell are supported for now.
|
||||
*/
|
||||
static Ptr<HOG> create(Size win_size = Size(64, 128),
|
||||
Size block_size = Size(16, 16),
|
||||
Size block_stride = Size(8, 8),
|
||||
Size cell_size = Size(8, 8),
|
||||
int nbins = 9);
|
||||
|
||||
//! Gaussian smoothing window parameter.
|
||||
virtual void setWinSigma(double win_sigma) = 0;
|
||||
virtual double getWinSigma() const = 0;
|
||||
|
||||
//! L2-Hys normalization method shrinkage.
|
||||
virtual void setL2HysThreshold(double threshold_L2hys) = 0;
|
||||
virtual double getL2HysThreshold() const = 0;
|
||||
|
||||
//! Flag to specify whether the gamma correction preprocessing is required or not.
|
||||
virtual void setGammaCorrection(bool gamma_correction) = 0;
|
||||
virtual bool getGammaCorrection() const = 0;
|
||||
|
||||
//! Maximum number of detection window increases.
|
||||
virtual void setNumLevels(int nlevels) = 0;
|
||||
virtual int getNumLevels() const = 0;
|
||||
|
||||
//! Threshold for the distance between features and SVM classifying plane.
|
||||
//! Usually it is 0 and should be specified in the detector coefficients (as the last free
|
||||
//! coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
|
||||
//! manually here.
|
||||
virtual void setHitThreshold(double hit_threshold) = 0;
|
||||
virtual double getHitThreshold() const = 0;
|
||||
|
||||
//! Window stride. It must be a multiple of block stride.
|
||||
virtual void setWinStride(Size win_stride) = 0;
|
||||
virtual Size getWinStride() const = 0;
|
||||
|
||||
//! Coefficient of the detection window increase.
|
||||
virtual void setScaleFactor(double scale0) = 0;
|
||||
virtual double getScaleFactor() const = 0;
|
||||
|
||||
//! Coefficient to regulate the similarity threshold. When detected, some
|
||||
//! objects can be covered by many rectangles. 0 means not to perform grouping.
|
||||
//! See groupRectangles.
|
||||
virtual void setGroupThreshold(int group_threshold) = 0;
|
||||
virtual int getGroupThreshold() const = 0;
|
||||
|
||||
//! Descriptor storage format:
|
||||
//! - **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
|
||||
//! - **DESCR_FORMAT_COL_BY_COL** - Column-major order.
|
||||
virtual void setDescriptorFormat(int descr_format) = 0;
|
||||
virtual int getDescriptorFormat() const = 0;
|
||||
|
||||
/** @brief Returns the number of coefficients required for the classification.
|
||||
*/
|
||||
virtual size_t getDescriptorSize() const = 0;
|
||||
|
||||
/** @brief Returns the block histogram size.
|
||||
*/
|
||||
virtual size_t getBlockHistogramSize() const = 0;
|
||||
|
||||
/** @brief Sets coefficients for the linear SVM classifier.
|
||||
*/
|
||||
virtual void setSVMDetector(InputArray detector) = 0;
|
||||
|
||||
/** @brief Returns coefficients of the classifier trained for people detection.
|
||||
*/
|
||||
virtual Mat getDefaultPeopleDetector() const = 0;
|
||||
|
||||
/** @brief Performs object detection without a multi-scale window.
|
||||
|
||||
@param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
|
||||
@param found_locations Left-top corner points of detected objects boundaries.
|
||||
@param confidences Optional output array for confidences.
|
||||
*/
|
||||
virtual void detect(InputArray img,
|
||||
std::vector<Point>& found_locations,
|
||||
std::vector<double>* confidences = NULL) = 0;
|
||||
|
||||
/** @brief Performs object detection with a multi-scale window.
|
||||
|
||||
@param img Source image. See cuda::HOG::detect for type limitations.
|
||||
@param found_locations Detected objects boundaries.
|
||||
@param confidences Optional output array for confidences.
|
||||
*/
|
||||
virtual void detectMultiScale(InputArray img,
|
||||
std::vector<Rect>& found_locations,
|
||||
std::vector<double>* confidences = NULL) = 0;
|
||||
|
||||
/** @brief Returns block descriptors computed for the whole image.
|
||||
|
||||
@param img Source image. See cuda::HOG::detect for type limitations.
|
||||
@param descriptors 2D array of descriptors.
|
||||
@param stream CUDA stream.
|
||||
*/
|
||||
virtual void compute(InputArray img,
|
||||
OutputArray descriptors,
|
||||
Stream& stream = Stream::Null()) = 0;
|
||||
};
|
||||
|
||||
//
|
||||
// CascadeClassifier
|
||||
//
|
||||
|
||||
/** @brief Cascade classifier class used for object detection. Supports HAAR and LBP cascades.
|
||||
|
||||
@note
|
||||
- A cascade classifier example can be found at
|
||||
opencv_source_code/samples/gpu/cascadeclassifier.cpp
|
||||
- An NVIDIA API specific cascade classifier example can be found at
|
||||
opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
|
||||
*/
|
||||
class CV_EXPORTS CascadeClassifier : public Algorithm
|
||||
{
|
||||
public:
|
||||
/** @brief Loads the classifier from a file. Cascade type is detected automatically by constructor parameter.
|
||||
|
||||
@param filename Name of the file from which the classifier is loaded. Only the old haar classifier
|
||||
(trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
|
||||
type of OpenCV XML cascade supported for LBP.
|
||||
*/
|
||||
static Ptr<CascadeClassifier> create(const String& filename);
|
||||
/** @overload
|
||||
*/
|
||||
static Ptr<CascadeClassifier> create(const FileStorage& file);
|
||||
|
||||
//! Maximum possible object size. Objects larger than that are ignored. Used for
|
||||
//! second signature and supported only for LBP cascades.
|
||||
virtual void setMaxObjectSize(Size maxObjectSize) = 0;
|
||||
virtual Size getMaxObjectSize() const = 0;
|
||||
|
||||
//! Minimum possible object size. Objects smaller than that are ignored.
|
||||
virtual void setMinObjectSize(Size minSize) = 0;
|
||||
virtual Size getMinObjectSize() const = 0;
|
||||
|
||||
//! Parameter specifying how much the image size is reduced at each image scale.
|
||||
virtual void setScaleFactor(double scaleFactor) = 0;
|
||||
virtual double getScaleFactor() const = 0;
|
||||
|
||||
//! Parameter specifying how many neighbors each candidate rectangle should have
|
||||
//! to retain it.
|
||||
virtual void setMinNeighbors(int minNeighbors) = 0;
|
||||
virtual int getMinNeighbors() const = 0;
|
||||
|
||||
virtual void setFindLargestObject(bool findLargestObject) = 0;
|
||||
virtual bool getFindLargestObject() = 0;
|
||||
|
||||
virtual void setMaxNumObjects(int maxNumObjects) = 0;
|
||||
virtual int getMaxNumObjects() const = 0;
|
||||
|
||||
virtual Size getClassifierSize() const = 0;
|
||||
|
||||
/** @brief Detects objects of different sizes in the input image.
|
||||
|
||||
@param image Matrix of type CV_8U containing an image where objects should be detected.
|
||||
@param objects Buffer to store detected objects (rectangles).
|
||||
@param stream CUDA stream.
|
||||
|
||||
To get final array of detected objects use CascadeClassifier::convert method.
|
||||
|
||||
@code
|
||||
Ptr<cuda::CascadeClassifier> cascade_gpu = cuda::CascadeClassifier::create(...);
|
||||
|
||||
Mat image_cpu = imread(...);
|
||||
GpuMat image_gpu(image_cpu);
|
||||
|
||||
GpuMat objbuf;
|
||||
cascade_gpu->detectMultiScale(image_gpu, objbuf);
|
||||
|
||||
std::vector<Rect> faces;
|
||||
cascade_gpu->convert(objbuf, faces);
|
||||
|
||||
for(size_t i = 0; i < faces.size(); ++i)
|
||||
cv::rectangle(image_cpu, faces[i], Scalar(255));
|
||||
|
||||
imshow("Faces", image_cpu);
|
||||
@endcode
|
||||
|
||||
@sa CascadeClassifier::detectMultiScale
|
||||
*/
|
||||
virtual void detectMultiScale(InputArray image,
|
||||
OutputArray objects,
|
||||
Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
/** @brief Converts objects array from internal representation to standard vector.
|
||||
|
||||
@param gpu_objects Objects array in internal representation.
|
||||
@param objects Resulting array.
|
||||
*/
|
||||
virtual void convert(OutputArray gpu_objects,
|
||||
std::vector<Rect>& objects) = 0;
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
||||
}} // namespace cv { namespace cuda {
|
||||
|
||||
#endif /* __OPENCV_CUDAOBJDETECT_HPP__ */
|
47
modules/cudaobjdetect/perf/perf_main.cpp
Normal file
47
modules/cudaobjdetect/perf/perf_main.cpp
Normal file
@ -0,0 +1,47 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
using namespace perf;
|
||||
|
||||
CV_PERF_TEST_CUDA_MAIN(cudaobjdetect)
|
@ -71,10 +71,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
|
||||
const cv::cuda::GpuMat d_img(img);
|
||||
std::vector<cv::Rect> gpu_found_locations;
|
||||
|
||||
cv::cuda::HOGDescriptor d_hog;
|
||||
d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
|
||||
cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
|
||||
d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
|
||||
|
||||
TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
|
||||
TEST_CYCLE() d_hog->detectMultiScale(d_img, gpu_found_locations);
|
||||
|
||||
SANITY_CHECK(gpu_found_locations);
|
||||
}
|
||||
@ -82,8 +82,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
|
||||
{
|
||||
std::vector<cv::Rect> cpu_found_locations;
|
||||
|
||||
cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
|
||||
|
||||
cv::HOGDescriptor hog;
|
||||
hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
|
||||
hog.setSVMDetector(d_hog->getDefaultPeopleDetector());
|
||||
|
||||
TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);
|
||||
|
||||
@ -105,18 +107,17 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
|
||||
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
cv::cuda::CascadeClassifier_CUDA d_cascade;
|
||||
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
|
||||
cv::Ptr<cv::cuda::CascadeClassifier> d_cascade =
|
||||
cv::cuda::CascadeClassifier::create(perf::TestBase::getDataPath(GetParam().second));
|
||||
|
||||
const cv::cuda::GpuMat d_img(img);
|
||||
cv::cuda::GpuMat objects_buffer;
|
||||
int detections_num = 0;
|
||||
|
||||
TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
|
||||
TEST_CYCLE() d_cascade->detectMultiScale(d_img, objects_buffer);
|
||||
|
||||
std::vector<cv::Rect> gpu_rects;
|
||||
d_cascade->convert(objects_buffer, gpu_rects);
|
||||
|
||||
std::vector<cv::Rect> gpu_rects(detections_num);
|
||||
cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
|
||||
objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
|
||||
cv::groupRectangles(gpu_rects, 3, 0.2);
|
||||
SANITY_CHECK(gpu_rects);
|
||||
}
|
||||
@ -144,18 +145,17 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
|
||||
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
cv::cuda::CascadeClassifier_CUDA d_cascade;
|
||||
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
|
||||
cv::Ptr<cv::cuda::CascadeClassifier> d_cascade =
|
||||
cv::cuda::CascadeClassifier::create(perf::TestBase::getDataPath(GetParam().second));
|
||||
|
||||
const cv::cuda::GpuMat d_img(img);
|
||||
cv::cuda::GpuMat objects_buffer;
|
||||
int detections_num = 0;
|
||||
|
||||
TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
|
||||
TEST_CYCLE() d_cascade->detectMultiScale(d_img, objects_buffer);
|
||||
|
||||
std::vector<cv::Rect> gpu_rects;
|
||||
d_cascade->convert(objects_buffer, gpu_rects);
|
||||
|
||||
std::vector<cv::Rect> gpu_rects(detections_num);
|
||||
cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
|
||||
objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
|
||||
cv::groupRectangles(gpu_rects, 3, 0.2);
|
||||
SANITY_CHECK(gpu_rects);
|
||||
}
|
64
modules/cudaobjdetect/perf/perf_precomp.hpp
Normal file
64
modules/cudaobjdetect/perf/perf_precomp.hpp
Normal file
@ -0,0 +1,64 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
# pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
# if defined __clang__ || defined __APPLE__
|
||||
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
|
||||
# pragma GCC diagnostic ignored "-Wextra"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef __OPENCV_PERF_PRECOMP_HPP__
|
||||
#define __OPENCV_PERF_PRECOMP_HPP__
|
||||
|
||||
#include "opencv2/ts.hpp"
|
||||
#include "opencv2/ts/cuda_perf.hpp"
|
||||
|
||||
#include "opencv2/cudaobjdetect.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
|
||||
#ifdef GTEST_CREATE_SHARED_LIBRARY
|
||||
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
|
||||
#endif
|
||||
|
||||
#endif
|
@ -48,160 +48,185 @@ using namespace cv::cuda;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA() { throw_no_cuda(); }
|
||||
cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA(const String&) { throw_no_cuda(); }
|
||||
cv::cuda::CascadeClassifier_CUDA::~CascadeClassifier_CUDA() { throw_no_cuda(); }
|
||||
bool cv::cuda::CascadeClassifier_CUDA::empty() const { throw_no_cuda(); return true; }
|
||||
bool cv::cuda::CascadeClassifier_CUDA::load(const String&) { throw_no_cuda(); return true; }
|
||||
Size cv::cuda::CascadeClassifier_CUDA::getClassifierSize() const { throw_no_cuda(); return Size();}
|
||||
void cv::cuda::CascadeClassifier_CUDA::release() { throw_no_cuda(); }
|
||||
int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat&, GpuMat&, double, int, Size) {throw_no_cuda(); return -1;}
|
||||
int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat&, GpuMat&, Size, Size, double, int) {throw_no_cuda(); return -1;}
|
||||
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
|
||||
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
|
||||
|
||||
#else
|
||||
|
||||
struct cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
|
||||
//
|
||||
// CascadeClassifierBase
|
||||
//
|
||||
|
||||
namespace
|
||||
{
|
||||
public:
|
||||
CascadeClassifierImpl(){}
|
||||
virtual ~CascadeClassifierImpl(){}
|
||||
class CascadeClassifierBase : public cuda::CascadeClassifier
|
||||
{
|
||||
public:
|
||||
CascadeClassifierBase();
|
||||
|
||||
virtual unsigned int process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
|
||||
bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize, cv::Size maxObjectSize) = 0;
|
||||
virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; }
|
||||
virtual Size getMaxObjectSize() const { return maxObjectSize_; }
|
||||
|
||||
virtual cv::Size getClassifierCvSize() const = 0;
|
||||
virtual bool read(const String& classifierAsXml) = 0;
|
||||
};
|
||||
virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; }
|
||||
virtual Size getMinObjectSize() const { return minObjectSize_; }
|
||||
|
||||
#ifndef HAVE_OPENCV_CUDALEGACY
|
||||
virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
|
||||
virtual double getScaleFactor() const { return scaleFactor_; }
|
||||
|
||||
struct cv::cuda::CascadeClassifier_CUDA::HaarCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
|
||||
virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; }
|
||||
virtual int getMinNeighbors() const { return minNeighbors_; }
|
||||
|
||||
virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; }
|
||||
virtual bool getFindLargestObject() { return findLargestObject_; }
|
||||
|
||||
virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; }
|
||||
virtual int getMaxNumObjects() const { return maxNumObjects_; }
|
||||
|
||||
protected:
|
||||
Size maxObjectSize_;
|
||||
Size minObjectSize_;
|
||||
double scaleFactor_;
|
||||
int minNeighbors_;
|
||||
bool findLargestObject_;
|
||||
int maxNumObjects_;
|
||||
};
|
||||
|
||||
// Default detection parameters shared by the cascade implementations:
// default-constructed min/max object sizes, scale factor 1.2, 4 neighbours,
// "find largest object" off, and at most 100 objects.
CascadeClassifierBase::CascadeClassifierBase() :
|
||||
maxObjectSize_(),
|
||||
minObjectSize_(),
|
||||
scaleFactor_(1.2),
|
||||
minNeighbors_(4),
|
||||
findLargestObject_(false),
|
||||
maxNumObjects_(100)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// HaarCascade
|
||||
//
|
||||
|
||||
#ifdef HAVE_OPENCV_CUDALEGACY
|
||||
|
||||
namespace
|
||||
{
|
||||
public:
|
||||
HaarCascade()
|
||||
class HaarCascade_Impl : public CascadeClassifierBase
|
||||
{
|
||||
throw_no_cuda();
|
||||
public:
|
||||
explicit HaarCascade_Impl(const String& filename);
|
||||
|
||||
virtual Size getClassifierSize() const;
|
||||
|
||||
virtual void detectMultiScale(InputArray image,
|
||||
OutputArray objects,
|
||||
Stream& stream);
|
||||
|
||||
virtual void convert(OutputArray gpu_objects,
|
||||
std::vector<Rect>& objects);
|
||||
|
||||
private:
|
||||
NCVStatus load(const String& classifierFile);
|
||||
NCVStatus calculateMemReqsAndAllocate(const Size& frameSize);
|
||||
NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections);
|
||||
|
||||
Size lastAllocatedFrameSize;
|
||||
|
||||
Ptr<NCVMemStackAllocator> gpuAllocator;
|
||||
Ptr<NCVMemStackAllocator> cpuAllocator;
|
||||
|
||||
cudaDeviceProp devProp;
|
||||
NCVStatus ncvStat;
|
||||
|
||||
Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
|
||||
Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
|
||||
|
||||
Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
|
||||
Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
|
||||
Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;
|
||||
|
||||
HaarClassifierCascadeDescriptor haar;
|
||||
|
||||
Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages;
|
||||
Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
|
||||
Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
|
||||
};
|
||||
|
||||
// Debug-output callback registered through ncvSetDebugOutputHandler():
// raises the received message as a CV_Error with code Error::GpuApiCallError.
static void NCVDebugOutputHandler(const String &msg)
|
||||
{
|
||||
CV_Error(Error::GpuApiCallError, msg.c_str());
|
||||
}
|
||||
|
||||
unsigned int process(const GpuMat&, GpuMat&, float, int, bool, bool, cv::Size, cv::Size)
|
||||
{
|
||||
throw_no_cuda();
|
||||
return 0;
|
||||
}
|
||||
|
||||
cv::Size getClassifierCvSize() const
|
||||
{
|
||||
throw_no_cuda();
|
||||
return cv::Size();
|
||||
}
|
||||
|
||||
bool read(const String&)
|
||||
{
|
||||
throw_no_cuda();
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
struct cv::cuda::CascadeClassifier_CUDA::HaarCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
|
||||
{
|
||||
public:
|
||||
HaarCascade() : lastAllocatedFrameSize(-1, -1)
|
||||
HaarCascade_Impl::HaarCascade_Impl(const String& filename) :
|
||||
lastAllocatedFrameSize(-1, -1)
|
||||
{
|
||||
ncvSetDebugOutputHandler(NCVDebugOutputHandler);
|
||||
}
|
||||
|
||||
bool read(const String& filename)
|
||||
{
|
||||
ncvSafeCall( load(filename) );
|
||||
return true;
|
||||
}
|
||||
|
||||
NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
|
||||
bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize,
|
||||
/*out*/unsigned int& numDetections)
|
||||
Size HaarCascade_Impl::getClassifierSize() const
|
||||
{
|
||||
calculateMemReqsAndAllocate(src.size());
|
||||
|
||||
NCVMemPtr src_beg;
|
||||
src_beg.ptr = (void*)src.ptr<Ncv8u>();
|
||||
src_beg.memtype = NCVMemoryTypeDevice;
|
||||
|
||||
NCVMemSegment src_seg;
|
||||
src_seg.begin = src_beg;
|
||||
src_seg.size = src.step * src.rows;
|
||||
|
||||
NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
|
||||
ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
|
||||
|
||||
CV_Assert(objects.rows == 1);
|
||||
|
||||
NCVMemPtr objects_beg;
|
||||
objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
|
||||
objects_beg.memtype = NCVMemoryTypeDevice;
|
||||
|
||||
NCVMemSegment objects_seg;
|
||||
objects_seg.begin = objects_beg;
|
||||
objects_seg.size = objects.step * objects.rows;
|
||||
NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
|
||||
ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
|
||||
|
||||
NcvSize32u roi;
|
||||
roi.width = d_src.width();
|
||||
roi.height = d_src.height();
|
||||
|
||||
NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);
|
||||
|
||||
Ncv32u flags = 0;
|
||||
flags |= findLargestObject? NCVPipeObjDet_FindLargestObject : 0;
|
||||
flags |= visualizeInPlace ? NCVPipeObjDet_VisualizeInPlace : 0;
|
||||
|
||||
ncvStat = ncvDetectObjectsMultiScale_device(
|
||||
d_src, roi, d_rects, numDetections, haar, *h_haarStages,
|
||||
*d_haarStages, *d_haarNodes, *d_haarFeatures,
|
||||
winMinSize,
|
||||
minNeighbors,
|
||||
scaleStep, 1,
|
||||
flags,
|
||||
*gpuAllocator, *cpuAllocator, devProp, 0);
|
||||
ncvAssertReturnNcvStat(ncvStat);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
|
||||
|
||||
return NCV_SUCCESS;
|
||||
return Size(haar.ClassifierSize.width, haar.ClassifierSize.height);
|
||||
}
|
||||
|
||||
unsigned int process(const GpuMat& image, GpuMat& objectsBuf, float scaleFactor, int minNeighbors,
|
||||
bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size /*maxObjectSize*/)
|
||||
void HaarCascade_Impl::detectMultiScale(InputArray _image,
|
||||
OutputArray _objects,
|
||||
Stream& stream)
|
||||
{
|
||||
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);
|
||||
const GpuMat image = _image.getGpuMat();
|
||||
|
||||
const int defaultObjSearchNum = 100;
|
||||
if (objectsBuf.empty())
|
||||
CV_Assert( image.depth() == CV_8U);
|
||||
CV_Assert( scaleFactor_ > 1 );
|
||||
CV_Assert( !stream );
|
||||
|
||||
Size ncvMinSize = getClassifierSize();
|
||||
if (ncvMinSize.width < minObjectSize_.width && ncvMinSize.height < minObjectSize_.height)
|
||||
{
|
||||
objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type);
|
||||
ncvMinSize.width = minObjectSize_.width;
|
||||
ncvMinSize.height = minObjectSize_.height;
|
||||
}
|
||||
|
||||
cv::Size ncvMinSize = this->getClassifierCvSize();
|
||||
|
||||
if (ncvMinSize.width < minSize.width && ncvMinSize.height < minSize.height)
|
||||
{
|
||||
ncvMinSize.width = minSize.width;
|
||||
ncvMinSize.height = minSize.height;
|
||||
}
|
||||
BufferPool pool(stream);
|
||||
GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);
|
||||
|
||||
unsigned int numDetections;
|
||||
ncvSafeCall(this->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections));
|
||||
ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) );
|
||||
|
||||
return numDetections;
|
||||
if (numDetections > 0)
|
||||
{
|
||||
objectsBuf.colRange(0, numDetections).copyTo(_objects);
|
||||
}
|
||||
else
|
||||
{
|
||||
_objects.release();
|
||||
}
|
||||
}
|
||||
|
||||
cv::Size getClassifierCvSize() const { return cv::Size(haar.ClassifierSize.width, haar.ClassifierSize.height); }
|
||||
void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
|
||||
{
|
||||
if (_gpu_objects.empty())
|
||||
{
|
||||
objects.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
private:
|
||||
static void NCVDebugOutputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); }
|
||||
Mat gpu_objects;
|
||||
if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
|
||||
{
|
||||
_gpu_objects.getGpuMat().download(gpu_objects);
|
||||
}
|
||||
else
|
||||
{
|
||||
gpu_objects = _gpu_objects.getMat();
|
||||
}
|
||||
|
||||
NCVStatus load(const String& classifierFile)
|
||||
CV_Assert( gpu_objects.rows == 1 );
|
||||
CV_Assert( gpu_objects.type() == DataType<Rect>::type );
|
||||
|
||||
Rect* ptr = gpu_objects.ptr<Rect>();
|
||||
objects.assign(ptr, ptr + gpu_objects.cols);
|
||||
}
|
||||
|
||||
NCVStatus HaarCascade_Impl::load(const String& classifierFile)
|
||||
{
|
||||
int devId = cv::cuda::getDevice();
|
||||
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
|
||||
@ -246,7 +271,7 @@ private:
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
NCVStatus calculateMemReqsAndAllocate(const Size& frameSize)
|
||||
NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize)
|
||||
{
|
||||
if (lastAllocatedFrameSize == frameSize)
|
||||
{
|
||||
@ -289,88 +314,62 @@ private:
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
cudaDeviceProp devProp;
|
||||
NCVStatus ncvStat;
|
||||
NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections)
|
||||
{
|
||||
calculateMemReqsAndAllocate(src.size());
|
||||
|
||||
Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
|
||||
Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
|
||||
NCVMemPtr src_beg;
|
||||
src_beg.ptr = (void*)src.ptr<Ncv8u>();
|
||||
src_beg.memtype = NCVMemoryTypeDevice;
|
||||
|
||||
Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
|
||||
Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
|
||||
Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;
|
||||
NCVMemSegment src_seg;
|
||||
src_seg.begin = src_beg;
|
||||
src_seg.size = src.step * src.rows;
|
||||
|
||||
HaarClassifierCascadeDescriptor haar;
|
||||
NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
|
||||
ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
|
||||
|
||||
Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages;
|
||||
Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
|
||||
Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
|
||||
CV_Assert(objects.rows == 1);
|
||||
|
||||
Size lastAllocatedFrameSize;
|
||||
NCVMemPtr objects_beg;
|
||||
objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
|
||||
objects_beg.memtype = NCVMemoryTypeDevice;
|
||||
|
||||
Ptr<NCVMemStackAllocator> gpuAllocator;
|
||||
Ptr<NCVMemStackAllocator> cpuAllocator;
|
||||
NCVMemSegment objects_seg;
|
||||
objects_seg.begin = objects_beg;
|
||||
objects_seg.size = objects.step * objects.rows;
|
||||
NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
|
||||
ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
|
||||
|
||||
virtual ~HaarCascade(){}
|
||||
};
|
||||
NcvSize32u roi;
|
||||
roi.width = d_src.width();
|
||||
roi.height = d_src.height();
|
||||
|
||||
NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);
|
||||
|
||||
Ncv32u flags = 0;
|
||||
flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0;
|
||||
|
||||
ncvStat = ncvDetectObjectsMultiScale_device(
|
||||
d_src, roi, d_rects, numDetections, haar, *h_haarStages,
|
||||
*d_haarStages, *d_haarNodes, *d_haarFeatures,
|
||||
winMinSize,
|
||||
minNeighbors_,
|
||||
scaleFactor_, 1,
|
||||
flags,
|
||||
*gpuAllocator, *cpuAllocator, devProp, 0);
|
||||
ncvAssertReturnNcvStat(ncvStat);
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
|
||||
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Component-wise difference of two sizes (a - b).
cv::Size operator -(const cv::Size& a, const cv::Size& b)
{
    const int dw = a.width - b.width;
    const int dh = a.height - b.height;
    return cv::Size(dw, dh);
}
|
||||
|
||||
// Adds the same integer offset to both the width and the height of a size.
cv::Size operator +(const cv::Size& a, const int& i)
{
    const int w = a.width + i;
    const int h = a.height + i;
    return cv::Size(w, h);
}
|
||||
|
||||
// Scales a size up by factor f, rounding each dimension with cvRound.
cv::Size operator *(const cv::Size& a, const float& f)
{
    const int w = cvRound(a.width * f);
    const int h = cvRound(a.height * f);
    return cv::Size(w, h);
}
|
||||
|
||||
// Scales a size down by factor f, rounding each dimension with cvRound.
cv::Size operator /(const cv::Size& a, const float& f)
{
    const int w = cvRound(a.width / f);
    const int h = cvRound(a.height / f);
    return cv::Size(w, h);
}
|
||||
|
||||
// Component-wise "fits inside" test: true when a is no wider AND no taller
// than b. Each dimension is compared against the matching dimension of b
// (width with width, height with height).
bool operator <=(const cv::Size& a, const cv::Size& b)
{
    return a.width <= b.width && a.height <= b.height;
}
|
||||
|
||||
struct PyrLavel
|
||||
{
|
||||
PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
|
||||
{
|
||||
do
|
||||
{
|
||||
order = _order;
|
||||
scale = pow(_scale, order);
|
||||
sFrame = frame / scale;
|
||||
workArea = sFrame - window + 1;
|
||||
sWindow = window * scale;
|
||||
_order++;
|
||||
} while (sWindow <= minObjectSize);
|
||||
}
|
||||
|
||||
bool isFeasible(cv::Size maxObj)
|
||||
{
|
||||
return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
|
||||
}
|
||||
|
||||
PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
|
||||
{
|
||||
return PyrLavel(order + 1, factor, frame, window, minObjectSize);
|
||||
}
|
||||
|
||||
int order;
|
||||
float scale;
|
||||
cv::Size sFrame;
|
||||
cv::Size workArea;
|
||||
cv::Size sWindow;
|
||||
};
|
||||
//
|
||||
// LbpCascade
|
||||
//
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
@ -394,42 +393,154 @@ namespace cv { namespace cuda { namespace device
|
||||
unsigned int* classified,
|
||||
PtrStepSzi integral);
|
||||
|
||||
void connectedConmonents(PtrStepSz<int4> candidates, int ncandidates, PtrStepSz<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);
|
||||
void connectedConmonents(PtrStepSz<int4> candidates,
|
||||
int ncandidates,
|
||||
PtrStepSz<int4> objects,
|
||||
int groupThreshold,
|
||||
float grouping_eps,
|
||||
unsigned int* nclasses);
|
||||
}
|
||||
}}}
|
||||
|
||||
struct cv::cuda::CascadeClassifier_CUDA::LbpCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
|
||||
namespace
|
||||
{
|
||||
public:
|
||||
struct Stage
|
||||
cv::Size operator -(const cv::Size& a, const cv::Size& b)
|
||||
{
|
||||
int first;
|
||||
int ntrees;
|
||||
float threshold;
|
||||
return cv::Size(a.width - b.width, a.height - b.height);
|
||||
}
|
||||
|
||||
// Adds the same integer offset to both the width and the height of a size.
cv::Size operator +(const cv::Size& a, const int& i)
{
    const int w = a.width + i;
    const int h = a.height + i;
    return cv::Size(w, h);
}
|
||||
|
||||
// Scales a size up by factor f, rounding each dimension with cvRound.
cv::Size operator *(const cv::Size& a, const float& f)
{
    const int w = cvRound(a.width * f);
    const int h = cvRound(a.height * f);
    return cv::Size(w, h);
}
|
||||
|
||||
// Scales a size down by factor f, rounding each dimension with cvRound.
cv::Size operator /(const cv::Size& a, const float& f)
{
    const int w = cvRound(a.width / f);
    const int h = cvRound(a.height / f);
    return cv::Size(w, h);
}
|
||||
|
||||
// Component-wise "fits inside" test: true when a is no wider AND no taller
// than b. Each dimension is compared against the matching dimension of b
// (width with width, height with height).
bool operator <=(const cv::Size& a, const cv::Size& b)
{
    return a.width <= b.width && a.height <= b.height;
}
|
||||
|
||||
struct PyrLavel
|
||||
{
|
||||
PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
|
||||
{
|
||||
do
|
||||
{
|
||||
order = _order;
|
||||
scale = pow(_scale, order);
|
||||
sFrame = frame / scale;
|
||||
workArea = sFrame - window + 1;
|
||||
sWindow = window * scale;
|
||||
_order++;
|
||||
} while (sWindow <= minObjectSize);
|
||||
}
|
||||
|
||||
bool isFeasible(cv::Size maxObj)
|
||||
{
|
||||
return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
|
||||
}
|
||||
|
||||
PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
|
||||
{
|
||||
return PyrLavel(order + 1, factor, frame, window, minObjectSize);
|
||||
}
|
||||
|
||||
int order;
|
||||
float scale;
|
||||
cv::Size sFrame;
|
||||
cv::Size workArea;
|
||||
cv::Size sWindow;
|
||||
};
|
||||
|
||||
LbpCascade(){}
|
||||
virtual ~LbpCascade(){}
|
||||
|
||||
virtual unsigned int process(const GpuMat& image, GpuMat& objects, float scaleFactor, int groupThreshold, bool /*findLargestObject*/,
|
||||
bool /*visualizeInPlace*/, cv::Size minObjectSize, cv::Size maxObjectSize)
|
||||
class LbpCascade_Impl : public CascadeClassifierBase
|
||||
{
|
||||
CV_Assert(scaleFactor > 1 && image.depth() == CV_8U);
|
||||
public:
|
||||
explicit LbpCascade_Impl(const FileStorage& file);
|
||||
|
||||
virtual Size getClassifierSize() const { return NxM; }
|
||||
|
||||
virtual void detectMultiScale(InputArray image,
|
||||
OutputArray objects,
|
||||
Stream& stream);
|
||||
|
||||
virtual void convert(OutputArray gpu_objects,
|
||||
std::vector<Rect>& objects);
|
||||
|
||||
private:
|
||||
bool load(const FileNode &root);
|
||||
void allocateBuffers(cv::Size frame);
|
||||
|
||||
private:
|
||||
struct Stage
|
||||
{
|
||||
int first;
|
||||
int ntrees;
|
||||
float threshold;
|
||||
};
|
||||
|
||||
enum stage { BOOST = 0 };
|
||||
enum feature { LBP = 1, HAAR = 2 };
|
||||
|
||||
static const stage stageType = BOOST;
|
||||
static const feature featureType = LBP;
|
||||
|
||||
cv::Size NxM;
|
||||
bool isStumps;
|
||||
int ncategories;
|
||||
int subsetSize;
|
||||
int nodeStep;
|
||||
|
||||
// gpu representation of classifier
|
||||
GpuMat stage_mat;
|
||||
GpuMat trees_mat;
|
||||
GpuMat nodes_mat;
|
||||
GpuMat leaves_mat;
|
||||
GpuMat subsets_mat;
|
||||
GpuMat features_mat;
|
||||
|
||||
GpuMat integral;
|
||||
GpuMat integralBuffer;
|
||||
GpuMat resuzeBuffer;
|
||||
|
||||
GpuMat candidates;
|
||||
static const int integralFactor = 4;
|
||||
};
|
||||
|
||||
// Builds the LBP cascade by loading it from the first top-level node of the
// given file storage.
// NOTE(review): load() is declared as returning bool, but its result is not
// checked here - confirm whether a failed load should be reported.
LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file)
|
||||
{
|
||||
load(file.getFirstTopLevelNode());
|
||||
}
|
||||
|
||||
void LbpCascade_Impl::detectMultiScale(InputArray _image,
|
||||
OutputArray _objects,
|
||||
Stream& stream)
|
||||
{
|
||||
const GpuMat image = _image.getGpuMat();
|
||||
|
||||
CV_Assert( image.depth() == CV_8U);
|
||||
CV_Assert( scaleFactor_ > 1 );
|
||||
CV_Assert( !stream );
|
||||
|
||||
// const int defaultObjSearchNum = 100;
|
||||
const float grouping_eps = 0.2f;
|
||||
|
||||
if( !objects.empty() && objects.depth() == CV_32S)
|
||||
objects.reshape(4, 1);
|
||||
else
|
||||
objects.create(1 , image.cols >> 4, CV_32SC4);
|
||||
BufferPool pool(stream);
|
||||
GpuMat objects = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);
|
||||
|
||||
// used for debug
|
||||
// candidates.setTo(cv::Scalar::all(0));
|
||||
// objects.setTo(cv::Scalar::all(0));
|
||||
|
||||
if (maxObjectSize == cv::Size())
|
||||
maxObjectSize = image.size();
|
||||
if (maxObjectSize_ == cv::Size())
|
||||
maxObjectSize_ = image.size();
|
||||
|
||||
allocateBuffers(image.size());
|
||||
|
||||
@ -437,9 +548,9 @@ public:
|
||||
GpuMat dclassified(1, 1, CV_32S);
|
||||
cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
|
||||
|
||||
PyrLavel level(0, scaleFactor, image.size(), NxM, minObjectSize);
|
||||
PyrLavel level(0, scaleFactor_, image.size(), NxM, minObjectSize_);
|
||||
|
||||
while (level.isFeasible(maxObjectSize))
|
||||
while (level.isFeasible(maxObjectSize_))
|
||||
{
|
||||
int acc = level.sFrame.width + 1;
|
||||
float iniScale = level.scale;
|
||||
@ -449,23 +560,22 @@ public:
|
||||
|
||||
int total = 0, prev = 0;
|
||||
|
||||
while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize))
|
||||
while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize_))
|
||||
{
|
||||
// create suitable matrix headers
|
||||
GpuMat src = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
|
||||
GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));
|
||||
GpuMat buff = integralBuffer;
|
||||
|
||||
// generate integral for scale
|
||||
cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
|
||||
cuda::integral(src, sint, buff);
|
||||
cuda::integral(src, sint);
|
||||
|
||||
// calculate job
|
||||
int totalWidth = level.workArea.width / step;
|
||||
total += totalWidth * (level.workArea.height / step);
|
||||
|
||||
// go to next pyramid level
|
||||
level = level.next(scaleFactor, image.size(), NxM, minObjectSize);
|
||||
level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_);
|
||||
area = level.workArea;
|
||||
|
||||
step = (1 + (level.scale <= 2.f));
|
||||
@ -473,60 +583,55 @@ public:
|
||||
acc += level.sFrame.width + 1;
|
||||
}
|
||||
|
||||
device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
|
||||
device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
|
||||
leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
|
||||
}
|
||||
|
||||
if (groupThreshold <= 0 || objects.empty())
|
||||
return 0;
|
||||
if (minNeighbors_ <= 0 || objects.empty())
|
||||
return;
|
||||
|
||||
cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
device::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>());
|
||||
device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr<unsigned int>());
|
||||
|
||||
cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
return classified;
|
||||
}
|
||||
|
||||
virtual cv::Size getClassifierCvSize() const { return NxM; }
|
||||
|
||||
bool read(const String& classifierAsXml)
|
||||
{
|
||||
FileStorage fs(classifierAsXml, FileStorage::READ);
|
||||
return fs.isOpened() ? read(fs.getFirstTopLevelNode()) : false;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void allocateBuffers(cv::Size frame)
|
||||
{
|
||||
if (frame == cv::Size())
|
||||
return;
|
||||
|
||||
if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
|
||||
if (classified > 0)
|
||||
{
|
||||
resuzeBuffer.create(frame, CV_8UC1);
|
||||
|
||||
integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
|
||||
|
||||
#ifdef HAVE_OPENCV_CUDALEGACY
|
||||
NcvSize32u roiSize;
|
||||
roiSize.width = frame.width;
|
||||
roiSize.height = frame.height;
|
||||
|
||||
cudaDeviceProp prop;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
|
||||
|
||||
Ncv32u bufSize;
|
||||
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
|
||||
integralBuffer.create(1, bufSize, CV_8UC1);
|
||||
#endif
|
||||
|
||||
candidates.create(1 , frame.width >> 1, CV_32SC4);
|
||||
objects.colRange(0, classified).copyTo(_objects);
|
||||
}
|
||||
else
|
||||
{
|
||||
_objects.release();
|
||||
}
|
||||
}
|
||||
|
||||
bool read(const FileNode &root)
|
||||
void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
|
||||
{
|
||||
if (_gpu_objects.empty())
|
||||
{
|
||||
objects.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
Mat gpu_objects;
|
||||
if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
|
||||
{
|
||||
_gpu_objects.getGpuMat().download(gpu_objects);
|
||||
}
|
||||
else
|
||||
{
|
||||
gpu_objects = _gpu_objects.getMat();
|
||||
}
|
||||
|
||||
CV_Assert( gpu_objects.rows == 1 );
|
||||
CV_Assert( gpu_objects.type() == DataType<Rect>::type );
|
||||
|
||||
Rect* ptr = gpu_objects.ptr<Rect>();
|
||||
objects.assign(ptr, ptr + gpu_objects.cols);
|
||||
}
|
||||
|
||||
bool LbpCascade_Impl::load(const FileNode &root)
|
||||
{
|
||||
const char *CUDA_CC_STAGE_TYPE = "stageType";
|
||||
const char *CUDA_CC_FEATURE_TYPE = "featureType";
|
||||
@ -667,92 +772,90 @@ private:
|
||||
return true;
|
||||
}
|
||||
|
||||
enum stage { BOOST = 0 };
|
||||
enum feature { LBP = 1, HAAR = 2 };
|
||||
static const stage stageType = BOOST;
|
||||
static const feature featureType = LBP;
|
||||
void LbpCascade_Impl::allocateBuffers(cv::Size frame)
|
||||
{
|
||||
if (frame == cv::Size())
|
||||
return;
|
||||
|
||||
cv::Size NxM;
|
||||
bool isStumps;
|
||||
int ncategories;
|
||||
int subsetSize;
|
||||
int nodeStep;
|
||||
if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
|
||||
{
|
||||
resuzeBuffer.create(frame, CV_8UC1);
|
||||
|
||||
// gpu representation of classifier
|
||||
GpuMat stage_mat;
|
||||
GpuMat trees_mat;
|
||||
GpuMat nodes_mat;
|
||||
GpuMat leaves_mat;
|
||||
GpuMat subsets_mat;
|
||||
GpuMat features_mat;
|
||||
integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
|
||||
|
||||
GpuMat integral;
|
||||
GpuMat integralBuffer;
|
||||
GpuMat resuzeBuffer;
|
||||
#ifdef HAVE_OPENCV_CUDALEGACY
|
||||
NcvSize32u roiSize;
|
||||
roiSize.width = frame.width;
|
||||
roiSize.height = frame.height;
|
||||
|
||||
GpuMat candidates;
|
||||
static const int integralFactor = 4;
|
||||
};
|
||||
cudaDeviceProp prop;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
|
||||
|
||||
cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA()
|
||||
: findLargestObject(false), visualizeInPlace(false), impl(0) {}
|
||||
Ncv32u bufSize;
|
||||
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
|
||||
integralBuffer.create(1, bufSize, CV_8UC1);
|
||||
#endif
|
||||
|
||||
cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA(const String& filename)
|
||||
: findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); }
|
||||
candidates.create(1 , frame.width >> 1, CV_32SC4);
|
||||
}
|
||||
}
|
||||
|
||||
cv::cuda::CascadeClassifier_CUDA::~CascadeClassifier_CUDA() { release(); }
|
||||
|
||||
void cv::cuda::CascadeClassifier_CUDA::release() { if (impl) { delete impl; impl = 0; } }
|
||||
|
||||
bool cv::cuda::CascadeClassifier_CUDA::empty() const { return impl == 0; }
|
||||
|
||||
Size cv::cuda::CascadeClassifier_CUDA::getClassifierSize() const
|
||||
{
|
||||
return this->empty() ? Size() : impl->getClassifierCvSize();
|
||||
}
|
||||
|
||||
int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize)
|
||||
{
|
||||
CV_Assert( !this->empty());
|
||||
return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, cv::Size());
|
||||
}
|
||||
//
|
||||
// create
|
||||
//
|
||||
|
||||
int cv::cuda::CascadeClassifier_CUDA::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize, double scaleFactor, int minNeighbors)
|
||||
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String& filename)
|
||||
{
|
||||
CV_Assert( !this->empty());
|
||||
return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, maxObjectSize);
|
||||
}
|
||||
|
||||
bool cv::cuda::CascadeClassifier_CUDA::load(const String& filename)
|
||||
{
|
||||
release();
|
||||
|
||||
String fext = filename.substr(filename.find_last_of(".") + 1);
|
||||
fext = fext.toLowerCase();
|
||||
|
||||
if (fext == "nvbin")
|
||||
{
|
||||
impl = new HaarCascade();
|
||||
return impl->read(filename);
|
||||
#ifndef HAVE_OPENCV_CUDALEGACY
|
||||
CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
|
||||
return Ptr<cuda::CascadeClassifier>();
|
||||
#else
|
||||
return makePtr<HaarCascade_Impl>(filename);
|
||||
#endif
|
||||
}
|
||||
|
||||
FileStorage fs(filename, FileStorage::READ);
|
||||
|
||||
if (!fs.isOpened())
|
||||
{
|
||||
impl = new HaarCascade();
|
||||
return impl->read(filename);
|
||||
#ifndef HAVE_OPENCV_CUDALEGACY
|
||||
CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
|
||||
return Ptr<cuda::CascadeClassifier>();
|
||||
#else
|
||||
return makePtr<HaarCascade_Impl>(filename);
|
||||
#endif
|
||||
}
|
||||
|
||||
const char *CUDA_CC_LBP = "LBP";
|
||||
String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"];
|
||||
if (featureTypeStr == CUDA_CC_LBP)
|
||||
impl = new LbpCascade();
|
||||
{
|
||||
return makePtr<LbpCascade_Impl>(fs);
|
||||
}
|
||||
else
|
||||
impl = new HaarCascade();
|
||||
{
|
||||
#ifndef HAVE_OPENCV_CUDALEGACY
|
||||
CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
|
||||
return Ptr<cuda::CascadeClassifier>();
|
||||
#else
|
||||
return makePtr<HaarCascade_Impl>(filename);
|
||||
#endif
|
||||
}
|
||||
|
||||
impl->read(filename);
|
||||
return !this->empty();
|
||||
CV_Error(Error::StsUnsupportedFormat, "Unsupported format for CUDA CascadeClassifier");
|
||||
return Ptr<cuda::CascadeClassifier>();
|
||||
}
|
||||
|
||||
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage& file)
|
||||
{
|
||||
return makePtr<LbpCascade_Impl>(file);
|
||||
}
|
||||
|
||||
#endif
|
1697
modules/cudaobjdetect/src/hog.cpp
Normal file
1697
modules/cudaobjdetect/src/hog.cpp
Normal file
File diff suppressed because it is too large
Load Diff
62
modules/cudaobjdetect/src/precomp.hpp
Normal file
62
modules/cudaobjdetect/src/precomp.hpp
Normal file
@ -0,0 +1,62 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_PRECOMP_H__
|
||||
#define __OPENCV_PRECOMP_H__
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "opencv2/cudaobjdetect.hpp"
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
#include "opencv2/cudawarping.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
#include "opencv2/core/utility.hpp"
|
||||
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCV_CUDALEGACY
|
||||
# include "opencv2/cudalegacy/private.hpp"
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCV_PRECOMP_H__ */
|
45
modules/cudaobjdetect/test/test_main.cpp
Normal file
45
modules/cudaobjdetect/test/test_main.cpp
Normal file
@ -0,0 +1,45 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
|
||||
CV_CUDA_TEST_MAIN("gpu")
|
@ -48,9 +48,10 @@ using namespace cvtest;
|
||||
|
||||
//#define DUMP
|
||||
|
||||
struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescriptor
|
||||
struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>
|
||||
{
|
||||
cv::cuda::DeviceInfo devInfo;
|
||||
cv::Ptr<cv::cuda::HOG> hog;
|
||||
|
||||
#ifdef DUMP
|
||||
std::ofstream f;
|
||||
@ -69,23 +70,13 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
|
||||
devInfo = GetParam();
|
||||
|
||||
cv::cuda::setDevice(devInfo.deviceID());
|
||||
|
||||
hog = cv::cuda::HOG::create();
|
||||
}
|
||||
|
||||
#ifdef DUMP
|
||||
void dump(const cv::Mat& blockHists, const std::vector<cv::Point>& locations)
|
||||
void dump(const std::vector<cv::Point>& locations)
|
||||
{
|
||||
f.write((char*)&blockHists.rows, sizeof(blockHists.rows));
|
||||
f.write((char*)&blockHists.cols, sizeof(blockHists.cols));
|
||||
|
||||
for (int i = 0; i < blockHists.rows; ++i)
|
||||
{
|
||||
for (int j = 0; j < blockHists.cols; ++j)
|
||||
{
|
||||
float val = blockHists.at<float>(i, j);
|
||||
f.write((char*)&val, sizeof(val));
|
||||
}
|
||||
}
|
||||
|
||||
int nlocations = locations.size();
|
||||
f.write((char*)&nlocations, sizeof(nlocations));
|
||||
|
||||
@ -93,21 +84,18 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
|
||||
f.write((char*)&locations[i], sizeof(locations[i]));
|
||||
}
|
||||
#else
|
||||
void compare(const cv::Mat& blockHists, const std::vector<cv::Point>& locations)
|
||||
void compare(const std::vector<cv::Point>& locations)
|
||||
{
|
||||
// skip block_hists check
|
||||
int rows, cols;
|
||||
f.read((char*)&rows, sizeof(rows));
|
||||
f.read((char*)&cols, sizeof(cols));
|
||||
ASSERT_EQ(rows, blockHists.rows);
|
||||
ASSERT_EQ(cols, blockHists.cols);
|
||||
|
||||
for (int i = 0; i < blockHists.rows; ++i)
|
||||
for (int i = 0; i < rows; ++i)
|
||||
{
|
||||
for (int j = 0; j < blockHists.cols; ++j)
|
||||
for (int j = 0; j < cols; ++j)
|
||||
{
|
||||
float val;
|
||||
f.read((char*)&val, sizeof(val));
|
||||
ASSERT_NEAR(val, blockHists.at<float>(i, j), 1e-3);
|
||||
}
|
||||
}
|
||||
|
||||
@ -126,54 +114,41 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
|
||||
|
||||
void testDetect(const cv::Mat& img)
|
||||
{
|
||||
gamma_correction = false;
|
||||
setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
|
||||
hog->setGammaCorrection(false);
|
||||
hog->setSVMDetector(hog->getDefaultPeopleDetector());
|
||||
|
||||
std::vector<cv::Point> locations;
|
||||
|
||||
// Test detect
|
||||
detect(loadMat(img), locations, 0);
|
||||
hog->detect(loadMat(img), locations);
|
||||
|
||||
#ifdef DUMP
|
||||
dump(cv::Mat(block_hists), locations);
|
||||
dump(locations);
|
||||
#else
|
||||
compare(cv::Mat(block_hists), locations);
|
||||
compare(locations);
|
||||
#endif
|
||||
|
||||
// Test detect on smaller image
|
||||
cv::Mat img2;
|
||||
cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2));
|
||||
detect(loadMat(img2), locations, 0);
|
||||
hog->detect(loadMat(img2), locations);
|
||||
|
||||
#ifdef DUMP
|
||||
dump(cv::Mat(block_hists), locations);
|
||||
dump(locations);
|
||||
#else
|
||||
compare(cv::Mat(block_hists), locations);
|
||||
compare(locations);
|
||||
#endif
|
||||
|
||||
// Test detect on greater image
|
||||
cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2));
|
||||
detect(loadMat(img2), locations, 0);
|
||||
hog->detect(loadMat(img2), locations);
|
||||
|
||||
#ifdef DUMP
|
||||
dump(cv::Mat(block_hists), locations);
|
||||
dump(locations);
|
||||
#else
|
||||
compare(cv::Mat(block_hists), locations);
|
||||
compare(locations);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Does not compare border value, as interpolation leads to delta
|
||||
void compare_inner_parts(cv::Mat d1, cv::Mat d2)
|
||||
{
|
||||
for (int i = 1; i < blocks_per_win_y - 1; ++i)
|
||||
for (int j = 1; j < blocks_per_win_x - 1; ++j)
|
||||
for (int k = 0; k < block_hist_size; ++k)
|
||||
{
|
||||
float a = d1.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
|
||||
float b = d2.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
|
||||
ASSERT_FLOAT_EQ(a, b);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// desabled while resize does not fixed
|
||||
@ -182,13 +157,8 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
|
||||
cv::Mat img_rgb = readImage("hog/road.png");
|
||||
ASSERT_FALSE(img_rgb.empty());
|
||||
|
||||
#ifdef DUMP
|
||||
f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
|
||||
ASSERT_TRUE(f.is_open());
|
||||
#else
|
||||
f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
|
||||
ASSERT_TRUE(f.is_open());
|
||||
#endif
|
||||
|
||||
// Test on color image
|
||||
cv::Mat img;
|
||||
@ -198,8 +168,6 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
|
||||
// Test on gray image
|
||||
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
|
||||
testDetect(img);
|
||||
|
||||
f.close();
|
||||
}
|
||||
|
||||
CUDA_TEST_P(HOG, GetDescriptors)
|
||||
@ -216,8 +184,14 @@ CUDA_TEST_P(HOG, GetDescriptors)
|
||||
|
||||
// Convert train images into feature vectors (train table)
|
||||
cv::cuda::GpuMat descriptors, descriptors_by_cols;
|
||||
getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
|
||||
getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
|
||||
|
||||
hog->setWinStride(Size(64, 128));
|
||||
|
||||
hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_ROW_BY_ROW);
|
||||
hog->compute(d_img, descriptors);
|
||||
|
||||
hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_COL_BY_COL);
|
||||
hog->compute(d_img, descriptors_by_cols);
|
||||
|
||||
// Check size of the result train table
|
||||
wins_per_img_x = 3;
|
||||
@ -242,48 +216,6 @@ CUDA_TEST_P(HOG, GetDescriptors)
|
||||
ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
|
||||
r[(x * blocks_per_win_y + y) * block_hist_size + k]);
|
||||
}
|
||||
|
||||
/* Now we want to extract the same feature vectors, but from single images. NOTE: results will
|
||||
be defferent, due to border values interpolation. Using of many small images is slower, however we
|
||||
wont't call getDescriptors and will use computeBlockHistograms instead of. computeBlockHistograms
|
||||
works good, it can be checked in the gpu_hog sample */
|
||||
|
||||
img_rgb = readImage("hog/positive1.png");
|
||||
ASSERT_TRUE(!img_rgb.empty());
|
||||
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
|
||||
computeBlockHistograms(cv::cuda::GpuMat(img));
|
||||
// Everything is fine with interpolation for left top subimage
|
||||
ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
|
||||
|
||||
img_rgb = readImage("hog/positive2.png");
|
||||
ASSERT_TRUE(!img_rgb.empty());
|
||||
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
|
||||
computeBlockHistograms(cv::cuda::GpuMat(img));
|
||||
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
|
||||
|
||||
img_rgb = readImage("hog/negative1.png");
|
||||
ASSERT_TRUE(!img_rgb.empty());
|
||||
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
|
||||
computeBlockHistograms(cv::cuda::GpuMat(img));
|
||||
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
|
||||
|
||||
img_rgb = readImage("hog/negative2.png");
|
||||
ASSERT_TRUE(!img_rgb.empty());
|
||||
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
|
||||
computeBlockHistograms(cv::cuda::GpuMat(img));
|
||||
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
|
||||
|
||||
img_rgb = readImage("hog/positive3.png");
|
||||
ASSERT_TRUE(!img_rgb.empty());
|
||||
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
|
||||
computeBlockHistograms(cv::cuda::GpuMat(img));
|
||||
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
|
||||
|
||||
img_rgb = readImage("hog/negative3.png");
|
||||
ASSERT_TRUE(!img_rgb.empty());
|
||||
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
|
||||
computeBlockHistograms(cv::cuda::GpuMat(img));
|
||||
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, HOG, ALL_DEVICES);
|
||||
@ -310,12 +242,12 @@ CUDA_TEST_P(CalTech, HOG)
|
||||
cv::cuda::GpuMat d_img(img);
|
||||
cv::Mat markedImage(img.clone());
|
||||
|
||||
cv::cuda::HOGDescriptor d_hog;
|
||||
d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
|
||||
d_hog.nlevels = d_hog.nlevels + 32;
|
||||
cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
|
||||
d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
|
||||
d_hog->setNumLevels(d_hog->getNumLevels() + 32);
|
||||
|
||||
std::vector<cv::Rect> found_locations;
|
||||
d_hog.detectMultiScale(d_img, found_locations);
|
||||
d_hog->detectMultiScale(d_img, found_locations);
|
||||
|
||||
#if defined (LOG_CASCADE_STATISTIC)
|
||||
for (int i = 0; i < (int)found_locations.size(); i++)
|
||||
@ -326,7 +258,8 @@ CUDA_TEST_P(CalTech, HOG)
|
||||
cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
|
||||
}
|
||||
|
||||
cv::imshow("Res", markedImage); cv::waitKey();
|
||||
cv::imshow("Res", markedImage);
|
||||
cv::waitKey();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -354,9 +287,15 @@ PARAM_TEST_CASE(LBP_Read_classifier, cv::cuda::DeviceInfo, int)
|
||||
|
||||
CUDA_TEST_P(LBP_Read_classifier, Accuracy)
|
||||
{
|
||||
cv::cuda::CascadeClassifier_CUDA classifier;
|
||||
std::string classifierXmlPath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/lbpcascade_frontalface.xml";
|
||||
ASSERT_TRUE(classifier.load(classifierXmlPath));
|
||||
|
||||
cv::Ptr<cv::cuda::CascadeClassifier> d_cascade;
|
||||
|
||||
ASSERT_NO_THROW(
|
||||
d_cascade = cv::cuda::CascadeClassifier::create(classifierXmlPath);
|
||||
);
|
||||
|
||||
ASSERT_FALSE(d_cascade.empty());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, LBP_Read_classifier,
|
||||
@ -396,29 +335,28 @@ CUDA_TEST_P(LBP_classify, Accuracy)
|
||||
for (; it != rects.end(); ++it)
|
||||
cv::rectangle(markedImage, *it, cv::Scalar(255, 0, 0));
|
||||
|
||||
cv::cuda::CascadeClassifier_CUDA gpuClassifier;
|
||||
ASSERT_TRUE(gpuClassifier.load(classifierXmlPath));
|
||||
cv::Ptr<cv::cuda::CascadeClassifier> gpuClassifier =
|
||||
cv::cuda::CascadeClassifier::create(classifierXmlPath);
|
||||
|
||||
cv::cuda::GpuMat gpu_rects;
|
||||
cv::cuda::GpuMat tested(grey);
|
||||
int count = gpuClassifier.detectMultiScale(tested, gpu_rects);
|
||||
cv::cuda::GpuMat gpu_rects_buf;
|
||||
gpuClassifier->detectMultiScale(tested, gpu_rects_buf);
|
||||
|
||||
std::vector<cv::Rect> gpu_rects;
|
||||
gpuClassifier->convert(gpu_rects_buf, gpu_rects);
|
||||
|
||||
#if defined (LOG_CASCADE_STATISTIC)
|
||||
cv::Mat downloaded(gpu_rects);
|
||||
const cv::Rect* faces = downloaded.ptr<cv::Rect>();
|
||||
for (int i = 0; i < count; i++)
|
||||
for (size_t i = 0; i < gpu_rects.size(); i++)
|
||||
{
|
||||
cv::Rect r = faces[i];
|
||||
cv::Rect r = gpu_rects[i];
|
||||
|
||||
std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl;
|
||||
cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined (LOG_CASCADE_STATISTIC)
|
||||
cv::imshow("Res", markedImage); cv::waitKey();
|
||||
cv::imshow("Res", markedImage);
|
||||
cv::waitKey();
|
||||
#endif
|
||||
(void)count;
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, LBP_classify,
|
64
modules/cudaobjdetect/test/test_precomp.hpp
Normal file
64
modules/cudaobjdetect/test/test_precomp.hpp
Normal file
@ -0,0 +1,64 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
# pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
# if defined __clang__ || defined __APPLE__
|
||||
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
|
||||
# pragma GCC diagnostic ignored "-Wextra"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef __OPENCV_TEST_PRECOMP_HPP__
|
||||
#define __OPENCV_TEST_PRECOMP_HPP__
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "opencv2/ts.hpp"
|
||||
#include "opencv2/ts/cuda_test.hpp"
|
||||
|
||||
#include "opencv2/cudaobjdetect.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
|
||||
#include "cvconfig.h"
|
||||
|
||||
#endif
|
@ -6,4 +6,4 @@ set(the_description "CUDA-accelerated Image Warping")
|
||||
|
||||
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)
|
||||
|
||||
ocv_define_module(cudawarping opencv_imgproc OPTIONAL opencv_cudalegacy)
|
||||
ocv_define_module(cudawarping opencv_core opencv_imgproc OPTIONAL opencv_cudev)
|
||||
|
@ -171,21 +171,6 @@ CV_EXPORTS void warpPerspective(InputArray src, OutputArray dst, InputArray M, S
|
||||
*/
|
||||
CV_EXPORTS void buildWarpPerspectiveMaps(InputArray M, bool inverse, Size dsize, OutputArray xmap, OutputArray ymap, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Builds plane warping maps.
|
||||
*/
|
||||
CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, InputArray T, float scale,
|
||||
OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Builds cylindrical warping maps.
|
||||
*/
|
||||
CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale,
|
||||
OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Builds spherical warping maps.
|
||||
*/
|
||||
CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale,
|
||||
OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Rotates an image around the origin (0,0) and then shifts it.
|
||||
|
||||
@param src Source image. Supports 1, 3 or 4 channels images with CV_8U , CV_16U or CV_32F
|
||||
@ -224,14 +209,6 @@ src .
|
||||
*/
|
||||
CV_EXPORTS void pyrUp(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
|
||||
|
||||
class CV_EXPORTS ImagePyramid : public Algorithm
|
||||
{
|
||||
public:
|
||||
virtual void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const = 0;
|
||||
};
|
||||
|
||||
CV_EXPORTS Ptr<ImagePyramid> createImagePyramid(InputArray img, int nLayers = -1, Stream& stream = Stream::Null());
|
||||
|
||||
//! @}
|
||||
|
||||
}} // namespace cv { namespace cuda {
|
||||
|
@ -325,88 +325,6 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, WarpPerspective,
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// BuildWarpPlaneMaps
|
||||
|
||||
// Performance test for cv::cuda::buildWarpPlaneMaps, parameterised over CUDA_TYPICAL_MAT_SIZES.
// Only the CUDA path is measured; any other run reports FAIL_NO_CPU().
PERF_TEST_P(Sz, BuildWarpPlaneMaps, CUDA_TYPICAL_MAT_SIZES)
{
    const cv::Size frameSize = GetParam();

    // Fixed 32-bit float inputs: K is the identity, R is all ones, T is all zeros.
    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
    const cv::Mat T = cv::Mat::zeros(1, 3, CV_32F);

    if (!PERF_RUN_CUDA())
    {
        FAIL_NO_CPU();
    }
    else
    {
        cv::cuda::GpuMat map_x;
        cv::cuda::GpuMat map_y;

        // The destination ROI covers the whole frame.
        TEST_CYCLE() cv::cuda::buildWarpPlaneMaps(frameSize, cv::Rect(0, 0, frameSize.width, frameSize.height), K, R, T, 1.0, map_x, map_y);

        CUDA_SANITY_CHECK(map_x);
        CUDA_SANITY_CHECK(map_y);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// BuildWarpCylindricalMaps
|
||||
|
||||
// Performance test for cv::cuda::buildWarpCylindricalMaps, parameterised over CUDA_TYPICAL_MAT_SIZES.
// Only the CUDA path is measured; any other run reports FAIL_NO_CPU().
PERF_TEST_P(Sz, BuildWarpCylindricalMaps, CUDA_TYPICAL_MAT_SIZES)
{
    const cv::Size frameSize = GetParam();

    // Fixed 32-bit float inputs: K is the identity, R is all ones.
    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);

    if (!PERF_RUN_CUDA())
    {
        FAIL_NO_CPU();
    }
    else
    {
        cv::cuda::GpuMat map_x;
        cv::cuda::GpuMat map_y;

        // The destination ROI covers the whole frame.
        TEST_CYCLE() cv::cuda::buildWarpCylindricalMaps(frameSize, cv::Rect(0, 0, frameSize.width, frameSize.height), K, R, 1.0, map_x, map_y);

        CUDA_SANITY_CHECK(map_x);
        CUDA_SANITY_CHECK(map_y);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// BuildWarpSphericalMaps
|
||||
|
||||
// Performance test for cv::cuda::buildWarpSphericalMaps, parameterised over CUDA_TYPICAL_MAT_SIZES.
// Only the CUDA path is measured; any other run reports FAIL_NO_CPU().
PERF_TEST_P(Sz, BuildWarpSphericalMaps, CUDA_TYPICAL_MAT_SIZES)
{
    const cv::Size frameSize = GetParam();

    // Fixed 32-bit float inputs: K is the identity, R is all ones.
    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);

    if (!PERF_RUN_CUDA())
    {
        FAIL_NO_CPU();
    }
    else
    {
        cv::cuda::GpuMat map_x;
        cv::cuda::GpuMat map_y;

        // The destination ROI covers the whole frame.
        TEST_CYCLE() cv::cuda::buildWarpSphericalMaps(frameSize, cv::Rect(0, 0, frameSize.width, frameSize.height), K, R, 1.0, map_x, map_y);

        CUDA_SANITY_CHECK(map_x);
        CUDA_SANITY_CHECK(map_y);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Rotate
|
||||
|
||||
@ -514,40 +432,3 @@ PERF_TEST_P(Sz_Depth_Cn, PyrUp,
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// ImagePyramidGetLayer
|
||||
|
||||
// Performance test for cv::cuda::ImagePyramid::getLayer.
// Parameters: matrix size x depth (CV_8U, CV_16U, CV_32F) x channel count (CUDA_CHANNELS_1_3_4).
// Only the CUDA path is measured; any other run reports FAIL_NO_CPU().
PERF_TEST_P(Sz_Depth_Cn, ImagePyramidGetLayer,
            Combine(CUDA_TYPICAL_MAT_SIZES,
                    Values(CV_8U, CV_16U, CV_32F),
                    CUDA_CHANNELS_1_3_4))
{
    const cv::Size srcSize = GET_PARAM(0);
    const int srcType = CV_MAKE_TYPE(GET_PARAM(1), GET_PARAM(2));

    cv::Mat src(srcSize, srcType);
    declare.in(src, WARMUP_RNG);

    // The pyramid is built with three layers; the requested layer is
    // half the source size plus 10 pixels in each dimension.
    const int nLayers = 3;
    const cv::Size dstSize(srcSize.width / 2 + 10, srcSize.height / 2 + 10);

    if (!PERF_RUN_CUDA())
    {
        FAIL_NO_CPU();
    }
    else
    {
        const cv::cuda::GpuMat d_src(src);
        cv::cuda::GpuMat dst;

        // The pyramid is constructed once, outside the timed cycle.
        cv::Ptr<cv::cuda::ImagePyramid> d_pyr = cv::cuda::createImagePyramid(d_src, nLayers);

        TEST_CYCLE() d_pyr->getLayer(dst, dstSize);

        CUDA_SANITY_CHECK(dst);
    }
}
|
||||
|
@ -47,11 +47,4 @@
|
||||
|
||||
#include "opencv2/core/private.cuda.hpp"
|
||||
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCV_CUDALEGACY
|
||||
# include "opencv2/cudalegacy.hpp"
|
||||
# include "opencv2/cudalegacy/private.hpp"
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCV_PRECOMP_H__ */
|
||||
|
@ -50,8 +50,6 @@ using namespace cv::cuda;
|
||||
void cv::cuda::pyrDown(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::pyrUp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray, int, Stream&) { throw_no_cuda(); return Ptr<ImagePyramid>(); }
|
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -133,112 +131,4 @@ void cv::cuda::pyrUp(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
func(src, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// ImagePyramid
|
||||
|
||||
#ifdef HAVE_OPENCV_CUDALEGACY
|
||||
|
||||
namespace
|
||||
{
|
||||
class ImagePyramidImpl : public ImagePyramid
|
||||
{
|
||||
public:
|
||||
ImagePyramidImpl(InputArray img, int nLayers, Stream& stream);
|
||||
|
||||
void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const;
|
||||
|
||||
private:
|
||||
GpuMat layer0_;
|
||||
std::vector<GpuMat> pyramid_;
|
||||
int nLayers_;
|
||||
};
|
||||
|
||||
ImagePyramidImpl::ImagePyramidImpl(InputArray _img, int numLayers, Stream& stream)
|
||||
{
|
||||
GpuMat img = _img.getGpuMat();
|
||||
|
||||
CV_Assert( img.depth() <= CV_32F && img.channels() <= 4 );
|
||||
|
||||
img.copyTo(layer0_, stream);
|
||||
|
||||
Size szLastLayer = img.size();
|
||||
nLayers_ = 1;
|
||||
|
||||
if (numLayers <= 0)
|
||||
numLayers = 255; // it will cut-off when any of the dimensions goes 1
|
||||
|
||||
pyramid_.resize(numLayers);
|
||||
|
||||
for (int i = 0; i < numLayers - 1; ++i)
|
||||
{
|
||||
Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
|
||||
|
||||
if (szCurLayer.width == 0 || szCurLayer.height == 0)
|
||||
break;
|
||||
|
||||
ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
|
||||
nLayers_++;
|
||||
|
||||
const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
|
||||
|
||||
cv::cuda::device::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
|
||||
|
||||
szLastLayer = szCurLayer;
|
||||
}
|
||||
}
|
||||
|
||||
void ImagePyramidImpl::getLayer(OutputArray _outImg, Size outRoi, Stream& stream) const
|
||||
{
|
||||
CV_Assert( outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0 );
|
||||
|
||||
ensureSizeIsEnough(outRoi, layer0_.type(), _outImg);
|
||||
GpuMat outImg = _outImg.getGpuMat();
|
||||
|
||||
if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
|
||||
{
|
||||
layer0_.copyTo(outImg, stream);
|
||||
return;
|
||||
}
|
||||
|
||||
float lastScale = 1.0f;
|
||||
float curScale;
|
||||
GpuMat lastLayer = layer0_;
|
||||
GpuMat curLayer;
|
||||
|
||||
for (int i = 0; i < nLayers_ - 1; ++i)
|
||||
{
|
||||
curScale = lastScale * 0.5f;
|
||||
curLayer = pyramid_[i];
|
||||
|
||||
if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
|
||||
{
|
||||
curLayer.copyTo(outImg, stream);
|
||||
}
|
||||
|
||||
if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
|
||||
break;
|
||||
|
||||
lastScale = curScale;
|
||||
lastLayer = curLayer;
|
||||
}
|
||||
|
||||
cv::cuda::device::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Factory for ImagePyramid. A real pyramid is only available when the cudalegacy module
// is part of the build; otherwise the call reports the missing support via throw_no_cuda().
Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray img, int nLayers, Stream& stream)
{
#ifdef HAVE_OPENCV_CUDALEGACY
    Ptr<ImagePyramid> pyramid(new ImagePyramidImpl(img, nLayers, stream));
    return pyramid;
#else
    // Arguments are unused in this configuration.
    (void) img;
    (void) nLayers;
    (void) stream;

    throw_no_cuda();
    return Ptr<ImagePyramid>();
#endif
}
|
||||
|
||||
#endif // HAVE_CUDA
|
||||
|
@ -53,10 +53,6 @@ void cv::cuda::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputAr
|
||||
void cv::cuda::warpPerspective(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::buildWarpPerspectiveMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::cuda::buildWarpPlaneMaps(Size, Rect, InputArray, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::buildWarpCylindricalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::buildWarpSphericalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::cuda::rotate(InputArray, OutputArray, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
|
||||
|
||||
#else // HAVE_CUDA
|
||||
@ -462,124 +458,6 @@ void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M,
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// buildWarpPlaneMaps
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||
const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
|
||||
cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::cuda::buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, InputArray _T,
|
||||
float scale, OutputArray _map_x, OutputArray _map_y, Stream& stream)
|
||||
{
|
||||
(void) src_size;
|
||||
|
||||
Mat K = _K.getMat();
|
||||
Mat R = _R.getMat();
|
||||
Mat T = _T.getMat();
|
||||
|
||||
CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
|
||||
CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
|
||||
CV_Assert( (T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32FC1 && T.isContinuous() );
|
||||
|
||||
Mat K_Rinv = K * R.t();
|
||||
Mat R_Kinv = R * K.inv();
|
||||
CV_Assert( K_Rinv.isContinuous() );
|
||||
CV_Assert( R_Kinv.isContinuous() );
|
||||
|
||||
_map_x.create(dst_roi.size(), CV_32FC1);
|
||||
_map_y.create(dst_roi.size(), CV_32FC1);
|
||||
|
||||
GpuMat map_x = _map_x.getGpuMat();
|
||||
GpuMat map_y = _map_y.getGpuMat();
|
||||
|
||||
device::imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
|
||||
T.ptr<float>(), scale, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// buildWarpCylindricalMaps
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||
cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::cuda::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
|
||||
OutputArray _map_x, OutputArray _map_y, Stream& stream)
|
||||
{
|
||||
(void) src_size;
|
||||
|
||||
Mat K = _K.getMat();
|
||||
Mat R = _R.getMat();
|
||||
|
||||
CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
|
||||
CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
|
||||
|
||||
Mat K_Rinv = K * R.t();
|
||||
Mat R_Kinv = R * K.inv();
|
||||
CV_Assert( K_Rinv.isContinuous() );
|
||||
CV_Assert( R_Kinv.isContinuous() );
|
||||
|
||||
_map_x.create(dst_roi.size(), CV_32FC1);
|
||||
_map_y.create(dst_roi.size(), CV_32FC1);
|
||||
|
||||
GpuMat map_x = _map_x.getGpuMat();
|
||||
GpuMat map_y = _map_y.getGpuMat();
|
||||
|
||||
device::imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// buildWarpSphericalMaps
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||
cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::cuda::buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
|
||||
OutputArray _map_x, OutputArray _map_y, Stream& stream)
|
||||
{
|
||||
(void) src_size;
|
||||
|
||||
Mat K = _K.getMat();
|
||||
Mat R = _R.getMat();
|
||||
|
||||
CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
|
||||
CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
|
||||
|
||||
Mat K_Rinv = K * R.t();
|
||||
Mat R_Kinv = R * K.inv();
|
||||
CV_Assert( K_Rinv.isContinuous() );
|
||||
CV_Assert( R_Kinv.isContinuous() );
|
||||
|
||||
_map_x.create(dst_roi.size(), CV_32FC1);
|
||||
_map_y.create(dst_roi.size(), CV_32FC1);
|
||||
|
||||
GpuMat map_x = _map_x.getGpuMat();
|
||||
GpuMat map_y = _map_y.getGpuMat();
|
||||
|
||||
device::imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// rotate
|
||||
|
||||
|
@ -337,7 +337,7 @@ public:
|
||||
double _min_margin=0.003, int _edge_blur_size=5 );
|
||||
|
||||
CV_WRAP virtual void detectRegions( InputArray image,
|
||||
std::vector<std::vector<Point> >& msers,
|
||||
CV_OUT std::vector<std::vector<Point> >& msers,
|
||||
std::vector<Rect>& bboxes ) = 0;
|
||||
|
||||
CV_WRAP virtual void setDelta(int delta) = 0;
|
||||
|
@ -818,7 +818,7 @@ void AKAZEFeatures::Compute_Main_Orientation(KeyPoint& kpt, const std::vector<TE
|
||||
ang2 = (ang1 + (float)(CV_PI / 3.0) >(float)(2.0*CV_PI) ? ang1 - (float)(5.0*CV_PI / 3.0) : ang1 + (float)(CV_PI / 3.0));
|
||||
sumX = sumY = 0.f;
|
||||
|
||||
for (size_t k = 0; k < ang_size; ++k) {
|
||||
for (int k = 0; k < ang_size; ++k) {
|
||||
// Get angle from the x-axis of the sample point
|
||||
const float & ang = Ang[k];
|
||||
|
||||
|
@ -48,6 +48,11 @@
|
||||
# pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#endif
|
||||
|
||||
#if (_WIN32_IE < 0x0500)
|
||||
#pragma message("WARNING: Win32 UI needs to be compiled with _WIN32_IE >= 0x0500 (_WIN32_IE_IE50)")
|
||||
#define _WIN32_IE 0x0500
|
||||
#endif
|
||||
|
||||
#include <commctrl.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
@ -90,6 +90,8 @@ enum { IMWRITE_PNG_STRATEGY_DEFAULT = 0,
|
||||
|
||||
/** @brief Loads an image from a file.
|
||||
|
||||
@anchor imread
|
||||
|
||||
@param filename Name of file to be loaded.
|
||||
@param flags Flags specifying the color type of a loaded image:
|
||||
- CV_LOAD_IMAGE_ANYDEPTH - If set, return 16-bit/32-bit image when the input has the
|
||||
|
@ -38,10 +38,17 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include "grfmt_gdal.hpp"
|
||||
#include "precomp.hpp"
|
||||
|
||||
// GDAL Macros
|
||||
#include "cvconfig.h"
|
||||
|
||||
#ifdef HAVE_GDAL
|
||||
|
||||
// Our Header
|
||||
#include "grfmt_gdal.hpp"
|
||||
|
||||
|
||||
/// C++ Standard Libraries
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
@ -195,7 +202,10 @@ GdalDecoder::~GdalDecoder(){
|
||||
/**
|
||||
* Convert data range
|
||||
*/
|
||||
double range_cast( const GDALDataType& gdalType, const int& cvDepth, const double& value ){
|
||||
double range_cast( const GDALDataType& gdalType,
|
||||
const int& cvDepth,
|
||||
const double& value )
|
||||
{
|
||||
|
||||
// uint8 -> uint8
|
||||
if( gdalType == GDT_Byte && cvDepth == CV_8U ){
|
||||
|
@ -42,16 +42,15 @@
|
||||
#ifndef __GRFMT_GDAL_HPP__
|
||||
#define __GRFMT_GDAL_HPP__
|
||||
|
||||
/// OpenCV FMT Base Type
|
||||
#include "grfmt_base.hpp"
|
||||
|
||||
/// Macro to make sure we specified GDAL in CMake
|
||||
#ifdef HAVE_GDAL
|
||||
|
||||
/// C++ Libraries
|
||||
#include <iostream>
|
||||
|
||||
/// OpenCV Libraries
|
||||
#include "grfmt_base.hpp"
|
||||
#include "precomp.hpp"
|
||||
|
||||
/// Geospatial Data Abstraction Library
|
||||
#include <gdal/cpl_conv.h>
|
||||
#include <gdal/gdal_priv.h>
|
||||
@ -61,6 +60,13 @@
|
||||
/// Start of CV Namespace
|
||||
namespace cv {
|
||||
|
||||
/**
|
||||
* Convert GDAL Pixel Range to OpenCV Pixel Range
|
||||
*/
|
||||
double range_cast( const GDALDataType& gdalType,
|
||||
const int& cvDepth,
|
||||
const double& value );
|
||||
|
||||
/**
|
||||
* Convert GDAL Palette Interpretation to OpenCV Pixel Type
|
||||
*/
|
||||
|
@ -664,7 +664,7 @@ private:
|
||||
vector<Mat> pages;
|
||||
bool res = imreadmulti(folder + "multipage.tif", pages, flags);
|
||||
ASSERT_TRUE(res == true);
|
||||
ASSERT_TRUE(pages.size() == page_count);
|
||||
ASSERT_EQ(static_cast<size_t>(page_count), pages.size());
|
||||
|
||||
for (int i = 0; i < page_count; i++)
|
||||
{
|
||||
|
@ -3332,9 +3332,11 @@ data type.
|
||||
@param result Map of comparison results. It must be single-channel 32-bit floating-point. If image
|
||||
is \f$W \times H\f$ and templ is \f$w \times h\f$ , then result is \f$(W-w+1) \times (H-h+1)\f$ .
|
||||
@param method Parameter specifying the comparison method, see cv::TemplateMatchModes
|
||||
@param mask Mask of the searched template. It must have the same data type and size as templ. It is
|
||||
not set by default.
|
||||
*/
|
||||
CV_EXPORTS_W void matchTemplate( InputArray image, InputArray templ,
|
||||
OutputArray result, int method );
|
||||
OutputArray result, int method, InputArray mask = noArray() );
|
||||
|
||||
//! @}
|
||||
|
||||
|
@ -193,7 +193,9 @@ cvStartFindContours( void* _img, CvMemStorage* storage,
|
||||
|
||||
if( !((CV_IS_MASK_ARR( mat ) && mode < CV_RETR_FLOODFILL) ||
|
||||
(CV_MAT_TYPE(mat->type) == CV_32SC1 && mode == CV_RETR_FLOODFILL)) )
|
||||
CV_Error( CV_StsUnsupportedFormat, "[Start]FindContours support only 8uC1 and 32sC1 images" );
|
||||
CV_Error( CV_StsUnsupportedFormat,
|
||||
"[Start]FindContours supports only CV_8UC1 images when mode != CV_RETR_FLOODFILL "
|
||||
"otherwise supports CV_32SC1 images only" );
|
||||
|
||||
CvSize size = cvSize( mat->width, mat->height );
|
||||
int step = mat->step;
|
||||
|
@ -2231,9 +2231,8 @@ struct SymmRowSmallVec_8u32s
|
||||
|
||||
int operator()(const uchar* src, uchar* _dst, int width, int cn) const
|
||||
{
|
||||
//Uncomment the two following lines when runtime support for neon is implemented.
|
||||
// if( !checkHardwareSupport(CV_CPU_NEON) )
|
||||
// return 0;
|
||||
if( !checkHardwareSupport(CV_CPU_NEON) )
|
||||
return 0;
|
||||
|
||||
int i = 0, _ksize = kernel.rows + kernel.cols - 1;
|
||||
int* dst = (int*)_dst;
|
||||
@ -2459,9 +2458,8 @@ struct SymmColumnVec_32s8u
|
||||
|
||||
int operator()(const uchar** _src, uchar* dst, int width) const
|
||||
{
|
||||
//Uncomment the two following lines when runtime support for neon is implemented.
|
||||
// if( !checkHardwareSupport(CV_CPU_NEON) )
|
||||
// return 0;
|
||||
if( !checkHardwareSupport(CV_CPU_NEON) )
|
||||
return 0;
|
||||
|
||||
int _ksize = kernel.rows + kernel.cols - 1;
|
||||
int ksize2 = _ksize / 2;
|
||||
@ -2612,9 +2610,8 @@ struct SymmColumnSmallVec_32s16s
|
||||
|
||||
int operator()(const uchar** _src, uchar* _dst, int width) const
|
||||
{
|
||||
//Uncomment the two following lines when runtime support for neon is implemented.
|
||||
// if( !checkHardwareSupport(CV_CPU_NEON) )
|
||||
// return 0;
|
||||
if( !checkHardwareSupport(CV_CPU_NEON) )
|
||||
return 0;
|
||||
|
||||
int ksize2 = (kernel.rows + kernel.cols - 1)/2;
|
||||
const float* ky = kernel.ptr<float>() + ksize2;
|
||||
@ -2788,15 +2785,13 @@ struct SymmColumnVec_32f16s
|
||||
kernel = _kernel;
|
||||
delta = (float)_delta;
|
||||
CV_Assert( (symmetryType & (KERNEL_SYMMETRICAL | KERNEL_ASYMMETRICAL)) != 0 );
|
||||
//Uncomment the following line when runtime support for neon is implemented.
|
||||
// neon_supported = checkHardwareSupport(CV_CPU_NEON);
|
||||
neon_supported = checkHardwareSupport(CV_CPU_NEON);
|
||||
}
|
||||
|
||||
int operator()(const uchar** _src, uchar* _dst, int width) const
|
||||
{
|
||||
//Uncomment the two following lines when runtime support for neon is implemented.
|
||||
// if( !neon_supported )
|
||||
// return 0;
|
||||
if( !neon_supported )
|
||||
return 0;
|
||||
|
||||
int _ksize = kernel.rows + kernel.cols - 1;
|
||||
int ksize2 = _ksize / 2;
|
||||
@ -2943,9 +2938,8 @@ struct SymmRowSmallVec_32f
|
||||
|
||||
int operator()(const uchar* _src, uchar* _dst, int width, int cn) const
|
||||
{
|
||||
//Uncomment the two following lines when runtime support for neon is implemented.
|
||||
// if( !checkHardwareSupport(CV_CPU_NEON) )
|
||||
// return 0;
|
||||
if( !checkHardwareSupport(CV_CPU_NEON) )
|
||||
return 0;
|
||||
|
||||
int i = 0, _ksize = kernel.rows + kernel.cols - 1;
|
||||
float* dst = (float*)_dst;
|
||||
|
@ -1497,7 +1497,9 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
|
||||
}
|
||||
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
if(sigma1 == 0 && sigma2 == 0 && tegra::gaussian(_src.getMat(), _dst.getMat(), ksize, borderType))
|
||||
Mat src = _src.getMat();
|
||||
Mat dst = _dst.getMat();
|
||||
if(sigma1 == 0 && sigma2 == 0 && tegra::gaussian(src, dst, ksize, borderType))
|
||||
return;
|
||||
#endif
|
||||
|
||||
|
@ -814,12 +814,97 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Masked template matching; called when a non-empty mask is supplied.
 *
 * Only CV_TM_SQDIFF and CV_TM_CCORR_NORMED are implemented; every other method raises
 * Error::StsNotImplemented. 8-bit image/template data is converted to 32-bit float scaled by 1/255.
 *
 * @param _img    Image to search in: CV_8U or CV_32F depth, at most 2 dimensions.
 * @param _templ  Template; must have the same type as the image and must fit inside it.
 * @param _result Output map, created as CV_32F of size (W-w+1) x (H-h+1).
 * @param method  Comparison method.
 * @param _mask   Mask with the same size and channel count as the template. It is never modified.
 *                It is reduced to 0/1 float values (non-zero -> 1) when the image is 8-bit or when
 *                the mask itself is not CV_32F; a CV_32F mask with a CV_32F image is used as is.
 */
static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
{
    int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
    CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );

    Mat img = _img.getMat(), templ = _templ.getMat(), mask = _mask.getMat();

    // Fail early with a clear message instead of deep inside mul()/crossCorr().
    CV_Assert( mask.size() == templ.size() && mask.channels() == templ.channels() );
    CV_Assert( img.rows >= templ.rows && img.cols >= templ.cols );

    // The mask used to be binarised whenever the *image* was 8-bit, because its depth was read
    // from the image type. Keep that, and also convert every mask that is not already 32-bit
    // float so the element-wise products below always see matching types.
    const bool binarizeMask = depth == CV_8U || mask.depth() != CV_32F;
    const int ftype = CV_MAKETYPE(CV_32F, cn);

    if (depth == CV_8U)
    {
        // type == _templ.type() was asserted above, so image and template are converted together.
        // The type changes, so convertTo() allocates new buffers; the caller's data is untouched.
        img.convertTo(img, ftype, 1.0 / 255);
        templ.convertTo(templ, ftype, 1.0 / 255);
    }

    if (binarizeMask)
    {
        // Work on temporaries: running compare() in place on an 8-bit mask would overwrite
        // the caller's input buffer with 0/255.
        Mat nonZero, maskF;
        compare(mask, Scalar::all(0), nonZero, CMP_NE);
        nonZero.convertTo(maskF, ftype, 1.0 / 255);
        mask = maskF;
    }

    Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
    _result.create(corrSize, CV_32F);
    Mat result = _result.getMat();

    Mat img2 = img.mul(img);
    Mat mask2 = mask.mul(mask);
    Mat mask_templ = templ.mul(mask);
    Scalar templMean, templSdv;

    // Sum of squares of the masked template: (stddev^2 + mean^2) summed over channels, times the area.
    double templSum2 = 0;
    meanStdDev( mask_templ, templMean, templSdv );

    templSum2 = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
    templSum2 += templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
    templSum2 *= ((double)templ.rows * templ.cols);

    if (method == CV_TM_SQDIFF)
    {
        Mat mask2_templ = templ.mul(mask2);

        Mat corr(corrSize, CV_32F);
        crossCorr( img, mask2_templ, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
        crossCorr( img2, mask, result, result.size(), result.type(), Point(0,0), 0, 0 );

        result -= corr * 2;
        result += templSum2;
    }
    else if (method == CV_TM_CCORR_NORMED)
    {
        // A masked template with (numerically) zero energy cannot be normalised.
        if (templSum2 < DBL_EPSILON)
        {
            result = Scalar::all(1);
            return;
        }

        Mat corr(corrSize, CV_32F);
        crossCorr( img2, mask2, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
        crossCorr( img, mask_templ, result, result.size(), result.type(), Point(0,0), 0, 0 );

        sqrt(corr, corr);
        result = result.mul(1/corr);
        result /= std::sqrt(templSum2);
    }
    else
        CV_Error(Error::StsNotImplemented, "");
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method )
|
||||
void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
|
||||
{
|
||||
if (!_mask.empty())
|
||||
{
|
||||
cv::matchTemplateMask(_img, _templ, _result, method, _mask);
|
||||
return;
|
||||
}
|
||||
|
||||
int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
|
||||
CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
|
||||
|
@ -931,7 +931,7 @@ Ptr<CascadeClassifierImpl::MaskGenerator> CascadeClassifierImpl::getMaskGenerato
|
||||
Ptr<BaseCascadeClassifier::MaskGenerator> createFaceDetectionMaskGenerator()
|
||||
{
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
return tegra::getCascadeClassifierMaskGenerator(*this);
|
||||
return tegra::getCascadeClassifierMaskGenerator();
|
||||
#else
|
||||
return Ptr<BaseCascadeClassifier::MaskGenerator>();
|
||||
#endif
|
||||
@ -1072,10 +1072,10 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
{
|
||||
String opts;
|
||||
if (lbufSize.area())
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d -D HAAR",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, data.maxNodesPerTree, splitstage_ocl, nstages, MAX_FACES);
|
||||
else
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d -D HAAR",
|
||||
localsz.width, localsz.height, data.maxNodesPerTree, splitstage_ocl, nstages, MAX_FACES);
|
||||
haarKernel.create("runHaarClassifier", ocl::objdetect::cascadedetect_oclsrc, opts);
|
||||
if( haarKernel.empty() )
|
||||
@ -1112,10 +1112,10 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
{
|
||||
String opts;
|
||||
if (lbufSize.area())
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d -D LBP",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, splitstage_ocl, nstages, MAX_FACES);
|
||||
else
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d -D LBP",
|
||||
localsz.width, localsz.height, splitstage_ocl, nstages, MAX_FACES);
|
||||
lbpKernel.create("runLBPClassifierStumpSimple", ocl::objdetect::cascadedetect_oclsrc, opts);
|
||||
if( lbpKernel.empty() )
|
||||
|
@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
|
@ -12,19 +12,22 @@
|
||||
// Erping Pang, erping@multicorewareinc.com
|
||||
//
|
||||
|
||||
|
||||
#ifdef HAAR
|
||||
typedef struct __attribute__((aligned(4))) OptHaarFeature
|
||||
{
|
||||
int4 ofs[3] __attribute__((aligned (4)));
|
||||
float4 weight __attribute__((aligned (4)));
|
||||
}
|
||||
OptHaarFeature;
|
||||
#endif
|
||||
|
||||
#ifdef LBP
|
||||
typedef struct __attribute__((aligned(4))) OptLBPFeature
|
||||
{
|
||||
int16 ofs __attribute__((aligned (4)));
|
||||
}
|
||||
OptLBPFeature;
|
||||
#endif
|
||||
|
||||
typedef struct __attribute__((aligned(4))) Stump
|
||||
{
|
||||
@ -64,6 +67,7 @@ ScaleData;
|
||||
#define NODE_COUNT 1
|
||||
#endif
|
||||
|
||||
#ifdef HAAR
|
||||
__kernel __attribute__((reqd_work_group_size(LOCAL_SIZE_X,LOCAL_SIZE_Y,1)))
|
||||
void runHaarClassifier(
|
||||
int nscales, __global const ScaleData* scaleData,
|
||||
@ -352,7 +356,9 @@ void runHaarClassifier(
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef LBP
|
||||
#undef CALC_SUM_OFS_
|
||||
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
|
||||
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
|
||||
@ -651,3 +657,4 @@ void runLBPClassifierStump(
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -59,69 +59,71 @@ namespace cv { namespace cuda {
|
||||
@param block_size Size of block used for computing weights.
|
||||
@param borderMode Border type. See borderInterpolate for details. BORDER_REFLECT101 ,
|
||||
BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now.
|
||||
@param s Stream for the asynchronous version.
|
||||
@param stream Stream for the asynchronous version.
|
||||
|
||||
@sa
|
||||
fastNlMeansDenoising
|
||||
*/
|
||||
CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null());
|
||||
CV_EXPORTS void nonLocalMeans(InputArray src, OutputArray dst,
|
||||
float h,
|
||||
int search_window = 21,
|
||||
int block_size = 7,
|
||||
int borderMode = BORDER_DEFAULT,
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief The class implements fast approximate Non Local Means Denoising algorithm.
|
||||
/** @brief Perform image denoising using Non-local Means Denoising algorithm
|
||||
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising> with several computational
|
||||
optimizations. The noise is expected to be Gaussian white noise.
|
||||
|
||||
@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
|
||||
@param dst Output image with the same size and type as src .
|
||||
@param h Parameter regulating filter strength. Big h value perfectly removes noise but also
|
||||
removes image details, smaller h value preserves details but also preserves some noise
|
||||
@param search_window Size in pixels of the window that is used to compute weighted average for
|
||||
given pixel. Should be odd. Affect performance linearly: greater search_window - greater
|
||||
denoising time. Recommended value 21 pixels
|
||||
@param block_size Size in pixels of the template patch that is used to compute weights. Should be
|
||||
odd. Recommended value 7 pixels
|
||||
@param stream Stream for the asynchronous invocations.
|
||||
|
||||
This function expected to be applied to grayscale images. For colored images look at
|
||||
FastNonLocalMeansDenoising::labMethod.
|
||||
|
||||
@sa
|
||||
fastNlMeansDenoising
|
||||
*/
|
||||
class CV_EXPORTS FastNonLocalMeansDenoising
|
||||
{
|
||||
public:
|
||||
/** @brief Perform image denoising using Non-local Means Denoising algorithm
|
||||
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising> with several computational
|
||||
optimizations. Noise expected to be a gaussian white noise
|
||||
CV_EXPORTS void fastNlMeansDenoising(InputArray src, OutputArray dst,
|
||||
float h,
|
||||
int search_window = 21,
|
||||
int block_size = 7,
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
|
||||
@param dst Output image with the same size and type as src .
|
||||
@param h Parameter regulating filter strength. Big h value perfectly removes noise but also
|
||||
removes image details, smaller h value preserves details but also preserves some noise
|
||||
@param search_window Size in pixels of the window that is used to compute weighted average for
|
||||
given pixel. Should be odd. Affect performance linearly: greater search_window - greater
|
||||
denoising time. Recommended value 21 pixels
|
||||
@param block_size Size in pixels of the template patch that is used to compute weights. Should be
|
||||
odd. Recommended value 7 pixels
|
||||
@param s Stream for the asynchronous invocations.
|
||||
/** @brief Modification of fastNlMeansDenoising function for colored images
|
||||
|
||||
This function expected to be applied to grayscale images. For colored images look at
|
||||
FastNonLocalMeansDenoising::labMethod.
|
||||
@param src Input 8-bit 3-channel image.
|
||||
@param dst Output image with the same size and type as src .
|
||||
@param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but
|
||||
also removes image details, smaller h value preserves details but also preserves some noise
|
||||
@param photo_render float The same as h but for color components. For most images value equals 10 will be
|
||||
enought to remove colored noise and do not distort colors
|
||||
@param search_window Size in pixels of the window that is used to compute weighted average for
|
||||
given pixel. Should be odd. Affect performance linearly: greater search_window - greater
|
||||
denoising time. Recommended value 21 pixels
|
||||
@param block_size Size in pixels of the template patch that is used to compute weights. Should be
|
||||
odd. Recommended value 7 pixels
|
||||
@param stream Stream for the asynchronous invocations.
|
||||
|
||||
@sa
|
||||
fastNlMeansDenoising
|
||||
*/
|
||||
void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
|
||||
The function converts image to CIELAB colorspace and then separately denoise L and AB components
|
||||
with given h parameters using FastNonLocalMeansDenoising::simpleMethod function.
|
||||
|
||||
/** @brief Modification of FastNonLocalMeansDenoising::simpleMethod for color images
|
||||
|
||||
@param src Input 8-bit 3-channel image.
|
||||
@param dst Output image with the same size and type as src .
|
||||
@param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but
|
||||
also removes image details, smaller h value preserves details but also preserves some noise
|
||||
@param photo_render float The same as h but for color components. For most images value equals 10 will be
|
||||
enought to remove colored noise and do not distort colors
|
||||
@param search_window Size in pixels of the window that is used to compute weighted average for
|
||||
given pixel. Should be odd. Affect performance linearly: greater search_window - greater
|
||||
denoising time. Recommended value 21 pixels
|
||||
@param block_size Size in pixels of the template patch that is used to compute weights. Should be
|
||||
odd. Recommended value 7 pixels
|
||||
@param s Stream for the asynchronous invocations.
|
||||
|
||||
The function converts image to CIELAB colorspace and then separately denoise L and AB components
|
||||
with given h parameters using FastNonLocalMeansDenoising::simpleMethod function.
|
||||
|
||||
@sa
|
||||
fastNlMeansDenoisingColored
|
||||
*/
|
||||
void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float photo_render, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
|
||||
|
||||
private:
|
||||
|
||||
GpuMat buffer, extended_src_buffer;
|
||||
GpuMat lab, l, ab;
|
||||
};
|
||||
@sa
|
||||
fastNlMeansDenoisingColored
|
||||
*/
|
||||
CV_EXPORTS void fastNlMeansDenoisingColored(InputArray src, OutputArray dst,
|
||||
float h_luminance, float photo_render,
|
||||
int search_window = 21,
|
||||
int block_size = 7,
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
//! @} photo
|
||||
|
||||
|
@ -126,12 +126,10 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, CUDA_FastNonLocalMeans,
|
||||
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
cv::cuda::FastNonLocalMeansDenoising fnlmd;
|
||||
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size);
|
||||
TEST_CYCLE() cv::cuda::fastNlMeansDenoising(d_src, dst, h, search_widow_size, block_size);
|
||||
|
||||
CUDA_SANITY_CHECK(dst);
|
||||
}
|
||||
@ -171,12 +169,10 @@ PERF_TEST_P(Sz_Depth_WinSz_BlockSz, CUDA_FastNonLocalMeansColored,
|
||||
|
||||
if (PERF_RUN_CUDA())
|
||||
{
|
||||
cv::cuda::FastNonLocalMeansDenoising fnlmd;
|
||||
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size);
|
||||
TEST_CYCLE() cv::cuda::fastNlMeansDenoisingColored(d_src, dst, h, h, search_widow_size, block_size);
|
||||
|
||||
CUDA_SANITY_CHECK(dst);
|
||||
}
|
||||
|
@ -60,9 +60,9 @@ using namespace cv::cuda;
|
||||
|
||||
#if !defined (HAVE_CUDA) || !defined(HAVE_OPENCV_CUDAARITHM) || !defined(HAVE_OPENCV_CUDAIMGPROC)
|
||||
|
||||
void cv::cuda::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat&, GpuMat&, float, float, int, int, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::nonLocalMeans(InputArray, OutputArray, float, int, int, int, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::fastNlMeansDenoising(InputArray, OutputArray, float, int, int, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::fastNlMeansDenoisingColored(InputArray, OutputArray, float, float, int, int, Stream&) { throw_no_cuda(); }
|
||||
|
||||
#else
|
||||
|
||||
@ -78,13 +78,15 @@ namespace cv { namespace cuda { namespace device
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, int borderMode, Stream& s)
|
||||
void cv::cuda::nonLocalMeans(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, int borderMode, Stream& stream)
|
||||
{
|
||||
using cv::cuda::device::imgproc::nlm_bruteforce_gpu;
|
||||
typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[4] = { nlm_bruteforce_gpu<uchar>, nlm_bruteforce_gpu<uchar2>, nlm_bruteforce_gpu<uchar3>, 0/*nlm_bruteforce_gpu<uchar4>,*/ };
|
||||
|
||||
const GpuMat src = _src.getGpuMat();
|
||||
|
||||
CV_Assert(src.type() == CV_8U || src.type() == CV_8UC2 || src.type() == CV_8UC3);
|
||||
|
||||
const func_t func = funcs[src.channels() - 1];
|
||||
@ -93,8 +95,10 @@ void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search
|
||||
int b = borderMode;
|
||||
CV_Assert(b == BORDER_REFLECT101 || b == BORDER_REPLICATE || b == BORDER_CONSTANT || b == BORDER_REFLECT || b == BORDER_WRAP);
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(s));
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
@ -112,47 +116,55 @@ namespace cv { namespace cuda { namespace device
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, Stream& s)
|
||||
void cv::cuda::fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, Stream& stream)
|
||||
{
|
||||
const GpuMat src = _src.getGpuMat();
|
||||
|
||||
CV_Assert(src.depth() == CV_8U && src.channels() < 4);
|
||||
|
||||
int border_size = search_window/2 + block_window/2;
|
||||
Size esize = src.size() + Size(border_size, border_size) * 2;
|
||||
|
||||
cv::cuda::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer);
|
||||
GpuMat extended_src(esize, src.type(), extended_src_buffer.ptr(), extended_src_buffer.step);
|
||||
BufferPool pool(stream);
|
||||
|
||||
cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s);
|
||||
GpuMat extended_src = pool.getBuffer(esize, src.type());
|
||||
cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
|
||||
GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));
|
||||
|
||||
int bcols, brows;
|
||||
device::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
|
||||
buffer.create(brows, bcols, CV_32S);
|
||||
GpuMat buffer = pool.getBuffer(brows, bcols, CV_32S);
|
||||
|
||||
using namespace cv::cuda::device::imgproc;
|
||||
typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
|
||||
static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0};
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(s));
|
||||
_dst.create(src.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window, int block_window, Stream& s)
|
||||
void cv::cuda::fastNlMeansDenoisingColored(InputArray _src, OutputArray _dst, float h_luminance, float h_color, int search_window, int block_window, Stream& stream)
|
||||
{
|
||||
const GpuMat src = _src.getGpuMat();
|
||||
|
||||
CV_Assert(src.type() == CV_8UC3);
|
||||
|
||||
lab.create(src.size(), src.type());
|
||||
cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, s);
|
||||
BufferPool pool(stream);
|
||||
|
||||
l.create(src.size(), CV_8U);
|
||||
ab.create(src.size(), CV_8UC2);
|
||||
device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(s));
|
||||
GpuMat lab = pool.getBuffer(src.size(), src.type());
|
||||
cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, stream);
|
||||
|
||||
simpleMethod(l, l, h_luminance, search_window, block_window, s);
|
||||
simpleMethod(ab, ab, h_color, search_window, block_window, s);
|
||||
GpuMat l = pool.getBuffer(src.size(), CV_8U);
|
||||
GpuMat ab = pool.getBuffer(src.size(), CV_8UC2);
|
||||
device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(stream));
|
||||
|
||||
device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(s));
|
||||
cv::cuda::cvtColor(lab, dst, cv::COLOR_Lab2BGR, 0, s);
|
||||
fastNlMeansDenoising(l, l, h_luminance, search_window, block_window, stream);
|
||||
fastNlMeansDenoising(ab, ab, h_color, search_window, block_window, stream);
|
||||
|
||||
device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(stream));
|
||||
cv::cuda::cvtColor(lab, _dst, cv::COLOR_Lab2BGR, 0, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -99,10 +99,9 @@ TEST(CUDA_FastNonLocalMeans, Regression)
|
||||
cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY);
|
||||
|
||||
GpuMat dbgr, dgray;
|
||||
cv::cuda::FastNonLocalMeansDenoising fnlmd;
|
||||
|
||||
fnlmd.simpleMethod(GpuMat(gray), dgray, 20);
|
||||
fnlmd.labMethod(GpuMat(bgr), dbgr, 20, 10);
|
||||
cv::cuda::fastNlMeansDenoising(GpuMat(gray), dgray, 20);
|
||||
cv::cuda::fastNlMeansDenoisingColored(GpuMat(bgr), dbgr, 20, 10);
|
||||
|
||||
#if 0
|
||||
dumpImage("../gpu/denoising/fnlm_denoised_lena_bgr.png", cv::Mat(dbgr));
|
||||
|
@ -861,7 +861,7 @@ class PythonWrapperGenerator(object):
|
||||
decls = self.parser.parse(hdr)
|
||||
if len(decls) == 0:
|
||||
continue
|
||||
self.code_include.write( '#include "{}"\n'.format(hdr[hdr.rindex('opencv2/'):]) )
|
||||
self.code_include.write( '#include "{0}"\n'.format(hdr[hdr.rindex('opencv2/'):]) )
|
||||
for decl in decls:
|
||||
name = decl[0]
|
||||
if name.startswith("struct") or name.startswith("class"):
|
||||
|
@ -1,3 +1,8 @@
|
||||
set(the_description "Images stitching")
|
||||
|
||||
if(HAVE_CUDA)
|
||||
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow)
|
||||
endif()
|
||||
|
||||
ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect
|
||||
OPTIONAL opencv_cuda opencv_cudaarithm opencv_cudafilters opencv_cudafeatures2d opencv_xfeatures2d)
|
||||
|
@ -398,7 +398,6 @@ public:
|
||||
};
|
||||
|
||||
|
||||
#ifdef HAVE_OPENCV_CUDAWARPING
|
||||
class CV_EXPORTS PlaneWarperGpu : public PlaneWarper
|
||||
{
|
||||
public:
|
||||
@ -515,7 +514,6 @@ public:
|
||||
private:
|
||||
cuda::GpuMat d_xmap_, d_ymap_, d_src_, d_dst_;
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
struct SphericalPortraitProjector : ProjectorBase
|
||||
|
@ -476,7 +476,11 @@ static bool ocl_normalizeUsingWeightMap(InputArray _weight, InputOutputArray _ma
|
||||
|
||||
void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
|
||||
{
|
||||
Mat src;
|
||||
Mat weight;
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
src = _src.getMat();
|
||||
weight = _weight.getMat();
|
||||
if(tegra::normalizeUsingWeightMap(weight, src))
|
||||
return;
|
||||
#endif
|
||||
@ -486,12 +490,12 @@ void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
|
||||
!ocl_normalizeUsingWeightMap(_weight, _src) )
|
||||
#endif
|
||||
{
|
||||
Mat weight = _weight.getMat();
|
||||
Mat src = _src.getMat();
|
||||
src = _src.getMat();
|
||||
weight = _weight.getMat();
|
||||
|
||||
CV_Assert(src.type() == CV_16SC3);
|
||||
|
||||
if(weight.type() == CV_32FC1)
|
||||
if (weight.type() == CV_32FC1)
|
||||
{
|
||||
for (int y = 0; y < src.rows; ++y)
|
||||
{
|
||||
@ -547,7 +551,8 @@ void createWeightMap(InputArray mask, float sharpness, InputOutputArray weight)
|
||||
void createLaplacePyr(InputArray img, int num_levels, std::vector<UMat> &pyr)
|
||||
{
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
if(tegra::createLaplacePyr(img, num_levels, pyr))
|
||||
cv::Mat imgMat = img.getMat();
|
||||
if(tegra::createLaplacePyr(imgMat, num_levels, pyr))
|
||||
return;
|
||||
#endif
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user