Merge remote-tracking branch 'origin/2.4' into merge-2.4
Conflicts: modules/ocl/include/opencv2/ocl/ocl.hpp modules/ocl/src/arithm.cpp modules/ocl/src/build_warps.cpp modules/ocl/src/color.cpp modules/ocl/src/haar.cpp modules/ocl/src/imgproc.cpp modules/ocl/src/split_merge.cpp modules/ocl/test/test_color.cpp samples/cpp/3calibration.cpp samples/cpp/OpenEXRimages_HDR_Retina_toneMapping.cpp samples/cpp/OpenEXRimages_HDR_Retina_toneMapping_video.cpp samples/cpp/Qt_sample/main.cpp samples/cpp/camshiftdemo.cpp samples/cpp/descriptor_extractor_matcher.cpp samples/cpp/distrans.cpp samples/cpp/generic_descriptor_match.cpp samples/cpp/grabcut.cpp samples/cpp/morphology2.cpp samples/cpp/segment_objects.cpp samples/cpp/stereo_calib.cpp samples/cpp/tutorial_code/Histograms_Matching/compareHist_Demo.cpp samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp samples/cpp/tutorial_code/introduction/display_image/display_image.cpp samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp samples/cpp/tutorial_code/objectDetection/objectDetection.cpp samples/cpp/tutorial_code/objectDetection/objectDetection2.cpp samples/cpp/video_dmtx.cpp
This commit is contained in:
commit
aacf188e83
@ -1,5 +1,5 @@
|
||||
# platform-specific config file
|
||||
configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.cmake" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h")
|
||||
configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h")
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# opencv_modules.hpp based on actual modules list
|
||||
|
@ -76,7 +76,9 @@ if(INSTALL_TO_MANGLED_PATHS)
|
||||
else()
|
||||
set(OPENCV_PC_FILE_NAME opencv.pc)
|
||||
endif()
|
||||
configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/opencv-XXX.pc.cmake.in" "${CMAKE_BINARY_DIR}/unix-install/${OPENCV_PC_FILE_NAME}" @ONLY IMMEDIATE)
|
||||
configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/opencv-XXX.pc.in"
|
||||
"${CMAKE_BINARY_DIR}/unix-install/${OPENCV_PC_FILE_NAME}"
|
||||
@ONLY IMMEDIATE)
|
||||
|
||||
if(UNIX AND NOT ANDROID)
|
||||
install(FILES ${CMAKE_BINARY_DIR}/unix-install/${OPENCV_PC_FILE_NAME} DESTINATION ${OPENCV_LIB_INSTALL_PATH}/pkgconfig)
|
||||
|
@ -42,7 +42,9 @@ if(ANDROID)
|
||||
string(REPLACE - _ modules_file_suffix "_${ANDROID_NDK_ABI_NAME}")
|
||||
endif()
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${modules_file_suffix}.cmake)
|
||||
if(NOT TARGET opencv_core)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${modules_file_suffix}.cmake)
|
||||
endif()
|
||||
|
||||
# TODO Everything below should be reviewed. What about moving this code into the related modules (special vars/hooks/files)?
|
||||
|
||||
|
@ -554,7 +554,7 @@ bool CvCapture_FFMPEG::open( const char* _filename )
|
||||
goto exit_func;
|
||||
}
|
||||
err =
|
||||
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 3, 0)
|
||||
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 6, 0)
|
||||
avformat_find_stream_info(ic, NULL);
|
||||
#else
|
||||
av_find_stream_info(ic);
|
||||
@ -2144,7 +2144,7 @@ bool InputMediaStream_FFMPEG::open(const char* fileName, int* codec, int* chroma
|
||||
if (err < 0)
|
||||
return false;
|
||||
|
||||
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 3, 0)
|
||||
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 6, 0)
|
||||
err = avformat_find_stream_info(ctx_, 0);
|
||||
#else
|
||||
err = av_find_stream_info(ctx_);
|
||||
|
@ -200,10 +200,20 @@ static char* icvExtractPattern(const char *filename, unsigned *offset)
|
||||
}
|
||||
else // no pattern filename was given - extract the pattern
|
||||
{
|
||||
for(at = name; *at && !isdigit(*at); at++)
|
||||
;
|
||||
at = name;
|
||||
|
||||
if(!at)
|
||||
// ignore directory names
|
||||
char *slash = strrchr(at, '/');
|
||||
if (slash) at = slash + 1;
|
||||
|
||||
#ifdef _WIN32
|
||||
slash = strrchr(at, '\\');
|
||||
if (slash) at = slash + 1;
|
||||
#endif
|
||||
|
||||
while (*at && !isdigit(*at)) at++;
|
||||
|
||||
if(!*at)
|
||||
return 0;
|
||||
|
||||
sscanf(at, "%u", offset);
|
||||
|
@ -154,7 +154,7 @@ public:
|
||||
{
|
||||
try
|
||||
{
|
||||
string filename = ts->get_data_path() + "../cv/features2d/tsukuba.png";
|
||||
string filename = ts->get_data_path() + "readwrite/ordinary.bmp";
|
||||
VideoCapture cap(filename);
|
||||
Mat img0 = imread(filename, 1);
|
||||
Mat img, img_next;
|
||||
|
@ -65,15 +65,15 @@ ocl::integral
|
||||
-----------------
|
||||
Computes an integral image.
|
||||
|
||||
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
|
||||
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1)
|
||||
|
||||
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum)
|
||||
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, int sdepth=-1)
|
||||
|
||||
:param src: Source image. Only ``CV_8UC1`` images are supported for now.
|
||||
|
||||
:param sum: Integral image containing 32-bit unsigned integer values packed into ``CV_32SC1`` .
|
||||
:param sum: Integral image containing 32-bit integer (``CV_32SC1``) or 32-bit floating-point (``CV_32FC1``) values.
|
||||
|
||||
:param sqsum: Sqsum values is ``CV_32FC1`` type.
|
||||
:param sqsum: Integral image of the squared pixel values; its type is ``CV_32FC1`` or ``CV_64FC1``.
|
||||
|
||||
.. seealso:: :ocv:func:`integral`
|
||||
|
||||
|
@ -880,10 +880,10 @@ namespace cv
|
||||
CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
|
||||
|
||||
//! computes the integral image and integral for the squared image
|
||||
// sum will have CV_32S type, sqsum - CV32F type
|
||||
// sum supports CV_32S and CV_32F; sqsum supports CV_32F and CV_64F
|
||||
// supports only CV_8UC1 source type
|
||||
CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
|
||||
CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
|
||||
CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1 );
|
||||
CV_EXPORTS void integral(const oclMat &src, oclMat &sum, int sdepth=-1 );
|
||||
CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
|
||||
CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
|
||||
int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
|
||||
|
@ -1066,6 +1066,9 @@ PERF_TEST_P(RepeatFixture, Repeat,
|
||||
const int nx = 3, ny = 2;
|
||||
const Size dstSize(srcSize.width * nx, srcSize.height * ny);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
checkDeviceMaxMemoryAllocSize(dstSize, type);
|
||||
|
||||
Mat src(srcSize, type), dst(dstSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
|
@ -61,6 +61,8 @@ PERF_TEST_P(BlurFixture, Blur,
|
||||
const Size srcSize = get<0>(params), ksize(3, 3);
|
||||
const int type = get<1>(params), bordertype = BORDER_CONSTANT;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
@ -99,6 +101,8 @@ PERF_TEST_P(LaplacianFixture, Laplacian,
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), ksize = 3;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
@ -138,6 +142,8 @@ PERF_TEST_P(ErodeFixture, Erode,
|
||||
const int type = get<1>(params), ksize = 3;
|
||||
const Mat ker = getStructuringElement(MORPH_RECT, Size(ksize, ksize));
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst).in(ker);
|
||||
|
||||
@ -176,6 +182,8 @@ PERF_TEST_P(SobelFixture, Sobel,
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), dx = 1, dy = 1;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type, sizeof(float) * 2);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
@ -217,6 +225,8 @@ PERF_TEST_P(ScharrFixture, Scharr,
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), dx = 1, dy = 0;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type, sizeof(float) * 2);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
@ -258,6 +268,8 @@ PERF_TEST_P(GaussianBlurFixture, GaussianBlur,
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), ksize = 7;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
@ -295,6 +307,8 @@ PERF_TEST_P(filter2DFixture, filter2D,
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), ksize = 3;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type), kernel(ksize, ksize, CV_32SC1);
|
||||
declare.in(src, WARMUP_RNG).in(kernel).out(dst);
|
||||
randu(kernel, -3.0, 3.0);
|
||||
@ -335,6 +349,8 @@ PERF_TEST_P(BilateralFixture, Bilateral,
|
||||
const int type = get<1>(params), d = 7;
|
||||
const double sigmacolor = 50.0, sigmaspace = 50.0;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
@ -366,8 +382,7 @@ PERF_TEST_P(BilateralFixture, Bilateral,
|
||||
typedef Size_MatType adaptiveBilateralFixture;
|
||||
|
||||
PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
|
||||
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
|
||||
OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
|
||||
::testing::Combine(::testing::Values(OCL_SIZE_1000), OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
@ -375,14 +390,11 @@ PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
|
||||
const double sigmaspace = 10.0;
|
||||
Size ksize(9, 9);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
if (srcSize == OCL_SIZE_4000)
|
||||
declare.time(type == CV_8UC3 ? 46 : 28);
|
||||
else if (srcSize == OCL_SIZE_2000)
|
||||
declare.time(type == CV_8UC3 ? 11 : 7);
|
||||
|
||||
if (RUN_OCL_IMPL)
|
||||
{
|
||||
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
|
||||
|
@ -231,139 +231,6 @@ PERF_TEST_P(integralFixture, integral, OCL_TYPICAL_MAT_SIZES)
|
||||
OCL_PERF_ELSE
|
||||
}
|
||||
|
||||
///////////// WarpAffine ////////////////////////
|
||||
|
||||
typedef Size_MatType WarpAffineFixture;
|
||||
|
||||
PERF_TEST_P(WarpAffineFixture, WarpAffine,
|
||||
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
|
||||
OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
|
||||
{
|
||||
static const double coeffs[2][3] =
|
||||
{
|
||||
{ cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
|
||||
{ sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
|
||||
};
|
||||
Mat M(2, 3, CV_64F, (void *)coeffs);
|
||||
const int interpolation = INTER_NEAREST;
|
||||
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
if (RUN_OCL_IMPL)
|
||||
{
|
||||
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
|
||||
|
||||
OCL_TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation);
|
||||
|
||||
oclDst.download(dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
else if (RUN_PLAIN_IMPL)
|
||||
{
|
||||
TEST_CYCLE() cv::warpAffine(src, dst, M, srcSize, interpolation);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
OCL_PERF_ELSE
|
||||
}
|
||||
|
||||
///////////// WarpPerspective ////////////////////////
|
||||
|
||||
typedef Size_MatType WarpPerspectiveFixture;
|
||||
|
||||
PERF_TEST_P(WarpPerspectiveFixture, WarpPerspective,
|
||||
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
|
||||
OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
|
||||
{
|
||||
static const double coeffs[3][3] =
|
||||
{
|
||||
{cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
|
||||
{sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
|
||||
{0.0, 0.0, 1.0}
|
||||
};
|
||||
Mat M(3, 3, CV_64F, (void *)coeffs);
|
||||
const int interpolation = INTER_LINEAR;
|
||||
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst)
|
||||
.time(srcSize == OCL_SIZE_4000 ? 18 : srcSize == OCL_SIZE_2000 ? 5 : 2);
|
||||
|
||||
if (RUN_OCL_IMPL)
|
||||
{
|
||||
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
|
||||
|
||||
OCL_TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation);
|
||||
|
||||
oclDst.download(dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
else if (RUN_PLAIN_IMPL)
|
||||
{
|
||||
TEST_CYCLE() cv::warpPerspective(src, dst, M, srcSize, interpolation);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
OCL_PERF_ELSE
|
||||
}
|
||||
|
||||
///////////// resize ////////////////////////
|
||||
|
||||
CV_ENUM(resizeInterType, INTER_NEAREST, INTER_LINEAR)
|
||||
|
||||
typedef tuple<Size, MatType, resizeInterType, double> resizeParams;
|
||||
typedef TestBaseWithParam<resizeParams> resizeFixture;
|
||||
|
||||
PERF_TEST_P(resizeFixture, resize,
|
||||
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
|
||||
OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
|
||||
resizeInterType::all(),
|
||||
::testing::Values(0.5, 2.0)))
|
||||
{
|
||||
const resizeParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), interType = get<2>(params);
|
||||
double scale = get<3>(params);
|
||||
|
||||
Mat src(srcSize, type), dst;
|
||||
const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
|
||||
dst.create(dstSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
if (interType == INTER_LINEAR && type == CV_8UC4 && OCL_SIZE_4000 == srcSize)
|
||||
declare.time(11);
|
||||
|
||||
if (RUN_OCL_IMPL)
|
||||
{
|
||||
ocl::oclMat oclSrc(src), oclDst(dstSize, type);
|
||||
|
||||
OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType);
|
||||
|
||||
oclDst.download(dst);
|
||||
|
||||
SANITY_CHECK(dst, 1 + DBL_EPSILON);
|
||||
}
|
||||
else if (RUN_PLAIN_IMPL)
|
||||
{
|
||||
TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, interType);
|
||||
|
||||
SANITY_CHECK(dst, 1 + DBL_EPSILON);
|
||||
}
|
||||
else
|
||||
OCL_PERF_ELSE
|
||||
}
|
||||
|
||||
///////////// threshold////////////////////////
|
||||
|
||||
CV_ENUM(ThreshType, THRESH_BINARY, THRESH_TOZERO_INV)
|
||||
@ -727,67 +594,6 @@ PERF_TEST_P(meanShiftProcFixture, meanShiftProc,
|
||||
OCL_PERF_ELSE
|
||||
}
|
||||
|
||||
///////////// remap////////////////////////
|
||||
|
||||
CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR)
|
||||
|
||||
typedef tuple<Size, MatType, RemapInterType> remapParams;
|
||||
typedef TestBaseWithParam<remapParams> remapFixture;
|
||||
|
||||
PERF_TEST_P(remapFixture, remap,
|
||||
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
|
||||
OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
|
||||
RemapInterType::all()))
|
||||
{
|
||||
const remapParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), interpolation = get<2>(params);
|
||||
|
||||
Mat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
if (srcSize == OCL_SIZE_4000 && interpolation == INTER_LINEAR)
|
||||
declare.time(9);
|
||||
|
||||
Mat xmap, ymap;
|
||||
xmap.create(srcSize, CV_32FC1);
|
||||
ymap.create(srcSize, CV_32FC1);
|
||||
|
||||
for (int i = 0; i < srcSize.height; ++i)
|
||||
{
|
||||
float * const xmap_row = xmap.ptr<float>(i);
|
||||
float * const ymap_row = ymap.ptr<float>(i);
|
||||
|
||||
for (int j = 0; j < srcSize.width; ++j)
|
||||
{
|
||||
xmap_row[j] = (j - srcSize.width * 0.5f) * 0.75f + srcSize.width * 0.5f;
|
||||
ymap_row[j] = (i - srcSize.height * 0.5f) * 0.75f + srcSize.height * 0.5f;
|
||||
}
|
||||
}
|
||||
|
||||
const int borderMode = BORDER_CONSTANT;
|
||||
|
||||
if (RUN_OCL_IMPL)
|
||||
{
|
||||
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
|
||||
ocl::oclMat oclXMap(xmap), oclYMap(ymap);
|
||||
|
||||
OCL_TEST_CYCLE() cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode);
|
||||
|
||||
oclDst.download(dst);
|
||||
|
||||
SANITY_CHECK(dst, 1 + DBL_EPSILON);
|
||||
}
|
||||
else if (RUN_PLAIN_IMPL)
|
||||
{
|
||||
TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
|
||||
|
||||
SANITY_CHECK(dst, 1 + DBL_EPSILON);
|
||||
}
|
||||
else
|
||||
OCL_PERF_ELSE
|
||||
}
|
||||
|
||||
///////////// CLAHE ////////////////////////
|
||||
|
||||
typedef TestBaseWithParam<Size> CLAHEFixture;
|
||||
|
324
modules/ocl/perf/perf_imgwarp.cpp
Normal file
324
modules/ocl/perf/perf_imgwarp.cpp
Normal file
@ -0,0 +1,324 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||
// Jin Ma, jin@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
using namespace perf;
|
||||
using std::tr1::tuple;
|
||||
using std::tr1::get;
|
||||
|
||||
///////////// WarpAffine ////////////////////////
|
||||
|
||||
typedef Size_MatType WarpAffineFixture;
|
||||
|
||||
// Performance test for warpAffine: times cv::ocl::warpAffine (OpenCL run) or
// cv::warpAffine (plain run) for every size/type combination, using a fixed
// transform (rotation by CV_PI / 6 plus a +/-100 translation) and
// nearest-neighbour interpolation. Source and destination have the same size.
PERF_TEST_P(WarpAffineFixture, WarpAffine,
            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
{
    static const double coeffs[2][3] =
    {
        { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
        { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
    };
    // Header over the static coefficient table; no data is copied.
    Mat M(2, 3, CV_64F, (void *)coeffs);
    const int interpolation = INTER_NEAREST;

    const Size_MatType_t params = GetParam();
    const Size srcSize = get<0>(params);
    const int type = get<1>(params);

    // Skip the test (instead of failing) when the selected OpenCL device cannot
    // allocate a buffer of this size; same guard as the resize test in this file.
    checkDeviceMaxMemoryAllocSize(srcSize, type);

    Mat src(srcSize, type), dst(srcSize, type);
    declare.in(src, WARMUP_RNG).out(dst);

    if (RUN_OCL_IMPL)
    {
        ocl::oclMat oclSrc(src), oclDst(srcSize, type);

        OCL_TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation);

        // Bring the result back to the host so it can be sanity-checked.
        oclDst.download(dst);

        SANITY_CHECK(dst);
    }
    else if (RUN_PLAIN_IMPL)
    {
        TEST_CYCLE() cv::warpAffine(src, dst, M, srcSize, interpolation);

        SANITY_CHECK(dst);
    }
    else
        OCL_PERF_ELSE
}
|
||||
|
||||
///////////// WarpPerspective ////////////////////////
|
||||
|
||||
typedef Size_MatType WarpPerspectiveFixture;
|
||||
|
||||
// Performance test for warpPerspective: times cv::ocl::warpPerspective (OpenCL
// run) or cv::warpPerspective (plain run) for every size/type combination,
// using a fixed 3x3 matrix (rotation by CV_PI / 6 plus a +/-100 translation,
// last row {0, 0, 1}) and bilinear interpolation.
PERF_TEST_P(WarpPerspectiveFixture, WarpPerspective,
            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
{
    static const double coeffs[3][3] =
    {
        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
        {0.0, 0.0, 1.0}
    };
    // Header over the static coefficient table; no data is copied.
    Mat M(3, 3, CV_64F, (void *)coeffs);
    const int interpolation = INTER_LINEAR;

    const Size_MatType_t params = GetParam();
    const Size srcSize = get<0>(params);
    const int type = get<1>(params);

    // Skip the test (instead of failing) when the selected OpenCL device cannot
    // allocate a buffer of this size; same guard as the resize test in this file.
    checkDeviceMaxMemoryAllocSize(srcSize, type);

    Mat src(srcSize, type), dst(srcSize, type);
    // Larger inputs get a larger time budget.
    declare.in(src, WARMUP_RNG).out(dst)
            .time(srcSize == OCL_SIZE_4000 ? 18 : srcSize == OCL_SIZE_2000 ? 5 : 2);

    if (RUN_OCL_IMPL)
    {
        ocl::oclMat oclSrc(src), oclDst(srcSize, type);

        OCL_TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation);

        // Bring the result back to the host so it can be sanity-checked.
        oclDst.download(dst);

        SANITY_CHECK(dst);
    }
    else if (RUN_PLAIN_IMPL)
    {
        TEST_CYCLE() cv::warpPerspective(src, dst, M, srcSize, interpolation);

        SANITY_CHECK(dst);
    }
    else
        OCL_PERF_ELSE
}
|
||||
|
||||
///////////// resize ////////////////////////
|
||||
|
||||
CV_ENUM(resizeInterType, INTER_NEAREST, INTER_LINEAR)
|
||||
|
||||
typedef tuple<Size, MatType, resizeInterType, double> resizeParams;
|
||||
typedef TestBaseWithParam<resizeParams> resizeFixture;
|
||||
|
||||
// Performance test for resize: times cv::ocl::resize (OpenCL run) or cv::resize
// (plain run) for every combination of size, type, interpolation mode and scale
// factor (0.5 or 2.0, applied to both axes).
PERF_TEST_P(resizeFixture, resize,
            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
                               resizeInterType::all(),
                               ::testing::Values(0.5, 2.0)))
{
    const resizeParams testParams = GetParam();
    const Size srcSize = get<0>(testParams);
    const int type = get<1>(testParams);
    const int interType = get<2>(testParams);
    double scale = get<3>(testParams);

    // Destination extent, rounded to whole pixels.
    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));

    // Skip the test when the OpenCL device cannot hold either buffer.
    checkDeviceMaxMemoryAllocSize(srcSize, type);
    checkDeviceMaxMemoryAllocSize(dstSize, type);

    Mat src(srcSize, type);
    Mat dst;
    dst.create(dstSize, type);
    declare.in(src, WARMUP_RNG).out(dst);

    // This one configuration is given an explicit time budget.
    const bool needsLongerBudget =
        interType == INTER_LINEAR && type == CV_8UC4 && OCL_SIZE_4000 == srcSize;
    if (needsLongerBudget)
        declare.time(11);

    if (RUN_OCL_IMPL)
    {
        ocl::oclMat oclSrc(src), oclDst(dstSize, type);

        OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType);

        // Bring the result back to the host so it can be sanity-checked.
        oclDst.download(dst);

        SANITY_CHECK(dst, 1 + DBL_EPSILON);
    }
    else if (RUN_PLAIN_IMPL)
    {
        TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, interType);

        SANITY_CHECK(dst, 1 + DBL_EPSILON);
    }
    else
        OCL_PERF_ELSE
}
|
||||
|
||||
///////////// remap////////////////////////
|
||||
|
||||
CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR)
|
||||
|
||||
typedef tuple<Size, MatType, RemapInterType> remapParams;
|
||||
typedef TestBaseWithParam<remapParams> remapFixture;
|
||||
|
||||
// Performance test for remap: times cv::ocl::remap (OpenCL run) or cv::remap
// (plain run) for every combination of size, type and interpolation mode.
// The maps sample the source around its centre with a 0.75 factor, and
// BORDER_CONSTANT is used as the border mode.
PERF_TEST_P(remapFixture, remap,
            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
                               RemapInterType::all()))
{
    const remapParams params = GetParam();
    const Size srcSize = get<0>(params);
    const int type = get<1>(params), interpolation = get<2>(params);

    // Skip the test (instead of failing) when the selected OpenCL device cannot
    // allocate the image buffers or the CV_32FC1 coordinate maps; same guard as
    // the resize test in this file.
    checkDeviceMaxMemoryAllocSize(srcSize, type);
    checkDeviceMaxMemoryAllocSize(srcSize, CV_32FC1);

    Mat src(srcSize, type), dst(srcSize, type);
    declare.in(src, WARMUP_RNG).out(dst);

    // This one configuration is given an explicit time budget.
    if (srcSize == OCL_SIZE_4000 && interpolation == INTER_LINEAR)
        declare.time(9);

    Mat xmap, ymap;
    xmap.create(srcSize, CV_32FC1);
    ymap.create(srcSize, CV_32FC1);

    // Fill the coordinate maps: each destination pixel reads from a source
    // position scaled by 0.75 about the image centre.
    for (int i = 0; i < srcSize.height; ++i)
    {
        float * const xmap_row = xmap.ptr<float>(i);
        float * const ymap_row = ymap.ptr<float>(i);

        for (int j = 0; j < srcSize.width; ++j)
        {
            xmap_row[j] = (j - srcSize.width * 0.5f) * 0.75f + srcSize.width * 0.5f;
            ymap_row[j] = (i - srcSize.height * 0.5f) * 0.75f + srcSize.height * 0.5f;
        }
    }

    const int borderMode = BORDER_CONSTANT;

    if (RUN_OCL_IMPL)
    {
        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
        ocl::oclMat oclXMap(xmap), oclYMap(ymap);

        OCL_TEST_CYCLE() cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode);

        // Bring the result back to the host so it can be sanity-checked.
        oclDst.download(dst);

        SANITY_CHECK(dst, 1 + DBL_EPSILON);
    }
    else if (RUN_PLAIN_IMPL)
    {
        TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);

        SANITY_CHECK(dst, 1 + DBL_EPSILON);
    }
    else
        OCL_PERF_ELSE
}
|
||||
|
||||
|
||||
///////////// buildWarpPerspectiveMaps ////////////////////////
|
||||
|
||||
static void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, Mat &xmap, Mat &ymap)
|
||||
{
|
||||
CV_Assert(M.rows == 3 && M.cols == 3);
|
||||
CV_Assert(dsize.area() > 0);
|
||||
|
||||
xmap.create(dsize, CV_32FC1);
|
||||
ymap.create(dsize, CV_32FC1);
|
||||
|
||||
float coeffs[3 * 3];
|
||||
Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
|
||||
|
||||
if (inverse)
|
||||
M.convertTo(coeffsMat, coeffsMat.type());
|
||||
else
|
||||
{
|
||||
cv::Mat iM;
|
||||
invert(M, iM);
|
||||
iM.convertTo(coeffsMat, coeffsMat.type());
|
||||
}
|
||||
|
||||
for (int y = 0; y < dsize.height; ++y)
|
||||
{
|
||||
float * const xmap_ptr = xmap.ptr<float>(y);
|
||||
float * const ymap_ptr = ymap.ptr<float>(y);
|
||||
|
||||
for (int x = 0; x < dsize.width; ++x)
|
||||
{
|
||||
float coeff = 1.0f / (x * coeffs[6] + y * coeffs[7] + coeffs[8]);
|
||||
xmap_ptr[x] = (x * coeffs[0] + y * coeffs[1] + coeffs[2]) * coeff;
|
||||
ymap_ptr[x] = (x * coeffs[3] + y * coeffs[4] + coeffs[5]) * coeff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef TestBaseWithParam<Size> buildWarpPerspectiveMapsFixture;
|
||||
|
||||
// Performance test for buildWarpPerspectiveMaps with inverse == true: times
// cv::ocl::buildWarpPerspectiveMaps (OpenCL run) or the file-local CPU
// reference (plain run) for every map size, using a fixed 3x3 matrix
// (rotation by CV_PI / 6 plus a +/-100 translation, last row {0, 0, 1}).
PERF_TEST_P(buildWarpPerspectiveMapsFixture, Inverse, OCL_TYPICAL_MAT_SIZES)
{
    static const double coeffs[3][3] =
    {
        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
        {0.0, 0.0, 1.0}
    };
    // Header over the static coefficient table; no data is copied.
    Mat M(3, 3, CV_64F, (void *)coeffs);
    const Size dsize = GetParam();
    // Tolerance passed to the sanity checks of both maps.
    const double eps = 5e-4;

    // Skip the test (instead of failing) when the selected OpenCL device cannot
    // allocate a CV_32FC1 map of this size; same guard as the resize test in
    // this file.
    checkDeviceMaxMemoryAllocSize(dsize, CV_32FC1);

    Mat xmap(dsize, CV_32FC1), ymap(dsize, CV_32FC1);
    declare.in(M).out(xmap, ymap);

    if (RUN_OCL_IMPL)
    {
        ocl::oclMat oclXMap(dsize, CV_32FC1), oclYMap(dsize, CV_32FC1);

        OCL_TEST_CYCLE() cv::ocl::buildWarpPerspectiveMaps(M, true, dsize, oclXMap, oclYMap);

        // Bring the results back to the host so they can be sanity-checked.
        oclXMap.download(xmap);
        oclYMap.download(ymap);

        SANITY_CHECK(xmap, eps);
        SANITY_CHECK(ymap, eps);
    }
    else if (RUN_PLAIN_IMPL)
    {
        TEST_CYCLE() buildWarpPerspectiveMaps(M, true, dsize, xmap, ymap);

        SANITY_CHECK(xmap, eps);
        SANITY_CHECK(ymap, eps);
    }
    else
        OCL_PERF_ELSE
}
|
@ -108,13 +108,13 @@ PERF_TEST_P(CV_TM_CCORR_NORMEDFixture, matchTemplate, OCL_TYPICAL_MAT_SIZES)
|
||||
|
||||
oclDst.download(dst);
|
||||
|
||||
SANITY_CHECK(dst, 2e-2);
|
||||
SANITY_CHECK(dst, 3e-2);
|
||||
}
|
||||
else if (RUN_PLAIN_IMPL)
|
||||
{
|
||||
TEST_CYCLE() cv::matchTemplate(src, templ, dst, TM_CCORR_NORMED);
|
||||
|
||||
SANITY_CHECK(dst, 2e-2);
|
||||
SANITY_CHECK(dst, 3e-2);
|
||||
}
|
||||
else
|
||||
OCL_PERF_ELSE
|
||||
|
@ -63,6 +63,10 @@ PERF_TEST_P(ConvertToFixture, ConvertTo,
|
||||
|
||||
Mat src(srcSize, type), dst;
|
||||
const int dstType = CV_MAKE_TYPE(CV_32F, src.channels());
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, dstType);
|
||||
|
||||
dst.create(srcSize, dstType);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
|
@ -112,8 +112,30 @@ using namespace cv;
|
||||
CV_TEST_FAIL_NO_IMPL();
|
||||
#endif
|
||||
|
||||
#define OCL_TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); ocl::finish(), stopTimer())
|
||||
#define OCL_TEST_CYCLE() for(; startTimer(), next(); ocl::finish(), stopTimer())
|
||||
#define OCL_TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; ocl::finish(), ++r)
|
||||
#define OCL_TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); cv::ocl::finish(), stopTimer())
|
||||
#define OCL_TEST_CYCLE() for(; startTimer(), next(); cv::ocl::finish(), stopTimer())
|
||||
#define OCL_TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; cv::ocl::finish(), ++r)
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
inline void checkDeviceMaxMemoryAllocSize(const Size& size, int type, int factor = 1)
|
||||
{
|
||||
assert(factor > 0);
|
||||
if (!(IMPL_OCL == perf::TestBase::getSelectedImpl()))
|
||||
return; // OpenCL devices are not used
|
||||
int cn = CV_MAT_CN(type);
|
||||
int cn_ocl = cn == 3 ? 4 : cn;
|
||||
int type_ocl = CV_MAKE_TYPE(CV_MAT_DEPTH(type), cn_ocl);
|
||||
size_t memSize = size.area() * CV_ELEM_SIZE(type_ocl);
|
||||
const cv::ocl::DeviceInfo& devInfo = cv::ocl::Context::getContext()->getDeviceInfo();
|
||||
if (memSize * factor >= devInfo.maxMemAllocSize)
|
||||
{
|
||||
throw perf::TestBase::PerfSkipTestException();
|
||||
}
|
||||
}
|
||||
} // namespace cvtest::ocl
|
||||
} // namespace cvtest
|
||||
|
||||
using namespace cvtest::ocl;
|
||||
|
||||
#endif
|
||||
|
@ -60,9 +60,12 @@ PERF_TEST_P(pyrDownFixture, pyrDown,
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
Size dstSize((srcSize.height + 1) >> 1, (srcSize.width + 1) >> 1);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
checkDeviceMaxMemoryAllocSize(dstSize, type);
|
||||
|
||||
Mat src(srcSize, type), dst;
|
||||
Size dstSize((srcSize.height + 1) >> 1, (srcSize.width + 1) >> 1);
|
||||
dst.create(dstSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
@ -97,9 +100,12 @@ PERF_TEST_P(pyrUpFixture, pyrUp,
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
Size dstSize(srcSize.height << 1, srcSize.width << 1);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
checkDeviceMaxMemoryAllocSize(dstSize, type);
|
||||
|
||||
Mat src(srcSize, type), dst;
|
||||
Size dstSize(srcSize.height << 1, srcSize.width << 1);
|
||||
dst.create(dstSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
|
@ -60,8 +60,10 @@ PERF_TEST_P(MergeFixture, Merge,
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int depth = get<1>(params), channels = 3;
|
||||
|
||||
const int dstType = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, dstType);
|
||||
|
||||
Mat dst(srcSize, dstType);
|
||||
vector<Mat> src(channels);
|
||||
for (vector<Mat>::iterator i = src.begin(), end = src.end(); i != end; ++i)
|
||||
@ -105,8 +107,11 @@ PERF_TEST_P(SplitFixture, Split,
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int depth = get<1>(params), channels = 3;
|
||||
const int type = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
Mat src(srcSize, CV_MAKE_TYPE(depth, channels));
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
Mat src(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (RUN_OCL_IMPL)
|
||||
|
@ -867,30 +867,32 @@ void cv::ocl::log(const oclMat &src, oclMat &dst)
|
||||
|
||||
static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName)
|
||||
{
|
||||
int channels = dst.oclchannels();
|
||||
int depth = dst.depth();
|
||||
|
||||
size_t vector_length = 1;
|
||||
int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
|
||||
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
|
||||
|
||||
size_t localThreads[3] = { 64, 4, 1 };
|
||||
size_t globalThreads[3] = { cols, dst.rows, 1 };
|
||||
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
|
||||
|
||||
int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
|
||||
int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize();
|
||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols ));
|
||||
|
||||
openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, depth);
|
||||
const char * const channelMap[] = { "", "", "2", "4", "4" };
|
||||
std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[dst.channels()]);
|
||||
|
||||
openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
|
||||
}
|
||||
|
||||
void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst)
|
||||
@ -964,25 +966,29 @@ static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, o
|
||||
size_t localThreads[3] = { 64, 4, 1 };
|
||||
size_t globalThreads[3] = { cols, src1.rows, 1 };
|
||||
|
||||
int tmp = angleInDegrees ? 1 : 0;
|
||||
int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1();
|
||||
int src2_step = src2.step / src2.elemSize1(), src2_offset = src2.offset / src2.elemSize1();
|
||||
int dst_mag_step = dst_mag.step / dst_mag.elemSize1(), dst_mag_offset = dst_mag.offset / dst_mag.elemSize1();
|
||||
int dst_cart_step = dst_cart.step / dst_cart.elemSize1(), dst_cart_offset = dst_cart.offset / dst_cart.elemSize1();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst_mag.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_mag.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_mag.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_mag_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_mag_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst_cart.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_cart.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_cart.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_cart_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_cart_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tmp ));
|
||||
|
||||
openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args, -1, depth);
|
||||
openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args,
|
||||
-1, depth, angleInDegrees ? "-D DEGREE" : "-D RADIAN");
|
||||
}
|
||||
|
||||
void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat &angle, bool angleInDegrees)
|
||||
@ -1008,37 +1014,38 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat
|
||||
static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
|
||||
String kernelName)
|
||||
{
|
||||
int channels = src2.oclchannels();
|
||||
int depth = src2.depth();
|
||||
|
||||
int cols = src2.cols * channels;
|
||||
int rows = src2.rows;
|
||||
int channels = src2.oclchannels(), depth = src2.depth();
|
||||
int cols = src2.cols * channels, rows = src2.rows;
|
||||
|
||||
size_t localThreads[3] = { 64, 4, 1 };
|
||||
size_t globalThreads[3] = { cols, rows, 1 };
|
||||
|
||||
int tmp = angleInDegrees ? 1 : 0;
|
||||
int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1();
|
||||
int src2_step = src2.step / src2.elemSize1(), src2_offset = src2.offset / src2.elemSize1();
|
||||
int dst1_step = dst1.step / dst1.elemSize1(), dst1_offset = dst1.offset / dst1.elemSize1();
|
||||
int dst2_step = dst2.step / dst2.elemSize1(), dst2_offset = dst2.offset / dst2.elemSize1();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
if (src1.data)
|
||||
{
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset ));
|
||||
}
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst1.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst1.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst1.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst1_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst1_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst2.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst2.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst2.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst2_step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst2_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tmp ));
|
||||
|
||||
openCLExecuteKernel(src1.clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads, args, -1, depth);
|
||||
openCLExecuteKernel(src1.clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads,
|
||||
args, -1, depth, angleInDegrees ? "-D DEGREE" : "-D RADIAN");
|
||||
}
|
||||
|
||||
void cv::ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees)
|
||||
@ -1623,38 +1630,38 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
|
||||
/////////////////////////////////// Pow //////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/**
 * Runs the kernel called kernelName from the given program source to raise
 * src to the power p, writing into dst.
 *
 * globalThreads is dst.cols x dst.rows. Build options define VT (the scalar
 * type plus a vector-width suffix looked up from dst's oclchannels()) and T
 * (the scalar type): "float" when dst's depth is CV_32F, otherwise "double".
 *
 * The exponent is passed as cl_float when src's depth is CV_32F and as
 * cl_double otherwise.
 */
static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source)
{
    int channels = dst.oclchannels();
    int depth = dst.depth();

    size_t localThreads[3] = { 64, 4, 1 };
    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };

    const char * const typeStr = depth == CV_32F ? "float" : "double";
    // Indexed by channel count: 1 -> scalar, 2 -> "2", 3 and 4 -> "4".
    const char * const channelMap[] = { "", "", "2", "4", "4" };
    std::string buildOptions = format("-D VT=%s%s -D T=%s", typeStr, channelMap[channels], typeStr);

    // Step and offset divided by elemSize() (presumably bytes -> whole elements).
    int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();

    std::vector<std::pair<size_t , const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols ));

    // pf must outlive the launch below because args stores its address.
    float pf = static_cast<float>(p);
    if(src.depth() == CV_32F)
        args.push_back( std::make_pair( sizeof(cl_float), (void *)&pf ));
    else
        args.push_back( std::make_pair( sizeof(cl_double), (void *)&p ));

    // Single launch; the type is carried by buildOptions, so channels/depth are -1.
    openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
}
|
||||
|
||||
void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
|
||||
|
@ -53,7 +53,7 @@ using namespace cv::ocl;
|
||||
// buildWarpPlaneMaps
|
||||
|
||||
void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
|
||||
float scale, oclMat &map_x, oclMat &map_y)
|
||||
float scale, oclMat &xmap, oclMat &ymap)
|
||||
{
|
||||
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
|
||||
CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
|
||||
@ -68,37 +68,40 @@ void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K,
|
||||
|
||||
oclMat KRT_oclMat(KRT_mat);
|
||||
// transfer K_Rinv and T into a single cl_mem
|
||||
map_x.create(dst_roi.size(), CV_32F);
|
||||
map_y.create(dst_roi.size(), CV_32F);
|
||||
xmap.create(dst_roi.size(), CV_32F);
|
||||
ymap.create(dst_roi.size(), CV_32F);
|
||||
|
||||
int tl_u = dst_roi.tl().x;
|
||||
int tl_v = dst_roi.tl().y;
|
||||
|
||||
Context *clCxt = Context::getContext();
|
||||
String kernelName = "buildWarpPlaneMaps";
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
|
||||
int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_x.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_y.data));
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KRT_mat.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_y.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
|
||||
|
||||
size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
|
||||
size_t localThreads[3] = { 32, 8, 1 };
|
||||
|
||||
openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPlaneMaps", globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// buildWarpCylyndricalMaps
|
||||
|
||||
void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
|
||||
oclMat &map_x, oclMat &map_y)
|
||||
oclMat &xmap, oclMat &ymap)
|
||||
{
|
||||
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
|
||||
CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
|
||||
@ -108,36 +111,40 @@ void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Ma
|
||||
|
||||
oclMat KR_oclMat(K_Rinv.reshape(1, 1));
|
||||
|
||||
map_x.create(dst_roi.size(), CV_32F);
|
||||
map_y.create(dst_roi.size(), CV_32F);
|
||||
xmap.create(dst_roi.size(), CV_32F);
|
||||
ymap.create(dst_roi.size(), CV_32F);
|
||||
|
||||
int tl_u = dst_roi.tl().x;
|
||||
int tl_v = dst_roi.tl().y;
|
||||
|
||||
Context *clCxt = Context::getContext();
|
||||
String kernelName = "buildWarpCylindricalMaps";
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
|
||||
int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_x.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_y.data));
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_y.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
|
||||
|
||||
size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
|
||||
size_t localThreads[3] = { 32, 8, 1 };
|
||||
|
||||
openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpCylindricalMaps", globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// buildWarpSphericalMaps
|
||||
|
||||
void cv::ocl::buildWarpSphericalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
|
||||
oclMat &map_x, oclMat &map_y)
|
||||
oclMat &xmap, oclMat &ymap)
|
||||
{
|
||||
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
|
||||
CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
|
||||
@ -147,37 +154,41 @@ void cv::ocl::buildWarpSphericalMaps(Size /*src_size*/, Rect dst_roi, const Mat
|
||||
|
||||
oclMat KR_oclMat(K_Rinv.reshape(1, 1));
|
||||
// transfer K_Rinv, R_Kinv into a single cl_mem
|
||||
map_x.create(dst_roi.size(), CV_32F);
|
||||
map_y.create(dst_roi.size(), CV_32F);
|
||||
xmap.create(dst_roi.size(), CV_32F);
|
||||
ymap.create(dst_roi.size(), CV_32F);
|
||||
|
||||
int tl_u = dst_roi.tl().x;
|
||||
int tl_v = dst_roi.tl().y;
|
||||
|
||||
Context *clCxt = Context::getContext();
|
||||
String kernelName = "buildWarpSphericalMaps";
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
|
||||
int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_x.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_y.data));
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_y.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
|
||||
|
||||
size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
|
||||
size_t localThreads[3] = { 32, 8, 1 };
|
||||
openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpSphericalMaps", globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// buildWarpAffineMaps
|
||||
|
||||
void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
|
||||
{
|
||||
|
||||
CV_Assert(M.rows == 2 && M.cols == 3);
|
||||
CV_Assert(dsize.area());
|
||||
|
||||
xmap.create(dsize, CV_32FC1);
|
||||
ymap.create(dsize, CV_32FC1);
|
||||
@ -194,29 +205,34 @@ void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat
|
||||
iM.convertTo(coeffsMat, coeffsMat.type());
|
||||
}
|
||||
|
||||
int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
|
||||
int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
|
||||
|
||||
oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
|
||||
|
||||
Context *clCxt = Context::getContext();
|
||||
String kernelName = "buildWarpAffineMaps";
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
|
||||
|
||||
size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
|
||||
size_t localThreads[3] = { 32, 8, 1 };
|
||||
openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpAffineMaps", globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// buildWarpPerspectiveMaps
|
||||
|
||||
void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
|
||||
{
|
||||
|
||||
CV_Assert(M.rows == 3 && M.cols == 3);
|
||||
CV_Assert(dsize.area() > 0);
|
||||
|
||||
xmap.create(dsize, CV_32FC1);
|
||||
ymap.create(dsize, CV_32FC1);
|
||||
@ -235,19 +251,21 @@ void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, o
|
||||
|
||||
oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
|
||||
|
||||
Context *clCxt = Context::getContext();
|
||||
String kernelName = "buildWarpPerspectiveMaps";
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
|
||||
int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
|
||||
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
|
||||
|
||||
size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
|
||||
|
||||
openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPerspectiveMaps", globalThreads, NULL, args, -1, -1);
|
||||
}
|
||||
|
@ -50,254 +50,430 @@
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
|
||||
#ifndef CV_DESCALE
|
||||
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
#endif
|
||||
|
||||
#ifndef FLT_EPSILON
|
||||
#define FLT_EPSILON 1.192092896e-07F
|
||||
#endif
|
||||
|
||||
namespace
|
||||
static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
|
||||
const std::string & additionalOptions = std::string(),
|
||||
const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
|
||||
{
|
||||
|
||||
void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx)
|
||||
{
|
||||
int channels = src.oclchannels();
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
String build_options = format("-D DEPTH_%d", src.depth());
|
||||
if (!additionalOptions.empty())
|
||||
build_options = build_options + additionalOptions;
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
void Gray2RGB_caller(const oclMat &src, oclMat &dst)
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d", src.depth());
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, "Gray2RGB", gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
void RGB2YUV_caller(const oclMat &src, oclMat &dst, int bidx)
|
||||
{
|
||||
int channels = src.oclchannels();
|
||||
String build_options = format("-D DEPTH_%d", src.depth());
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YUV", gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
void YUV2RGB_caller(const oclMat &src, oclMat &dst, int bidx)
|
||||
{
|
||||
int channels = src.oclchannels();
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
String buildOptions = format("-D DEPTH_%d", src.depth());
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGB", gt, lt, args, -1, -1, buildOptions.c_str());
|
||||
}
|
||||
|
||||
void YUV2RGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx)
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d", src.depth());
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
size_t gt[3] = {dst.cols / 2, dst.rows / 2, 1}, lt[3] = {16, 16, 1};
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGBA_NV12", gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
// Launches the "RGB2YCrCb" kernel of the cvt_color program for src -> dst.
// The program is built with "-D DEPTH_<src.depth()>"; offsets and steps are
// passed in single-channel element units; the global size is
// src.cols x src.rows with a 16x16 local size.
// `bidx` and src.oclchannels() are forwarded to the kernel unchanged.
void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx)
|
||||
{
|
||||
int channels = src.oclchannels();
|
||||
String build_options = format("-D DEPTH_%d", src.depth());
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YCrCb", gt, lt, args, -1, -1, build_options.c_str());
|
||||
// NOTE(review): everything from here to the closing brace uses `data1`,
// `data2` and `kernelName`, none of which is declared in this function, and
// declares `gt`/`lt` a second time. These lines look like the tail of a
// different definition spliced in at a diff hunk boundary -- confirm against
// the real file before relying on them.
if (!data1.empty())
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data ));
|
||||
if (!data2.empty())
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
|
||||
|
||||
size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
|
||||
static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
|
||||
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels());
|
||||
if (!additionalOptions.empty())
|
||||
build_options = build_options + additionalOptions;
|
||||
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
if (!data.empty())
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
|
||||
|
||||
size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(),
|
||||
dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d",
|
||||
src.depth(), greenbits, dst.channels());
|
||||
int src_offset = src.offset >> 1, src_step = src.step >> 1;
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d",
|
||||
src.depth(), greenbits, src.channels());
|
||||
int src_offset = (int)src.offset, src_step = (int)src.step;
|
||||
int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
|
||||
{
|
||||
Size sz = src.size();
|
||||
int scn = src.oclchannels(), depth = src.depth(), bidx;
|
||||
int scn = src.channels(), depth = src.depth(), bidx;
|
||||
|
||||
CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F);
|
||||
|
||||
switch (code)
|
||||
{
|
||||
/*
|
||||
case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR:
|
||||
case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA:
|
||||
case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555:
|
||||
case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555:
|
||||
case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
|
||||
case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA:
|
||||
*/
|
||||
case COLOR_BGR2GRAY:
|
||||
case COLOR_BGRA2GRAY:
|
||||
case COLOR_RGB2GRAY:
|
||||
case COLOR_RGBA2GRAY:
|
||||
case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR:
|
||||
case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA:
|
||||
{
|
||||
CV_Assert(scn == 3 || scn == 4);
|
||||
dcn = code == COLOR_BGR2BGRA || code == COLOR_RGB2BGRA || code == COLOR_BGRA2RGBA ? 4 : 3;
|
||||
bool reverse = !(code == COLOR_BGR2BGRA || code == COLOR_BGRA2BGR);
|
||||
dst.create(sz, CV_MAKE_TYPE(depth, dcn));
|
||||
RGB_caller(src, dst, reverse);
|
||||
break;
|
||||
}
|
||||
case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555:
|
||||
case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555:
|
||||
{
|
||||
CV_Assert((scn == 3 || scn == 4) && depth == CV_8U );
|
||||
bidx = code == COLOR_BGR2BGR565 || code == COLOR_BGR2BGR555 ||
|
||||
code == COLOR_BGRA2BGR565 || code == COLOR_BGRA2BGR555 ? 0 : 2;
|
||||
int greenbits = code == COLOR_BGR2BGR565 || code == COLOR_RGB2BGR565 ||
|
||||
code == COLOR_BGRA2BGR565 || code == COLOR_RGBA2BGR565 ? 6 : 5;
|
||||
dst.create(sz, CV_8UC2);
|
||||
toRGB5x5_caller(src, dst, bidx, greenbits, "RGB2RGB5x5");
|
||||
break;
|
||||
}
|
||||
case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
|
||||
case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA:
|
||||
{
|
||||
dcn = code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA || code == COLOR_BGR5652RGBA || code == COLOR_BGR5552RGBA ? 4 : 3;
|
||||
CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U);
|
||||
bidx = code == COLOR_BGR5652BGR || code == COLOR_BGR5552BGR ||
|
||||
code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA ? 0 : 2;
|
||||
int greenbits = code == COLOR_BGR5652BGR || code == COLOR_BGR5652RGB ||
|
||||
code == COLOR_BGR5652BGRA || code == COLOR_BGR5652RGBA ? 6 : 5;
|
||||
dst.create(sz, CV_MAKETYPE(depth, dcn));
|
||||
fromRGB5x5_caller(src, dst, bidx, greenbits, "RGB5x52RGB");
|
||||
break;
|
||||
}
|
||||
case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY:
|
||||
{
|
||||
CV_Assert(scn == 2 && depth == CV_8U);
|
||||
dst.create(sz, CV_8UC1);
|
||||
int greenbits = code == COLOR_BGR5652GRAY ? 6 : 5;
|
||||
fromRGB5x5_caller(src, dst, -1, greenbits, "BGR5x52Gray");
|
||||
break;
|
||||
}
|
||||
case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555:
|
||||
{
|
||||
CV_Assert(scn == 1 && depth == CV_8U);
|
||||
dst.create(sz, CV_8UC2);
|
||||
int greenbits = code == COLOR_GRAY2BGR565 ? 6 : 5;
|
||||
toRGB5x5_caller(src, dst, -1, greenbits, "Gray2BGR5x5");
|
||||
break;
|
||||
}
|
||||
case COLOR_RGB2GRAY: case COLOR_BGR2GRAY: case COLOR_RGBA2GRAY: case COLOR_BGRA2GRAY:
|
||||
{
|
||||
CV_Assert(scn == 3 || scn == 4);
|
||||
bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2;
|
||||
dst.create(sz, CV_MAKETYPE(depth, 1));
|
||||
RGB2Gray_caller(src, dst, bidx);
|
||||
fromRGB_caller(src, dst, bidx, "RGB2Gray");
|
||||
break;
|
||||
}
|
||||
case COLOR_GRAY2BGR:
|
||||
case COLOR_GRAY2BGRA:
|
||||
case COLOR_GRAY2BGR: case COLOR_GRAY2BGRA:
|
||||
{
|
||||
CV_Assert(scn == 1);
|
||||
dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
|
||||
dst.create(sz, CV_MAKETYPE(depth, dcn));
|
||||
Gray2RGB_caller(src, dst);
|
||||
toRGB_caller(src, dst, 0, "Gray2RGB");
|
||||
break;
|
||||
}
|
||||
case COLOR_BGR2YUV:
|
||||
case COLOR_RGB2YUV:
|
||||
case COLOR_BGR2YUV: case COLOR_RGB2YUV:
|
||||
{
|
||||
CV_Assert(scn == 3 || scn == 4);
|
||||
bidx = code == COLOR_RGB2YUV ? 0 : 2;
|
||||
bidx = code == COLOR_BGR2YUV ? 0 : 2;
|
||||
dst.create(sz, CV_MAKETYPE(depth, 3));
|
||||
RGB2YUV_caller(src, dst, bidx);
|
||||
fromRGB_caller(src, dst, bidx, "RGB2YUV");
|
||||
break;
|
||||
}
|
||||
case COLOR_YUV2BGR:
|
||||
case COLOR_YUV2RGB:
|
||||
case COLOR_YUV2BGR: case COLOR_YUV2RGB:
|
||||
{
|
||||
CV_Assert(scn == 3 || scn == 4);
|
||||
bidx = code == COLOR_YUV2RGB ? 0 : 2;
|
||||
dst.create(sz, CV_MAKETYPE(depth, 3));
|
||||
YUV2RGB_caller(src, dst, bidx);
|
||||
if( dcn <= 0 )
|
||||
dcn = 3;
|
||||
CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
|
||||
bidx = code == COLOR_YUV2BGR ? 0 : 2;
|
||||
dst.create(sz, CV_MAKETYPE(depth, dcn));
|
||||
toRGB_caller(src, dst, bidx, "YUV2RGB");
|
||||
break;
|
||||
}
|
||||
case COLOR_YUV2RGB_NV12:
|
||||
case COLOR_YUV2BGR_NV12:
|
||||
case COLOR_YUV2RGBA_NV12:
|
||||
case COLOR_YUV2BGRA_NV12:
|
||||
case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGR_NV12:
|
||||
case COLOR_YUV2RGBA_NV12: case COLOR_YUV2BGRA_NV12:
|
||||
{
|
||||
CV_Assert(scn == 1);
|
||||
CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
|
||||
dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ? 4 : 3;
|
||||
dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ? 4 : 3;
|
||||
bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ? 0 : 2;
|
||||
|
||||
Size dstSz(sz.width, sz.height * 2 / 3);
|
||||
dst.create(dstSz, CV_MAKETYPE(depth, dcn));
|
||||
YUV2RGB_NV12_caller(src, dst, bidx);
|
||||
toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12");
|
||||
break;
|
||||
}
|
||||
case COLOR_BGR2YCrCb:
|
||||
case COLOR_RGB2YCrCb:
|
||||
case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
|
||||
{
|
||||
CV_Assert(scn == 3 || scn == 4);
|
||||
bidx = code == COLOR_BGR2YCrCb ? 0 : 2;
|
||||
dst.create(sz, CV_MAKETYPE(depth, 3));
|
||||
RGB2YCrCb_caller(src, dst, bidx);
|
||||
fromRGB_caller(src, dst, bidx, "RGB2YCrCb");
|
||||
break;
|
||||
}
|
||||
case COLOR_YCrCb2BGR:
|
||||
case COLOR_YCrCb2RGB:
|
||||
case COLOR_YCrCb2BGR: case COLOR_YCrCb2RGB:
|
||||
{
|
||||
if( dcn <= 0 )
|
||||
dcn = 3;
|
||||
CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
|
||||
bidx = code == COLOR_YCrCb2BGR ? 0 : 2;
|
||||
dst.create(sz, CV_MAKETYPE(depth, dcn));
|
||||
toRGB_caller(src, dst, bidx, "YCrCb2RGB");
|
||||
break;
|
||||
}
|
||||
/*
|
||||
case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY:
|
||||
case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555:
|
||||
case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
|
||||
case COLOR_BGR2XYZ: case COLOR_RGB2XYZ:
|
||||
{
|
||||
CV_Assert(scn == 3 || scn == 4);
|
||||
bidx = code == COLOR_BGR2XYZ ? 0 : 2;
|
||||
dst.create(sz, CV_MAKE_TYPE(depth, 3));
|
||||
|
||||
Mat c;
|
||||
if (depth == CV_32F)
|
||||
{
|
||||
float coeffs[] =
|
||||
{
|
||||
0.412453f, 0.357580f, 0.180423f,
|
||||
0.212671f, 0.715160f, 0.072169f,
|
||||
0.019334f, 0.119193f, 0.950227f
|
||||
};
|
||||
if (bidx == 0)
|
||||
{
|
||||
std::swap(coeffs[0], coeffs[2]);
|
||||
std::swap(coeffs[3], coeffs[5]);
|
||||
std::swap(coeffs[6], coeffs[8]);
|
||||
}
|
||||
Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
int coeffs[] =
|
||||
{
|
||||
1689, 1465, 739,
|
||||
871, 2929, 296,
|
||||
79, 488, 3892
|
||||
};
|
||||
if (bidx == 0)
|
||||
{
|
||||
std::swap(coeffs[0], coeffs[2]);
|
||||
std::swap(coeffs[3], coeffs[5]);
|
||||
std::swap(coeffs[6], coeffs[8]);
|
||||
}
|
||||
Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
|
||||
}
|
||||
oclMat oclCoeffs(c);
|
||||
|
||||
fromRGB_caller(src, dst, bidx, "RGB2XYZ", "", oclCoeffs);
|
||||
break;
|
||||
}
|
||||
case COLOR_XYZ2BGR: case COLOR_XYZ2RGB:
|
||||
{
|
||||
if (dcn <= 0)
|
||||
dcn = 3;
|
||||
CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
|
||||
bidx = code == COLOR_XYZ2BGR ? 0 : 2;
|
||||
dst.create(sz, CV_MAKE_TYPE(depth, dcn));
|
||||
|
||||
Mat c;
|
||||
if (depth == CV_32F)
|
||||
{
|
||||
float coeffs[] =
|
||||
{
|
||||
3.240479f, -1.53715f, -0.498535f,
|
||||
-0.969256f, 1.875991f, 0.041556f,
|
||||
0.055648f, -0.204043f, 1.057311f
|
||||
};
|
||||
if (bidx == 0)
|
||||
{
|
||||
std::swap(coeffs[0], coeffs[6]);
|
||||
std::swap(coeffs[1], coeffs[7]);
|
||||
std::swap(coeffs[2], coeffs[8]);
|
||||
}
|
||||
Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
int coeffs[] =
|
||||
{
|
||||
13273, -6296, -2042,
|
||||
-3970, 7684, 170,
|
||||
228, -836, 4331
|
||||
};
|
||||
if (bidx == 0)
|
||||
{
|
||||
std::swap(coeffs[0], coeffs[6]);
|
||||
std::swap(coeffs[1], coeffs[7]);
|
||||
std::swap(coeffs[2], coeffs[8]);
|
||||
}
|
||||
Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
|
||||
}
|
||||
oclMat oclCoeffs(c);
|
||||
|
||||
toRGB_caller(src, dst, bidx, "XYZ2RGB", "", oclCoeffs);
|
||||
break;
|
||||
}
|
||||
case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL:
|
||||
case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL:
|
||||
{
|
||||
CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F));
|
||||
bidx = code == COLOR_BGR2HSV || code == COLOR_BGR2HLS ||
|
||||
code == COLOR_BGR2HSV_FULL || code == COLOR_BGR2HLS_FULL ? 0 : 2;
|
||||
int hrange = depth == CV_32F ? 360 : code == COLOR_BGR2HSV || code == COLOR_RGB2HSV ||
|
||||
code == COLOR_BGR2HLS || code == COLOR_RGB2HLS ? 180 : 256;
|
||||
bool is_hsv = code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL;
|
||||
dst.create(sz, CV_MAKETYPE(depth, 3));
|
||||
std::string kernelName = std::string("RGB2") + (is_hsv ? "HSV" : "HLS");
|
||||
|
||||
if (is_hsv && depth == CV_8U)
|
||||
{
|
||||
static oclMat sdiv_data;
|
||||
static oclMat hdiv_data180;
|
||||
static oclMat hdiv_data256;
|
||||
static int sdiv_table[256];
|
||||
static int hdiv_table180[256];
|
||||
static int hdiv_table256[256];
|
||||
static volatile bool initialized180 = false, initialized256 = false;
|
||||
volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;
|
||||
|
||||
if (!initialized)
|
||||
{
|
||||
int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12;
|
||||
oclMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256;
|
||||
|
||||
sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
|
||||
|
||||
int v = 255 << hsv_shift;
|
||||
if (!initialized180 && !initialized256)
|
||||
{
|
||||
for(int i = 1; i < 256; i++ )
|
||||
sdiv_table[i] = saturate_cast<int>(v/(1.*i));
|
||||
sdiv_data.upload(Mat(1, 256, CV_32SC1, sdiv_table));
|
||||
}
|
||||
|
||||
v = hrange << hsv_shift;
|
||||
for (int i = 1; i < 256; i++ )
|
||||
hdiv_table[i] = saturate_cast<int>(v/(6.*i));
|
||||
|
||||
hdiv_data.upload(Mat(1, 256, CV_32SC1, hdiv_table));
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
|
||||
return;
|
||||
}
|
||||
|
||||
fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
|
||||
break;
|
||||
}
|
||||
case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
|
||||
case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL:
|
||||
*/
|
||||
{
|
||||
if (dcn <= 0)
|
||||
dcn = 3;
|
||||
CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F));
|
||||
bidx = code == COLOR_HSV2BGR || code == COLOR_HLS2BGR ||
|
||||
code == COLOR_HSV2BGR_FULL || code == COLOR_HLS2BGR_FULL ? 0 : 2;
|
||||
int hrange = depth == CV_32F ? 360 : code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
|
||||
code == COLOR_HLS2BGR || code == COLOR_HLS2RGB ? 180 : 255;
|
||||
bool is_hsv = code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
|
||||
code == COLOR_HSV2BGR_FULL || code == COLOR_HSV2RGB_FULL;
|
||||
|
||||
dst.create(sz, CV_MAKETYPE(depth, dcn));
|
||||
|
||||
std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
|
||||
toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
|
||||
break;
|
||||
}
|
||||
case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:
|
||||
{
|
||||
CV_Assert(scn == 4 && depth == CV_8U);
|
||||
dst.create(sz, CV_MAKETYPE(depth, 4));
|
||||
std::string kernelName = code == COLOR_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA";
|
||||
|
||||
fromRGB_caller(src, dst, 0, kernelName);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
CV_Error(Error::StsBadFlag, "Unknown/unsupported color conversion code" );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn)
|
||||
{
|
||||
|
@ -747,6 +747,15 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
oclMat gsum(totalheight + 4, gimg.cols + 1, CV_32SC1);
|
||||
oclMat gsqsum(totalheight + 4, gimg.cols + 1, CV_32FC1);
|
||||
|
||||
int sdepth = 0;
|
||||
if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
|
||||
sdepth = CV_64FC1;
|
||||
else
|
||||
sdepth = CV_32FC1;
|
||||
sdepth = CV_MAT_DEPTH(sdepth);
|
||||
int type = CV_MAKE_TYPE(sdepth, 1);
|
||||
oclMat gsqsum_t(totalheight + 4, gimg.cols + 1, type);
|
||||
|
||||
cl_mem stagebuffer;
|
||||
cl_mem nodebuffer;
|
||||
cl_mem candidatebuffer;
|
||||
@ -754,6 +763,7 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
cv::Rect roi, roi2;
|
||||
cv::Mat imgroi, imgroisq;
|
||||
cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
|
||||
|
||||
int grp_per_CU = 12;
|
||||
|
||||
size_t blocksize = 8;
|
||||
@ -773,7 +783,7 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
roi2 = Rect(0, 0, sz.width - 1, sz.height - 1);
|
||||
resizeroi = gimg1(roi2);
|
||||
gimgroi = gsum(roi);
|
||||
gimgroisq = gsqsum(roi);
|
||||
gimgroisq = gsqsum_t(roi);
|
||||
int width = gimgroi.cols - 1 - cascade->orig_window_size.width;
|
||||
int height = gimgroi.rows - 1 - cascade->orig_window_size.height;
|
||||
scaleinfo[i].width_height = (width << 16) | height;
|
||||
@ -787,8 +797,13 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
scaleinfo[i].factor = factor;
|
||||
cv::ocl::resize(gimg, resizeroi, Size(sz.width - 1, sz.height - 1), 0, 0, INTER_LINEAR);
|
||||
cv::ocl::integral(resizeroi, gimgroi, gimgroisq);
|
||||
|
||||
indexy += sz.height;
|
||||
}
|
||||
if(gsqsum_t.depth() == CV_64F)
|
||||
gsqsum_t.convertTo(gsqsum, CV_32FC1);
|
||||
else
|
||||
gsqsum = gsqsum_t;
|
||||
|
||||
gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
|
||||
stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
|
||||
@ -996,7 +1011,12 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
int n_factors = 0;
|
||||
oclMat gsum;
|
||||
oclMat gsqsum;
|
||||
cv::ocl::integral(gimg, gsum, gsqsum);
|
||||
oclMat gsqsum_t;
|
||||
cv::ocl::integral(gimg, gsum, gsqsum_t);
|
||||
if(gsqsum_t.depth() == CV_64F)
|
||||
gsqsum_t.convertTo(gsqsum, CV_32FC1);
|
||||
else
|
||||
gsqsum = gsqsum_t;
|
||||
CvSize sz;
|
||||
std::vector<CvSize> sizev;
|
||||
std::vector<float> scalev;
|
||||
|
@ -199,10 +199,8 @@ namespace cv
|
||||
if (map1.empty())
|
||||
map1.swap(map2);
|
||||
|
||||
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
|
||||
/*|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);
|
||||
CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (interpolation == INTER_NEAREST &&
|
||||
(map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) )) ||
|
||||
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
|
||||
CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) ) ||
|
||||
(map1.type() == CV_32FC2 && !map2.data) ||
|
||||
(map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
|
||||
CV_Assert(!map2.data || map2.size() == map1.size());
|
||||
@ -232,8 +230,8 @@ namespace cv
|
||||
CV_Error(Error::StsBadArg, "Unsupported map types");
|
||||
|
||||
int ocn = dst.oclchannels();
|
||||
size_t localThreads[3] = { 16, 16, 1};
|
||||
size_t globalThreads[3] = { dst.cols, dst.rows, 1};
|
||||
size_t localThreads[3] = { 256, 1, 1 };
|
||||
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
|
||||
|
||||
Mat scalar(1, 1, CV_MAKE_TYPE(dst.depth(), ocn), borderValue);
|
||||
String buildOptions = format("-D %s -D %s -D T=%s%s", interMap[interpolation],
|
||||
@ -286,96 +284,63 @@ namespace cv
|
||||
|
||||
static void resize_gpu( const oclMat &src, oclMat &dst, double fx, double fy, int interpolation)
|
||||
{
|
||||
CV_Assert( (src.channels() == dst.channels()) );
|
||||
Context *clCxt = src.clCxt;
|
||||
float ifx = 1. / fx;
|
||||
float ify = 1. / fy;
|
||||
double ifx_d = 1. / fx;
|
||||
double ify_d = 1. / fy;
|
||||
int srcStep_in_pixel = src.step1() / src.oclchannels();
|
||||
int srcoffset_in_pixel = src.offset / src.elemSize();
|
||||
int dstStep_in_pixel = dst.step1() / dst.oclchannels();
|
||||
int dstoffset_in_pixel = dst.offset / dst.elemSize();
|
||||
float ifx = 1.f / fx, ify = 1.f / fy;
|
||||
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
|
||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
||||
int ocn = interpolation == INTER_LINEAR ? dst.oclchannels() : -1;
|
||||
int depth = interpolation == INTER_LINEAR ? dst.depth() : -1;
|
||||
|
||||
String kernelName;
|
||||
if (interpolation == INTER_LINEAR)
|
||||
kernelName = "resizeLN";
|
||||
else if (interpolation == INTER_NEAREST)
|
||||
kernelName = "resizeNN";
|
||||
const char * const interMap[] = { "NN", "LN", "CUBIC", "AREA", "LAN4" };
|
||||
std::string kernelName = std::string("resize") + interMap[interpolation];
|
||||
|
||||
const char * const typeMap[] = { "uchar", "uchar", "ushort", "ushort", "int", "int", "double" };
|
||||
const char * const channelMap[] = { "" , "", "2", "4", "4" };
|
||||
std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[dst.depth()], channelMap[dst.oclchannels()]);
|
||||
|
||||
//TODO: improve this kernel
|
||||
size_t blkSizeX = 16, blkSizeY = 16;
|
||||
size_t glbSizeX;
|
||||
if (src.type() == CV_8UC1)
|
||||
if (src.type() == CV_8UC1 && interpolation == INTER_LINEAR)
|
||||
{
|
||||
size_t cols = (dst.cols + dst.offset % 4 + 3) / 4;
|
||||
glbSizeX = cols % blkSizeX == 0 && cols != 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
|
||||
}
|
||||
else
|
||||
glbSizeX = dst.cols % blkSizeX == 0 && dst.cols != 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
|
||||
glbSizeX = dst.cols;
|
||||
|
||||
size_t glbSizeY = dst.rows % blkSizeY == 0 && dst.rows != 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
|
||||
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
|
||||
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
|
||||
size_t globalThreads[3] = { glbSizeX, dst.rows, 1 };
|
||||
size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
|
||||
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
if (interpolation == INTER_NEAREST)
|
||||
{
|
||||
args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dstoffset_in_pixel));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&srcoffset_in_pixel));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dstStep_in_pixel));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&srcStep_in_pixel));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||
if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
|
||||
{
|
||||
args.push_back( std::make_pair(sizeof(cl_double), (void *)&ifx_d));
|
||||
args.push_back( std::make_pair(sizeof(cl_double), (void *)&ify_d));
|
||||
}
|
||||
else
|
||||
{
|
||||
args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx));
|
||||
args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dstoffset_in_pixel));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&srcoffset_in_pixel));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dstStep_in_pixel));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&srcStep_in_pixel));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||
args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx));
|
||||
args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify));
|
||||
}
|
||||
args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_offset));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_offset));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_step));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_step));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||
args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx));
|
||||
args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify));
|
||||
|
||||
openCLExecuteKernel(clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args,
|
||||
ocn, depth, buildOption.c_str());
|
||||
}
|
||||
|
||||
void resize(const oclMat &src, oclMat &dst, Size dsize,
|
||||
double fx, double fy, int interpolation)
|
||||
void resize(const oclMat &src, oclMat &dst, Size dsize, double fx, double fy, int interpolation)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4
|
||||
|| src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4);
|
||||
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
|
||||
CV_Assert( src.size().area() > 0 );
|
||||
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
|
||||
CV_Assert(dsize.area() > 0 || (fx > 0 && fy > 0));
|
||||
|
||||
if (!(dsize == Size()) && (fx > 0 && fy > 0))
|
||||
if (dsize.width != (int)(src.cols * fx) || dsize.height != (int)(src.rows * fy))
|
||||
CV_Error(Error::StsUnmatchedSizes, "invalid dsize and fx, fy!");
|
||||
|
||||
if ( dsize == Size() )
|
||||
if (dsize.area() == 0)
|
||||
{
|
||||
dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
|
||||
CV_Assert(dsize.area() > 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
fx = (double)dsize.width / src.cols;
|
||||
@ -384,13 +349,7 @@ namespace cv
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
if ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR )
|
||||
{
|
||||
resize_gpu( src, dst, fx, fy, interpolation);
|
||||
return;
|
||||
}
|
||||
|
||||
CV_Error(Error::StsUnsupportedFormat, "Non-supported interpolation method");
|
||||
resize_gpu( src, dst, fx, fy, interpolation);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -785,7 +744,7 @@ namespace cv
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// integral
|
||||
|
||||
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
|
||||
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1);
|
||||
if (!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||
@ -794,6 +753,11 @@ namespace cv
|
||||
return;
|
||||
}
|
||||
|
||||
if( sdepth <= 0 )
|
||||
sdepth = CV_32S;
|
||||
sdepth = CV_MAT_DEPTH(sdepth);
|
||||
int type = CV_MAKE_TYPE(sdepth, 1);
|
||||
|
||||
int vlen = 4;
|
||||
int offset = src.offset / vlen;
|
||||
int pre_invalid = src.offset % vlen;
|
||||
@ -801,17 +765,26 @@ namespace cv
|
||||
|
||||
oclMat t_sum , t_sqsum;
|
||||
int w = src.cols + 1, h = src.rows + 1;
|
||||
int depth = src.depth() == CV_8U ? CV_32S : CV_64F;
|
||||
int type = CV_MAKE_TYPE(depth, 1);
|
||||
|
||||
char build_option[250];
|
||||
if(Context::getContext()->supportsFeature(ocl::FEATURE_CL_DOUBLE))
|
||||
{
|
||||
t_sqsum.create(src.cols, src.rows, CV_64FC1);
|
||||
sqsum.create(h, w, CV_64FC1);
|
||||
sprintf(build_option, "-D TYPE=double -D TYPE4=double4 -D convert_TYPE4=convert_double4");
|
||||
}
|
||||
else
|
||||
{
|
||||
t_sqsum.create(src.cols, src.rows, CV_32FC1);
|
||||
sqsum.create(h, w, CV_32FC1);
|
||||
sprintf(build_option, "-D TYPE=float -D TYPE4=float4 -D convert_TYPE4=convert_float4");
|
||||
}
|
||||
|
||||
t_sum.create(src.cols, src.rows, type);
|
||||
sum.create(h, w, type);
|
||||
|
||||
t_sqsum.create(src.cols, src.rows, CV_32FC1);
|
||||
sqsum.create(h, w, CV_32FC1);
|
||||
|
||||
int sum_offset = sum.offset / vlen;
|
||||
int sqsum_offset = sqsum.offset / vlen;
|
||||
int sum_offset = sum.offset / sum.elemSize();
|
||||
int sqsum_offset = sqsum.offset / sqsum.elemSize();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
|
||||
@ -823,8 +796,9 @@ namespace cv
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sqsum.step));
|
||||
size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth);
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, sdepth, build_option);
|
||||
|
||||
args.clear();
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
|
||||
@ -834,15 +808,16 @@ namespace cv
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sqsum.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sqsum.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum_offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
|
||||
size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth);
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, sdepth, build_option);
|
||||
}
|
||||
|
||||
void integral(const oclMat &src, oclMat &sum)
|
||||
void integral(const oclMat &src, oclMat &sum, int sdepth)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1);
|
||||
int vlen = 4;
|
||||
@ -850,10 +825,13 @@ namespace cv
|
||||
int pre_invalid = src.offset % vlen;
|
||||
int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
|
||||
|
||||
if( sdepth <= 0 )
|
||||
sdepth = CV_32S;
|
||||
sdepth = CV_MAT_DEPTH(sdepth);
|
||||
int type = CV_MAKE_TYPE(sdepth, 1);
|
||||
|
||||
oclMat t_sum;
|
||||
int w = src.cols + 1, h = src.rows + 1;
|
||||
int depth = src.depth() == CV_8U ? CV_32S : CV_32F;
|
||||
int type = CV_MAKE_TYPE(depth, 1);
|
||||
|
||||
t_sum.create(src.cols, src.rows, type);
|
||||
sum.create(h, w, type);
|
||||
@ -869,7 +847,7 @@ namespace cv
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step));
|
||||
size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth);
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, sdepth);
|
||||
|
||||
args.clear();
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
|
||||
@ -880,7 +858,7 @@ namespace cv
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum_offset));
|
||||
size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, sdepth);
|
||||
}
|
||||
|
||||
/////////////////////// corner //////////////////////////////
|
||||
|
@ -268,12 +268,15 @@ namespace cv
|
||||
void matchTemplate_CCORR_NORMED(
|
||||
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
|
||||
{
|
||||
cv::ocl::oclMat temp;
|
||||
matchTemplate_CCORR(image, templ, result, buf);
|
||||
buf.image_sums.resize(1);
|
||||
buf.image_sqsums.resize(1);
|
||||
|
||||
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
||||
|
||||
integral(image.reshape(1), buf.image_sums[0], temp);
|
||||
if(temp.depth() == CV_64F)
|
||||
temp.convertTo(buf.image_sqsums[0], CV_32FC1);
|
||||
else
|
||||
buf.image_sqsums[0] = temp;
|
||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
@ -439,7 +442,12 @@ namespace cv
|
||||
{
|
||||
buf.image_sums.resize(1);
|
||||
buf.image_sqsums.resize(1);
|
||||
integral(image, buf.image_sums[0], buf.image_sqsums[0]);
|
||||
cv::ocl::oclMat temp;
|
||||
integral(image, buf.image_sums[0], temp);
|
||||
if(temp.depth() == CV_64F)
|
||||
temp.convertTo(buf.image_sqsums[0], CV_32FC1);
|
||||
else
|
||||
buf.image_sqsums[0] = temp;
|
||||
|
||||
templ_sum[0] = (float)sum(templ)[0];
|
||||
|
||||
@ -475,10 +483,14 @@ namespace cv
|
||||
templ_sum *= scale;
|
||||
buf.image_sums.resize(buf.images.size());
|
||||
buf.image_sqsums.resize(buf.images.size());
|
||||
|
||||
cv::ocl::oclMat temp;
|
||||
for(int i = 0; i < image.oclchannels(); i ++)
|
||||
{
|
||||
integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
|
||||
integral(buf.images[i], buf.image_sums[i], temp);
|
||||
if(temp.depth() == CV_64F)
|
||||
temp.convertTo(buf.image_sqsums[i], CV_32FC1);
|
||||
else
|
||||
buf.image_sqsums[i] = temp;
|
||||
}
|
||||
|
||||
switch(image.oclchannels())
|
||||
|
@ -34,9 +34,13 @@
|
||||
//
|
||||
//
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
__kernel void LUT_C1( __global const srcT * src, __global const dstT *lut,
|
||||
__global dstT *dst,
|
||||
|
@ -44,11 +44,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -65,12 +65,16 @@ __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_st
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
#ifdef INTEL_DEVICE //workaround for intel compiler bug
|
||||
if(src1_index >= 0 && src2_index >= 0)
|
||||
#endif
|
||||
{
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
dstT t1 = convertToDstT(src2[src2_index]);
|
||||
dstT t2 = t0 - t1;
|
||||
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
dstT t1 = convertToDstT(src2[src2_index]);
|
||||
dstT t2 = t0 - t1;
|
||||
|
||||
dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
|
||||
dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -85,9 +89,13 @@ __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
#ifdef INTEL_DEVICE //workaround for intel compiler bug
|
||||
if(src1_index >= 0)
|
||||
#endif
|
||||
{
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
|
||||
dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
|
||||
dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -44,11 +44,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,14 +43,6 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,98 +43,99 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#define CV_PI 3.1415926535897932384626433832795
|
||||
#ifndef DBL_EPSILON
|
||||
#define DBL_EPSILON 0x1.0p-52
|
||||
#endif
|
||||
#else
|
||||
#define CV_PI 3.1415926535897932384626433832795f
|
||||
#ifndef DBL_EPSILON
|
||||
#define DBL_EPSILON 0x1.0p-52f
|
||||
#endif
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define CV_PI M_PI
|
||||
#else
|
||||
#define CV_PI M_PI_F
|
||||
#endif
|
||||
|
||||
|
||||
__kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int src1_offset,
|
||||
__global float *src2, int src2_step, int src2_offset,
|
||||
__global float *dst1, int dst1_step, int dst1_offset, //magnitude
|
||||
__global float *dst2, int dst2_step, int dst2_offset, //cartToPolar
|
||||
int rows, int cols, int angInDegree)
|
||||
__global float *dst1, int dst1_step, int dst1_offset, // magnitude
|
||||
__global float *dst2, int dst2_step, int dst2_offset, // cartToPolar
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset);
|
||||
|
||||
int dst1_index = mad24(y, dst1_step, (x << 2) + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, (x << 2) + dst2_offset);
|
||||
int dst1_index = mad24(y, dst1_step, x + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, x + dst2_offset);
|
||||
|
||||
float x = *((__global float *)((__global char *)src1 + src1_index));
|
||||
float y = *((__global float *)((__global char *)src2 + src2_index));
|
||||
float x = src1[src1_index];
|
||||
float y = src2[src2_index];
|
||||
|
||||
float x2 = x * x;
|
||||
float y2 = y * y;
|
||||
|
||||
float magnitude = sqrt(x2 + y2);
|
||||
float cartToPolar;
|
||||
|
||||
float tmp = y >= 0 ? 0 : CV_PI*2;
|
||||
tmp = x < 0 ? CV_PI : tmp;
|
||||
|
||||
float tmp1 = y >= 0 ? CV_PI*0.5f : CV_PI*1.5f;
|
||||
cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON) + tmp :
|
||||
tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
|
||||
float cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + FLT_EPSILON) + tmp :
|
||||
tmp1 - x*y/(y2 + 0.28f*x2 + FLT_EPSILON);
|
||||
|
||||
cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI);
|
||||
#ifdef DEGREE
|
||||
cartToPolar *= (180/CV_PI);
|
||||
#endif
|
||||
|
||||
*((__global float *)((__global char *)dst1 + dst1_index)) = magnitude;
|
||||
*((__global float *)((__global char *)dst2 + dst2_index)) = cartToPolar;
|
||||
dst1[dst1_index] = magnitude;
|
||||
dst2[dst2_index] = cartToPolar;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
|
||||
__kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int src1_offset,
|
||||
__global double *src2, int src2_step, int src2_offset,
|
||||
__global double *dst1, int dst1_step, int dst1_offset,
|
||||
__global double *dst2, int dst2_step, int dst2_offset,
|
||||
int rows, int cols, int angInDegree)
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset);
|
||||
|
||||
int dst1_index = mad24(y, dst1_step, (x << 3) + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, (x << 3) + dst2_offset);
|
||||
int dst1_index = mad24(y, dst1_step, x + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, x + dst2_offset);
|
||||
|
||||
double x = *((__global double *)((__global char *)src1 + src1_index));
|
||||
double y = *((__global double *)((__global char *)src2 + src2_index));
|
||||
double x = src1[src1_index];
|
||||
double y = src2[src2_index];
|
||||
|
||||
double x2 = x * x;
|
||||
double y2 = y * y;
|
||||
|
||||
double magnitude = sqrt(x2 + y2);
|
||||
double cartToPolar;
|
||||
|
||||
float tmp = y >= 0 ? 0 : CV_PI*2;
|
||||
tmp = x < 0 ? CV_PI : tmp;
|
||||
|
||||
float tmp1 = y >= 0 ? CV_PI*0.5 : CV_PI*1.5;
|
||||
cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + (float)DBL_EPSILON) + tmp :
|
||||
tmp1 - x*y/(y2 + 0.28f*x2 + (float)DBL_EPSILON);
|
||||
double cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON) + tmp :
|
||||
tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
|
||||
|
||||
cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI);
|
||||
#ifdef DEGREE
|
||||
cartToPolar *= (180/CV_PI);
|
||||
#endif
|
||||
|
||||
*((__global double *)((__global char *)dst1 + dst1_index)) = magnitude;
|
||||
*((__global double *)((__global char *)dst2 + dst2_index)) = cartToPolar;
|
||||
dst1[dst1_index] = magnitude;
|
||||
dst2[dst2_index] = cartToPolar;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,9 +43,13 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////////LOG/////////////////////////////////////////////////////
|
||||
|
@ -43,54 +43,32 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
__kernel void arithm_magnitude_D5 (__global float *src1, int src1_step, int src1_offset,
|
||||
__global float *src2, int src2_step, int src2_offset,
|
||||
__global float *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
|
||||
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
|
||||
|
||||
float data1 = *((__global float *)((__global char *)src1 + src1_index));
|
||||
float data2 = *((__global float *)((__global char *)src2 + src2_index));
|
||||
|
||||
float tmp = sqrt(data1 * data1 + data2 * data2);
|
||||
|
||||
*((__global float *)((__global char *)dst + dst_index)) = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_magnitude_D6 (__global double *src1, int src1_step, int src1_offset,
|
||||
__global double *src2, int src2_step, int src2_offset,
|
||||
__global double *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
|
||||
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
|
||||
|
||||
double data1 = *((__global double *)((__global char *)src1 + src1_index));
|
||||
double data2 = *((__global double *)((__global char *)src2 + src2_index));
|
||||
|
||||
double tmp = sqrt(data1 * data1 + data2 * data2);
|
||||
|
||||
*((__global double *)((__global char *)dst + dst_index)) = tmp;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
__kernel void arithm_magnitude(__global T *src1, int src1_step, int src1_offset,
|
||||
__global T *src2, int src2_step, int src2_offset,
|
||||
__global T *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
|
||||
T data1 = src1[src1_index];
|
||||
T data2 = src2[src2_index];
|
||||
|
||||
T tmp = hypot(data1, data2);
|
||||
dst[dst_index] = tmp;
|
||||
}
|
||||
}
|
||||
|
@ -45,7 +45,7 @@
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
|
@ -44,8 +44,13 @@
|
||||
//M*/
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define RES_TYPE double4
|
||||
#define CONVERT_RES_TYPE convert_double4
|
||||
#else
|
||||
@ -222,8 +227,9 @@ __kernel void arithm_op_minMaxLoc(int cols, int invalid_cols, int offset, int el
|
||||
{
|
||||
localmem_min[lid] = min(minval,localmem_min[lid]);
|
||||
localmem_max[lid] = max(maxval,localmem_max[lid]);
|
||||
localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == minval, minloc, localmem_minloc[lid]);
|
||||
localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == maxval, maxloc, localmem_maxloc[lid]);
|
||||
VEC_TYPE minVal = localmem_min[lid], maxVal = localmem_max[lid];
|
||||
localmem_minloc[lid] = CONDITION_FUNC(minVal == minval, minloc, localmem_minloc[lid]);
|
||||
localmem_maxloc[lid] = CONDITION_FUNC(maxVal == maxval, maxloc, localmem_maxloc[lid]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
@ -234,8 +240,10 @@ __kernel void arithm_op_minMaxLoc(int cols, int invalid_cols, int offset, int el
|
||||
int lid2 = lsize + lid;
|
||||
localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
|
||||
localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
|
||||
localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == localmem_min[lid2], localmem_minloc[lid2], localmem_minloc[lid]);
|
||||
localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == localmem_max[lid2], localmem_maxloc[lid2], localmem_maxloc[lid]);
|
||||
VEC_TYPE min1 = localmem_min[lid], min2 = localmem_min[lid2];
|
||||
localmem_minloc[lid] = CONDITION_FUNC(min1 == min2, localmem_minloc[lid2], localmem_minloc[lid]);
|
||||
VEC_TYPE max1 = localmem_max[lid], max2 = localmem_max[lid2];
|
||||
localmem_maxloc[lid] = CONDITION_FUNC(max1 == max2, localmem_maxloc[lid2], localmem_maxloc[lid]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
@ -44,8 +44,13 @@
|
||||
//M*/
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define RES_TYPE double4
|
||||
#define CONVERT_RES_TYPE convert_double4
|
||||
#else
|
||||
@ -152,24 +157,26 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
|
||||
int id = get_global_id(0);
|
||||
int idx = id + (id / cols) * invalid_cols;
|
||||
int midx = id + (id / cols) * minvalid_cols;
|
||||
|
||||
__local VEC_TYPE lm_max[128],lm_min[128];
|
||||
VEC_TYPE minval,maxval,temp,m_temp;
|
||||
__local VEC_TYPE_LOC lm_maxloc[128],lm_minloc[128];
|
||||
VEC_TYPE_LOC minloc,maxloc,temploc,negative = -1,one = 1,zero = 0;
|
||||
VEC_TYPE minval, maxval, temp, m_temp, zeroVal = (VEC_TYPE)(0);
|
||||
__local VEC_TYPE_LOC lm_maxloc[128], lm_minloc[128];
|
||||
VEC_TYPE_LOC minloc, maxloc, temploc, negative = -1, one = 1, zero = 0;
|
||||
|
||||
if(id < elemnum)
|
||||
{
|
||||
temp = vload4(idx, &src[offset]);
|
||||
m_temp = CONVERT_TYPE(vload4(midx,&mask[moffset]));
|
||||
int idx_c = (idx << 2) + offset;
|
||||
temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
|
||||
if(id % cols == cols - 1)
|
||||
if (id % cols == cols - 1)
|
||||
{
|
||||
repeat_me(m_temp);
|
||||
repeat_e(temploc);
|
||||
}
|
||||
minval = m_temp != (VEC_TYPE)0 ? temp : (VEC_TYPE)MAX_VAL;
|
||||
maxval = m_temp != (VEC_TYPE)0 ? temp : (VEC_TYPE)MIN_VAL;
|
||||
minloc = CONDITION_FUNC(m_temp != (VEC_TYPE)0, temploc , negative);
|
||||
minval = m_temp != zeroVal ? temp : (VEC_TYPE)MAX_VAL;
|
||||
maxval = m_temp != zeroVal ? temp : (VEC_TYPE)MIN_VAL;
|
||||
minloc = CONDITION_FUNC(m_temp != zeroVal, temploc , negative);
|
||||
maxloc = minloc;
|
||||
}
|
||||
else
|
||||
@ -179,6 +186,7 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
|
||||
minloc = negative;
|
||||
maxloc = negative;
|
||||
}
|
||||
|
||||
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
|
||||
{
|
||||
idx = id + (id / cols) * invalid_cols;
|
||||
@ -187,17 +195,18 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
|
||||
m_temp = CONVERT_TYPE(vload4(midx,&mask[moffset]));
|
||||
int idx_c = (idx << 2) + offset;
|
||||
temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
|
||||
if(id % cols == cols - 1)
|
||||
if (id % cols == cols - 1)
|
||||
{
|
||||
repeat_me(m_temp);
|
||||
repeat_e(temploc);
|
||||
}
|
||||
minval = min(minval,m_temp != (VEC_TYPE)0 ? temp : minval);
|
||||
maxval = max(maxval,m_temp != (VEC_TYPE)0 ? temp : maxval);
|
||||
minval = min(minval, m_temp != zeroVal ? temp : minval);
|
||||
maxval = max(maxval, m_temp != zeroVal ? temp : maxval);
|
||||
|
||||
minloc = CONDITION_FUNC((minval == temp) && (m_temp != (VEC_TYPE)0), temploc , minloc);
|
||||
maxloc = CONDITION_FUNC((maxval == temp) && (m_temp != (VEC_TYPE)0), temploc , maxloc);
|
||||
minloc = CONDITION_FUNC(minval == temp && m_temp != zeroVal, temploc , minloc);
|
||||
maxloc = CONDITION_FUNC(maxval == temp && m_temp != zeroVal, temploc , maxloc);
|
||||
}
|
||||
|
||||
if(lid > 127)
|
||||
{
|
||||
lm_min[lid - 128] = minval;
|
||||
@ -206,32 +215,37 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
|
||||
lm_maxloc[lid - 128] = maxloc;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(lid < 128)
|
||||
{
|
||||
lm_min[lid] = min(minval,lm_min[lid]);
|
||||
lm_max[lid] = max(maxval,lm_max[lid]);
|
||||
lm_min[lid] = min(minval, lm_min[lid]);
|
||||
lm_max[lid] = max(maxval, lm_max[lid]);
|
||||
VEC_TYPE con_min = CONVERT_TYPE(minloc != negative ? one : zero);
|
||||
VEC_TYPE con_max = CONVERT_TYPE(maxloc != negative ? one : zero);
|
||||
lm_minloc[lid] = CONDITION_FUNC((lm_min[lid] == minval) && (con_min != (VEC_TYPE)0), minloc , lm_minloc[lid]);
|
||||
lm_maxloc[lid] = CONDITION_FUNC((lm_max[lid] == maxval) && (con_max != (VEC_TYPE)0), maxloc , lm_maxloc[lid]);
|
||||
VEC_TYPE lmMinVal = lm_min[lid], lmMaxVal = lm_max[lid];
|
||||
lm_minloc[lid] = CONDITION_FUNC(lmMinVal == minval && con_min != zeroVal, minloc , lm_minloc[lid]);
|
||||
lm_maxloc[lid] = CONDITION_FUNC(lmMaxVal == maxval && con_max != zeroVal, maxloc , lm_maxloc[lid]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for(int lsize = 64; lsize > 0; lsize >>= 1)
|
||||
{
|
||||
if(lid < lsize)
|
||||
{
|
||||
int lid2 = lsize + lid;
|
||||
lm_min[lid] = min(lm_min[lid] , lm_min[lid2]);
|
||||
lm_max[lid] = max(lm_max[lid] , lm_max[lid2]);
|
||||
lm_min[lid] = min(lm_min[lid], lm_min[lid2]);
|
||||
lm_max[lid] = max(lm_max[lid], lm_max[lid2]);
|
||||
VEC_TYPE con_min = CONVERT_TYPE(lm_minloc[lid2] != negative ? one : zero);
|
||||
VEC_TYPE con_max = CONVERT_TYPE(lm_maxloc[lid2] != negative ? one : zero);
|
||||
lm_minloc[lid] =
|
||||
CONDITION_FUNC((lm_min[lid] == lm_min[lid2]) && (con_min != (VEC_TYPE)0), lm_minloc[lid2] , lm_minloc[lid]);
|
||||
lm_maxloc[lid] =
|
||||
CONDITION_FUNC((lm_max[lid] == lm_max[lid2]) && (con_max != (VEC_TYPE)0), lm_maxloc[lid2] , lm_maxloc[lid]);
|
||||
|
||||
VEC_TYPE lmMinVal1 = lm_min[lid], lmMinVal2 = lm_min[lid2];
|
||||
VEC_TYPE lmMaxVal1 = lm_max[lid], lmMaxVal2 = lm_max[lid2];
|
||||
lm_minloc[lid] = CONDITION_FUNC(lmMinVal1 == lmMinVal2 && con_min != zeroVal, lm_minloc[lid2] , lm_minloc[lid]);
|
||||
lm_maxloc[lid] = CONDITION_FUNC(lmMaxVal1 == lmMaxVal2 && con_max != zeroVal, lm_maxloc[lid2] , lm_maxloc[lid]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if( lid == 0)
|
||||
{
|
||||
dst[gid] = CONVERT_RES_TYPE(lm_min[0]);
|
||||
|
@ -42,7 +42,7 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
|
@ -44,17 +44,17 @@
|
||||
//
|
||||
//
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#define CV_PI 3.1415926535897932384626433832795
|
||||
#define CV_2PI 2*CV_PI
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define CV_PI M_PI
|
||||
#define CV_2PI (2 * CV_PI)
|
||||
#else
|
||||
#define CV_PI 3.1415926535897932384626433832795f
|
||||
#define CV_2PI 2*CV_PI
|
||||
#define CV_PI M_PI_F
|
||||
#define CV_2PI (2 * CV_PI)
|
||||
#endif
|
||||
|
||||
/**************************************phase in radians**************************************/
|
||||
@ -159,7 +159,7 @@ __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1,
|
||||
|
||||
double data1 = src1[src1_index];
|
||||
double data2 = src2[src2_index];
|
||||
double tmp = atan2(src2[src2_index], src1[src1_index]);
|
||||
double tmp = atan2(data2, data1);
|
||||
|
||||
tmp = 180 * tmp / CV_PI;
|
||||
if (tmp < 0)
|
||||
|
@ -44,42 +44,51 @@
|
||||
//M*/
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#define CV_PI 3.1415926535897932384626433832795
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define CV_PI M_PI
|
||||
#else
|
||||
#define CV_PI 3.1415926535897932384626433832795f
|
||||
#define CV_PI M_PI_F
|
||||
#endif
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////polarToCart with magnitude//////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void arithm_polarToCart_mag_D5 (__global float *src1, int src1_step, int src1_offset,//magnitue
|
||||
__global float *src2, int src2_step, int src2_offset,//angle
|
||||
__global float *dst1, int dst1_step, int dst1_offset,
|
||||
__global float *dst2, int dst2_step, int dst2_offset,
|
||||
int rows, int cols, int angInDegree)
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset);
|
||||
|
||||
int dst1_index = mad24(y, dst1_step, (x << 2) + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, (x << 2) + dst2_offset);
|
||||
int dst1_index = mad24(y, dst1_step, x + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, x + dst2_offset);
|
||||
|
||||
float x = *((__global float *)((__global char *)src1 + src1_index));
|
||||
float y = *((__global float *)((__global char *)src2 + src2_index));
|
||||
float x = src1[src1_index];
|
||||
float y = src2[src2_index];
|
||||
|
||||
#ifdef DEGREE
|
||||
float ascale = CV_PI/180.0f;
|
||||
float alpha = angInDegree == 1 ? y * ascale : y;
|
||||
float alpha = y * ascale;
|
||||
#else
|
||||
float alpha = y;
|
||||
#endif
|
||||
float a = cos(alpha) * x;
|
||||
float b = sin(alpha) * x;
|
||||
|
||||
*((__global float *)((__global char *)dst1 + dst1_index)) = a;
|
||||
*((__global float *)((__global char *)dst2 + dst2_index)) = b;
|
||||
dst1[dst1_index] = a;
|
||||
dst2[dst2_index] = b;
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,29 +97,33 @@ __kernel void arithm_polarToCart_mag_D6 (__global double *src1, int src1_step, i
|
||||
__global double *src2, int src2_step, int src2_offset,//angle
|
||||
__global double *dst1, int dst1_step, int dst1_offset,
|
||||
__global double *dst2, int dst2_step, int dst2_offset,
|
||||
int rows, int cols, int angInDegree)
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset);
|
||||
|
||||
int dst1_index = mad24(y, dst1_step, (x << 3) + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, (x << 3) + dst2_offset);
|
||||
int dst1_index = mad24(y, dst1_step, x + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, x + dst2_offset);
|
||||
|
||||
double x = *((__global double *)((__global char *)src1 + src1_index));
|
||||
double y = *((__global double *)((__global char *)src2 + src2_index));
|
||||
double x = src1[src1_index];
|
||||
double y = src2[src2_index];
|
||||
|
||||
#ifdef DEGREE
|
||||
float ascale = CV_PI/180.0;
|
||||
double alpha = angInDegree == 1 ? y * ascale : y;
|
||||
float alpha = y * ascale;
|
||||
#else
|
||||
float alpha = y;
|
||||
#endif
|
||||
double a = cos(alpha) * x;
|
||||
double b = sin(alpha) * x;
|
||||
|
||||
*((__global double *)((__global char *)dst1 + dst1_index)) = a;
|
||||
*((__global double *)((__global char *)dst2 + dst2_index)) = b;
|
||||
dst1[dst1_index] = a;
|
||||
dst2[dst2_index] = b;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -118,30 +131,35 @@ __kernel void arithm_polarToCart_mag_D6 (__global double *src1, int src1_step, i
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////polarToCart without magnitude//////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void arithm_polarToCart_D5 (__global float *src, int src_step, int src_offset,//angle
|
||||
__global float *dst1, int dst1_step, int dst1_offset,
|
||||
__global float *dst2, int dst2_step, int dst2_offset,
|
||||
int rows, int cols, int angInDegree)
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src_index = mad24(y, src_step, (x << 2) + src_offset);
|
||||
int src_index = mad24(y, src_step, x + src_offset);
|
||||
|
||||
int dst1_index = mad24(y, dst1_step, (x << 2) + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, (x << 2) + dst2_offset);
|
||||
int dst1_index = mad24(y, dst1_step, x + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, x + dst2_offset);
|
||||
|
||||
float y = *((__global float *)((__global char *)src + src_index));
|
||||
float y = src[src_index];
|
||||
|
||||
#ifdef DEGREE
|
||||
float ascale = CV_PI/180.0f;
|
||||
float alpha = angInDegree == 1 ? y * ascale : y;
|
||||
float alpha = y * ascale;
|
||||
#else
|
||||
float alpha = y;
|
||||
#endif
|
||||
float a = cos(alpha);
|
||||
float b = sin(alpha);
|
||||
|
||||
*((__global float *)((__global char *)dst1 + dst1_index)) = a;
|
||||
*((__global float *)((__global char *)dst2 + dst2_index)) = b;
|
||||
dst1[dst1_index] = a;
|
||||
dst2[dst2_index] = b;
|
||||
}
|
||||
}
|
||||
|
||||
@ -149,27 +167,31 @@ __kernel void arithm_polarToCart_D5 (__global float *src, int src_step, int sr
|
||||
__kernel void arithm_polarToCart_D6 (__global float *src, int src_step, int src_offset,//angle
|
||||
__global float *dst1, int dst1_step, int dst1_offset,
|
||||
__global float *dst2, int dst2_step, int dst2_offset,
|
||||
int rows, int cols, int angInDegree)
|
||||
int rows, int cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src_index = mad24(y, src_step, (x << 3) + src_offset);
|
||||
int src_index = mad24(y, src_step, x + src_offset);
|
||||
|
||||
int dst1_index = mad24(y, dst1_step, (x << 3) + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, (x << 3) + dst2_offset);
|
||||
int dst1_index = mad24(y, dst1_step, x + dst1_offset);
|
||||
int dst2_index = mad24(y, dst2_step, x + dst2_offset);
|
||||
|
||||
double y = *((__global double *)((__global char *)src + src_index));
|
||||
double y = src[src_index];
|
||||
|
||||
float ascale = CV_PI/180.0;
|
||||
double alpha = angInDegree == 1 ? y * ascale : y;
|
||||
#ifdef DEGREE
|
||||
float ascale = CV_PI/180.0f;
|
||||
float alpha = y * ascale;
|
||||
#else
|
||||
float alpha = y;
|
||||
#endif
|
||||
double a = cos(alpha);
|
||||
double b = sin(alpha);
|
||||
|
||||
*((__global double *)((__global char *)dst1 + dst1_index)) = a;
|
||||
*((__global double *)((__global char *)dst2 + dst2_index)) = b;
|
||||
dst1[dst1_index] = a;
|
||||
dst2[dst2_index] = b;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -43,58 +43,31 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
typedef double F;
|
||||
typedef double4 F4;
|
||||
#define convert_F4 convert_double4;
|
||||
#else
|
||||
typedef float F;
|
||||
typedef float4 F4;
|
||||
#define convert_F4 convert_float4;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/************************************** pow **************************************/
|
||||
__kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offset,
|
||||
__global float *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, int dst_step1,
|
||||
F p)
|
||||
{
|
||||
|
||||
__kernel void arithm_pow(__global VT * src, int src_step, int src_offset,
|
||||
__global VT * dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, T p)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < cols && y < rows)
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
|
||||
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
|
||||
int src_index = mad24(y, src_step, x + src_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
|
||||
float src1_data = *((__global float *)((__global char *)src1 + src1_index));
|
||||
float tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data))));
|
||||
VT src_data = src[src_index];
|
||||
VT tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 0 : exp(p * log(fabs(src_data))));
|
||||
|
||||
*((__global float *)((__global char *)dst + dst_index)) = tmp;
|
||||
dst[dst_index] = tmp;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offset,
|
||||
__global double *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, int dst_step1,
|
||||
F p)
|
||||
{
|
||||
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
|
||||
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
|
||||
|
||||
double src1_data = *((__global double *)((__global char *)src1 + src1_index));
|
||||
double tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data))));
|
||||
*((__global double *)((__global char *)dst + dst_index)) = tmp;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,7 +43,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
|
@ -67,11 +67,14 @@ static float clamp1(float var, float learningRate, float diff, float minVar)
|
||||
{
|
||||
return fmax(var + learningRate * (diff * diff - var), minVar);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define T_FRAME uchar4
|
||||
#define T_MEAN_VAR float4
|
||||
#define CONVERT_TYPE convert_uchar4_sat
|
||||
#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
|
||||
|
||||
inline float4 cvt(const uchar4 val)
|
||||
{
|
||||
float4 result;
|
||||
@ -93,6 +96,14 @@ inline float sum(const float4 val)
|
||||
return (val.x + val.y + val.z);
|
||||
}
|
||||
|
||||
// Exchanges two float4 elements of ptr: the one at linear index
// (k * rows + y) * ptr_step + x and the one at ((k + 1) * rows + y) * ptr_step + x.
// NOTE(review): this looks like swapping the (x, y) entry of two consecutive
// row-stacked slices k and k + 1 -- confirm against the calling kernels.
static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
{
    const int lowerIdx = (k * rows + y) * ptr_step + x;
    const int upperIdx = ((k + 1) * rows + y) * ptr_step + x;

    const float4 saved = ptr[lowerIdx];
    ptr[lowerIdx] = ptr[upperIdx];
    ptr[upperIdx] = saved;
}
|
||||
|
||||
|
||||
static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
|
||||
{
|
||||
float4 result;
|
||||
@ -102,6 +113,7 @@ static float4 clamp1(const float4 var, float learningRate, const float4 diff, fl
|
||||
result.w = 0.0f;
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
@ -114,7 +126,7 @@ typedef struct
|
||||
float c_varMax;
|
||||
float c_tau;
|
||||
uchar c_shadowVal;
|
||||
}con_srtuct_t;
|
||||
} con_srtuct_t;
|
||||
|
||||
static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
|
||||
{
|
||||
@ -123,13 +135,6 @@ static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_ste
|
||||
ptr[((k + 1) * rows + y) * ptr_step + x] = val;
|
||||
}
|
||||
|
||||
static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
|
||||
{
|
||||
float4 val = ptr[(k * rows + y) * ptr_step + x];
|
||||
ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
|
||||
ptr[((k + 1) * rows + y) * ptr_step + x] = val;
|
||||
}
|
||||
|
||||
__kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
|
||||
__global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
|
||||
int frame_row, int frame_col, int frame_step, int fgmask_step,
|
||||
|
@ -43,7 +43,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
|
@ -63,14 +63,6 @@
|
||||
#define DIST_TYPE 0
|
||||
#endif
|
||||
|
||||
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||
static int bit1Count(int v)
|
||||
{
|
||||
v = v - ((v >> 1) & 0x55555555); // reuse input as temporary
|
||||
v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp
|
||||
return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
|
||||
}
|
||||
|
||||
// dirty fix for non-template support
|
||||
#if (DIST_TYPE == 0) // L1Dist
|
||||
# ifdef T_FLOAT
|
||||
@ -89,6 +81,13 @@ typedef float value_type;
|
||||
typedef float result_type;
|
||||
#define DIST_RES(x) sqrt(x)
|
||||
#elif (DIST_TYPE == 2) // Hamming
|
||||
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||
// Returns the number of bits set to 1 in the 32-bit value v.
// Pairs, nibbles and bytes are summed in parallel, then the four byte counts
// are added together by a multiply whose top byte holds the total.
static int bit1Count(int v)
{
    // Work on an unsigned copy: right shifts are then logical for every input
    // and the final multiply wraps modulo 2^32 instead of overflowing a
    // signed int (which is undefined behaviour).
    unsigned int u = (unsigned int)v;

    u = u - ((u >> 1) & 0x55555555u);                 // count per 2-bit field
    u = (u & 0x33333333u) + ((u >> 2) & 0x33333333u); // count per 4-bit field
    u = (u + (u >> 4)) & 0x0F0F0F0Fu;                 // count per byte

    // Multiplying by 0x01010101 accumulates all four byte counts in bits 24..31.
    return (int)((u * 0x01010101u) >> 24);
}
|
||||
#define DIST(x, y) bit1Count( (x) ^ (y) )
|
||||
typedef int value_type;
|
||||
typedef int result_type;
|
||||
|
@ -43,31 +43,25 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
__kernel
|
||||
void buildWarpPlaneMaps
|
||||
(
|
||||
__global float * map_x,
|
||||
__global float * map_y,
|
||||
__constant float * KRT,
|
||||
int tl_u,
|
||||
int tl_v,
|
||||
int cols,
|
||||
int rows,
|
||||
int step_x,
|
||||
int step_y,
|
||||
float scale
|
||||
)
|
||||
__kernel void buildWarpPlaneMaps(__global float * xmap, __global float * ymap,
|
||||
__constant float * KRT,
|
||||
int tl_u, int tl_v,
|
||||
int cols, int rows,
|
||||
int xmap_step, int ymap_step,
|
||||
int xmap_offset, int ymap_offset,
|
||||
float scale)
|
||||
{
|
||||
int du = get_global_id(0);
|
||||
int dv = get_global_id(1);
|
||||
step_x /= sizeof(float);
|
||||
step_y /= sizeof(float);
|
||||
|
||||
__constant float * ck_rinv = KRT;
|
||||
__constant float * ct = KRT + 9;
|
||||
|
||||
if (du < cols && dv < rows)
|
||||
{
|
||||
int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
|
||||
int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
|
||||
|
||||
float u = tl_u + du;
|
||||
float v = tl_v + dv;
|
||||
float x, y;
|
||||
@ -83,33 +77,27 @@ __kernel
|
||||
x /= z;
|
||||
y /= z;
|
||||
|
||||
map_x[dv * step_x + du] = x;
|
||||
map_y[dv * step_y + du] = y;
|
||||
xmap[xmap_index] = x;
|
||||
ymap[ymap_index] = y;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void buildWarpCylindricalMaps
|
||||
(
|
||||
__global float * map_x,
|
||||
__global float * map_y,
|
||||
__constant float * ck_rinv,
|
||||
int tl_u,
|
||||
int tl_v,
|
||||
int cols,
|
||||
int rows,
|
||||
int step_x,
|
||||
int step_y,
|
||||
float scale
|
||||
)
|
||||
__kernel void buildWarpCylindricalMaps(__global float * xmap, __global float * ymap,
|
||||
__constant float * ck_rinv,
|
||||
int tl_u, int tl_v,
|
||||
int cols, int rows,
|
||||
int xmap_step, int ymap_step,
|
||||
int xmap_offset, int ymap_offset,
|
||||
float scale)
|
||||
{
|
||||
int du = get_global_id(0);
|
||||
int dv = get_global_id(1);
|
||||
step_x /= sizeof(float);
|
||||
step_y /= sizeof(float);
|
||||
|
||||
if (du < cols && dv < rows)
|
||||
{
|
||||
int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
|
||||
int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
|
||||
|
||||
float u = tl_u + du;
|
||||
float v = tl_v + dv;
|
||||
float x, y;
|
||||
@ -127,33 +115,27 @@ __kernel
|
||||
if (z > 0) { x /= z; y /= z; }
|
||||
else x = y = -1;
|
||||
|
||||
map_x[dv * step_x + du] = x;
|
||||
map_y[dv * step_y + du] = y;
|
||||
xmap[xmap_index] = x;
|
||||
ymap[ymap_index] = y;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void buildWarpSphericalMaps
|
||||
(
|
||||
__global float * map_x,
|
||||
__global float * map_y,
|
||||
__constant float * ck_rinv,
|
||||
int tl_u,
|
||||
int tl_v,
|
||||
int cols,
|
||||
int rows,
|
||||
int step_x,
|
||||
int step_y,
|
||||
float scale
|
||||
)
|
||||
__kernel void buildWarpSphericalMaps(__global float * xmap, __global float * ymap,
|
||||
__constant float * ck_rinv,
|
||||
int tl_u, int tl_v,
|
||||
int cols, int rows,
|
||||
int xmap_step, int ymap_step,
|
||||
int xmap_offset, int ymap_offset,
|
||||
float scale)
|
||||
{
|
||||
int du = get_global_id(0);
|
||||
int dv = get_global_id(1);
|
||||
step_x /= sizeof(float);
|
||||
step_y /= sizeof(float);
|
||||
|
||||
if (du < cols && dv < rows)
|
||||
{
|
||||
int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
|
||||
int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
|
||||
|
||||
float u = tl_u + du;
|
||||
float v = tl_v + dv;
|
||||
float x, y;
|
||||
@ -174,63 +156,52 @@ __kernel
|
||||
if (z > 0) { x /= z; y /= z; }
|
||||
else x = y = -1;
|
||||
|
||||
map_x[dv * step_x + du] = x;
|
||||
map_y[dv * step_y + du] = y;
|
||||
xmap[xmap_index] = x;
|
||||
ymap[ymap_index] = y;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void buildWarpAffineMaps
|
||||
(
|
||||
__global float * xmap,
|
||||
__global float * ymap,
|
||||
__constant float * c_warpMat,
|
||||
int cols,
|
||||
int rows,
|
||||
int step_x,
|
||||
int step_y
|
||||
)
|
||||
__kernel void buildWarpAffineMaps(__global float * xmap, __global float * ymap,
|
||||
__constant float * c_warpMat,
|
||||
int cols, int rows,
|
||||
int xmap_step, int ymap_step,
|
||||
int xmap_offset, int ymap_offset)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
step_x /= sizeof(float);
|
||||
step_y /= sizeof(float);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
|
||||
const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
|
||||
int xmap_index = mad24(y, xmap_step, x + xmap_offset);
|
||||
int ymap_index = mad24(y, ymap_step, x + ymap_offset);
|
||||
|
||||
map_x[y * step_x + x] = xcoo;
|
||||
map_y[y * step_y + x] = ycoo;
|
||||
float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
|
||||
float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
|
||||
|
||||
xmap[xmap_index] = xcoo;
|
||||
ymap[ymap_index] = ycoo;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void buildWarpPerspectiveMaps
|
||||
(
|
||||
__global float * xmap,
|
||||
__global float * ymap,
|
||||
__constant float * c_warpMat,
|
||||
int cols,
|
||||
int rows,
|
||||
int step_x,
|
||||
int step_y
|
||||
)
|
||||
__kernel void buildWarpPerspectiveMaps(__global float * xmap, __global float * ymap,
|
||||
__constant float * c_warpMat,
|
||||
int cols, int rows,
|
||||
int xmap_step, int ymap_step,
|
||||
int xmap_offset, int ymap_offset)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
step_x /= sizeof(float);
|
||||
step_y /= sizeof(float);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
|
||||
int xmap_index = mad24(y, xmap_step, x + xmap_offset);
|
||||
int ymap_index = mad24(y, ymap_step, x + ymap_offset);
|
||||
|
||||
const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
|
||||
const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
|
||||
float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
|
||||
float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
|
||||
float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
|
||||
|
||||
map_x[y * step_x + x] = xcoo;
|
||||
map_y[y * step_y + x] = ycoo;
|
||||
xmap[xmap_index] = xcoo;
|
||||
ymap[ymap_index] = ycoo;
|
||||
}
|
||||
}
|
||||
|
@ -33,12 +33,17 @@
|
||||
//
|
||||
//
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
|
||||
int dstStep_in_piexl,int pixel_end)
|
||||
__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
|
||||
int cols, int rows,
|
||||
int dstStep_in_piexl, int pixel_end)
|
||||
{
|
||||
int id = get_global_id(0);
|
||||
int3 pixelid = (int3)(mul24(id,3),mad24(id,3,1),mad24(id,3,2));
|
||||
@ -88,13 +93,12 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY
|
||||
dst[addr.y] = outpix1;
|
||||
}
|
||||
else if(outx.x<cols && outy.x<rows)
|
||||
{
|
||||
dst[addr.x] = outpix0;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
|
||||
int srcStep_in_pixel,int pixel_end)
|
||||
__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
|
||||
int cols, int rows,
|
||||
int srcStep_in_pixel, int pixel_end)
|
||||
{
|
||||
int id = get_global_id(0)<<2;
|
||||
int y = id / cols;
|
||||
@ -145,7 +149,5 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY
|
||||
dst[outaddr.y] = outpixel1;
|
||||
}
|
||||
else if(outaddr.x <= pixel_end)
|
||||
{
|
||||
dst[outaddr.x] = pixel0;
|
||||
}
|
||||
}
|
||||
|
@ -46,41 +46,45 @@
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#ifndef hscale
|
||||
#define hscale 0
|
||||
#endif
|
||||
|
||||
#if defined (DEPTH_0)
|
||||
#ifndef hrange
|
||||
#define hrange 0
|
||||
#endif
|
||||
|
||||
#ifdef DEPTH_0
|
||||
#define DATA_TYPE uchar
|
||||
#define COEFF_TYPE int
|
||||
#define MAX_NUM 255
|
||||
#define HALF_MAX 128
|
||||
#define SAT_CAST(num) convert_uchar_sat(num)
|
||||
#define SAT_CAST(num) convert_uchar_sat_rte(num)
|
||||
#endif
|
||||
|
||||
#if defined (DEPTH_2)
|
||||
#ifdef DEPTH_2
|
||||
#define DATA_TYPE ushort
|
||||
#define COEFF_TYPE int
|
||||
#define MAX_NUM 65535
|
||||
#define HALF_MAX 32768
|
||||
#define SAT_CAST(num) convert_ushort_sat(num)
|
||||
#define SAT_CAST(num) convert_ushort_sat_rte(num)
|
||||
#endif
|
||||
|
||||
#if defined (DEPTH_5)
|
||||
#ifdef DEPTH_5
|
||||
#define DATA_TYPE float
|
||||
#define COEFF_TYPE float
|
||||
#define MAX_NUM 1.0f
|
||||
#define HALF_MAX 0.5f
|
||||
#define SAT_CAST(num) (num)
|
||||
#endif
|
||||
|
||||
#ifndef DATA_TYPE
|
||||
#define DATA_TYPE UNDEFINED
|
||||
#endif
|
||||
|
||||
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
enum
|
||||
{
|
||||
yuv_shift = 14,
|
||||
xyz_shift = 12,
|
||||
hsv_shift = 12,
|
||||
R2Y = 4899,
|
||||
G2Y = 9617,
|
||||
B2Y = 1868,
|
||||
@ -89,18 +93,18 @@ enum
|
||||
|
||||
///////////////////////////////////// RGB <-> GRAY //////////////////////////////////////
|
||||
|
||||
__kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step, int channels,
|
||||
__kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
|
||||
int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
|
||||
int src_offset, int dst_offset)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
int src_idx = mad24(y, src_step, src_offset + x * channels);
|
||||
int src_idx = mad24(y, src_step, src_offset + (x << 2));
|
||||
int dst_idx = mad24(y, dst_step, dst_offset + x);
|
||||
#if defined (DEPTH_5)
|
||||
#ifdef DEPTH_5
|
||||
dst[dst_idx] = src[src_idx + bidx] * 0.114f + src[src_idx + 1] * 0.587f + src[src_idx + (bidx^2)] * 0.299f;
|
||||
#else
|
||||
dst[dst_idx] = (DATA_TYPE)CV_DESCALE((src[src_idx + bidx] * B2Y + src[src_idx + 1] * G2Y + src[src_idx + (bidx^2)] * R2Y), yuv_shift);
|
||||
@ -108,22 +112,25 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step, int chann
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void Gray2RGB(int cols,int rows,int src_step,int dst_step,
|
||||
__kernel void Gray2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
|
||||
__global const DATA_TYPE* src, __global DATA_TYPE* dst,
|
||||
int src_offset, int dst_offset)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
int src_idx = mad24(y, src_step, src_offset + x);
|
||||
int dst_idx = mad24(y, dst_step, dst_offset + x * 4);
|
||||
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
|
||||
|
||||
DATA_TYPE val = src[src_idx];
|
||||
dst[dst_idx++] = val;
|
||||
dst[dst_idx++] = val;
|
||||
dst[dst_idx++] = val;
|
||||
dst[dst_idx] = MAX_NUM;
|
||||
dst[dst_idx] = val;
|
||||
dst[dst_idx + 1] = val;
|
||||
dst[dst_idx + 2] = val;
|
||||
#if dcn == 4
|
||||
dst[dst_idx + 3] = MAX_NUM;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -132,7 +139,7 @@ __kernel void Gray2RGB(int cols,int rows,int src_step,int dst_step,
|
||||
__constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
|
||||
__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
|
||||
|
||||
__kernel void RGB2YUV(int cols,int rows,int src_step,int dst_step,int channels,
|
||||
__kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
|
||||
int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
|
||||
int src_offset, int dst_offset)
|
||||
{
|
||||
@ -141,35 +148,34 @@ __kernel void RGB2YUV(int cols,int rows,int src_step,int dst_step,int channels,
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
x *= channels;
|
||||
x <<= 2;
|
||||
int src_idx = mad24(y, src_step, src_offset + x);
|
||||
int dst_idx = mad24(y, dst_step, dst_offset + x);
|
||||
dst += dst_idx;
|
||||
const DATA_TYPE rgb[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]};
|
||||
DATA_TYPE rgb[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
|
||||
|
||||
#if defined (DEPTH_5)
|
||||
#ifdef DEPTH_5
|
||||
__constant float * coeffs = c_RGB2YUVCoeffs_f;
|
||||
const DATA_TYPE Y = rgb[0] * coeffs[bidx] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx^2];
|
||||
const DATA_TYPE Cr = (rgb[bidx] - Y) * coeffs[3] + HALF_MAX;
|
||||
const DATA_TYPE Cb = (rgb[bidx^2] - Y) * coeffs[4] + HALF_MAX;
|
||||
DATA_TYPE Y = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx];
|
||||
DATA_TYPE Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX;
|
||||
DATA_TYPE Cb = (rgb[bidx] - Y) * coeffs[4] + HALF_MAX;
|
||||
#else
|
||||
__constant int * coeffs = c_RGB2YUVCoeffs_i;
|
||||
const int delta = HALF_MAX * (1 << yuv_shift);
|
||||
const int Y = CV_DESCALE(rgb[0] * coeffs[bidx] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx^2], yuv_shift);
|
||||
const int Cr = CV_DESCALE((rgb[bidx] - Y) * coeffs[3] + delta, yuv_shift);
|
||||
const int Cb = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[4] + delta, yuv_shift);
|
||||
int delta = HALF_MAX * (1 << yuv_shift);
|
||||
int Y = CV_DESCALE(rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx], yuv_shift);
|
||||
int Cr = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift);
|
||||
int Cb = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift);
|
||||
#endif
|
||||
|
||||
dst[0] = SAT_CAST( Y );
|
||||
dst[1] = SAT_CAST( Cr );
|
||||
dst[2] = SAT_CAST( Cb );
|
||||
dst[dst_idx] = SAT_CAST( Y );
|
||||
dst[dst_idx + 1] = SAT_CAST( Cr );
|
||||
dst[dst_idx + 2] = SAT_CAST( Cb );
|
||||
}
|
||||
}
|
||||
|
||||
__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
|
||||
__constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
|
||||
|
||||
__kernel void YUV2RGB(int cols,int rows,int src_step,int dst_step,int channels,
|
||||
__kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
|
||||
int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
|
||||
int src_offset, int dst_offset)
|
||||
{
|
||||
@ -178,27 +184,29 @@ __kernel void YUV2RGB(int cols,int rows,int src_step,int dst_step,int channels,
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
x *= channels;
|
||||
x <<= 2;
|
||||
int src_idx = mad24(y, src_step, src_offset + x);
|
||||
int dst_idx = mad24(y, dst_step, dst_offset + x);
|
||||
dst += dst_idx;
|
||||
const DATA_TYPE yuv[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]};
|
||||
DATA_TYPE yuv[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
|
||||
|
||||
#if defined (DEPTH_5)
|
||||
#ifdef DEPTH_5
|
||||
__constant float * coeffs = c_YUV2RGBCoeffs_f;
|
||||
const float b = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[3];
|
||||
const float g = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1];
|
||||
const float r = yuv[0] + (yuv[1] - HALF_MAX) * coeffs[0];
|
||||
float b = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[3];
|
||||
float g = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1];
|
||||
float r = yuv[0] + (yuv[1] - HALF_MAX) * coeffs[0];
|
||||
#else
|
||||
__constant int * coeffs = c_YUV2RGBCoeffs_i;
|
||||
const int b = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[3], yuv_shift);
|
||||
const int g = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1], yuv_shift);
|
||||
const int r = yuv[0] + CV_DESCALE((yuv[1] - HALF_MAX) * coeffs[0], yuv_shift);
|
||||
int b = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[3], yuv_shift);
|
||||
int g = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1], yuv_shift);
|
||||
int r = yuv[0] + CV_DESCALE((yuv[1] - HALF_MAX) * coeffs[0], yuv_shift);
|
||||
#endif
|
||||
|
||||
dst[bidx^2] = SAT_CAST( b );
|
||||
dst[1] = SAT_CAST( g );
|
||||
dst[bidx] = SAT_CAST( r );
|
||||
dst[dst_idx + bidx] = SAT_CAST( b );
|
||||
dst[dst_idx + 1] = SAT_CAST( g );
|
||||
dst[dst_idx + (bidx^2)] = SAT_CAST( r );
|
||||
#if dcn == 4
|
||||
dst[dst_idx + 3] = MAX_NUM;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -209,17 +217,17 @@ __constant int ITUR_BT_601_CVG = 852492;
|
||||
__constant int ITUR_BT_601_CVR = 1673527;
|
||||
__constant int ITUR_BT_601_SHIFT = 20;
|
||||
|
||||
__kernel void YUV2RGBA_NV12(int cols,int rows,int src_step,int dst_step,
|
||||
int bidx, int width, int height, __global const uchar* src, __global uchar* dst,
|
||||
__kernel void YUV2RGBA_NV12(int cols, int rows, int src_step, int dst_step,
|
||||
int bidx, __global const uchar* src, __global uchar* dst,
|
||||
int src_offset, int dst_offset)
|
||||
{
|
||||
const int x = get_global_id(0); // max_x = width / 2
|
||||
const int y = get_global_id(1); // max_y = height/ 2
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
if (y < height / 2 && x < width / 2 )
|
||||
if (y < rows / 2 && x < cols / 2 )
|
||||
{
|
||||
__global const uchar* ysrc = src + mad24(y << 1, src_step, (x << 1) + src_offset);
|
||||
__global const uchar* usrc = src + mad24(height + y, src_step, (x << 1) + src_offset);
|
||||
__global const uchar* usrc = src + mad24(rows + y, src_step, (x << 1) + src_offset);
|
||||
__global uchar* dst1 = dst + mad24(y << 1, dst_step, (x << 3) + dst_offset);
|
||||
__global uchar* dst2 = dst + mad24((y << 1) + 1, dst_step, (x << 3) + dst_offset);
|
||||
|
||||
@ -261,12 +269,12 @@ __kernel void YUV2RGBA_NV12(int cols,int rows,int src_step,int dst_step,
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
|
||||
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
|
||||
|
||||
__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
|
||||
__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
|
||||
|
||||
__kernel void RGB2YCrCb(int cols,int rows,int src_step,int dst_step,int channels,
|
||||
__kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
|
||||
int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
|
||||
int src_offset, int dst_offset)
|
||||
{
|
||||
@ -275,28 +283,735 @@ __kernel void RGB2YCrCb(int cols,int rows,int src_step,int dst_step,int channels
|
||||
|
||||
if (y < rows && x < cols)
|
||||
{
|
||||
x *= channels;
|
||||
x <<= 2;
|
||||
int src_idx = mad24(y, src_step, src_offset + x);
|
||||
int dst_idx = mad24(y, dst_step, dst_offset + x);
|
||||
|
||||
dst += dst_idx;
|
||||
const DATA_TYPE rgb[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
|
||||
DATA_TYPE rgb[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
|
||||
|
||||
#if defined (DEPTH_5)
|
||||
#ifdef DEPTH_5
|
||||
__constant float * coeffs = c_RGB2YCrCbCoeffs_f;
|
||||
const DATA_TYPE Y = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx];
|
||||
const DATA_TYPE Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX;
|
||||
const DATA_TYPE Cb = (rgb[bidx] - Y) * coeffs[4] + HALF_MAX;
|
||||
DATA_TYPE Y = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx];
|
||||
DATA_TYPE Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX;
|
||||
DATA_TYPE Cb = (rgb[bidx] - Y) * coeffs[4] + HALF_MAX;
|
||||
#else
|
||||
__constant int * coeffs = c_RGB2YCrCbCoeffs_i;
|
||||
const int delta = HALF_MAX * (1 << yuv_shift);
|
||||
const int Y = CV_DESCALE(rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx], yuv_shift);
|
||||
const int Cr = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift);
|
||||
const int Cb = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift);
|
||||
int delta = HALF_MAX * (1 << yuv_shift);
|
||||
int Y = CV_DESCALE(rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx], yuv_shift);
|
||||
int Cr = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift);
|
||||
int Cb = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift);
|
||||
#endif
|
||||
|
||||
dst[0] = SAT_CAST( Y );
|
||||
dst[1] = SAT_CAST( Cr );
|
||||
dst[2] = SAT_CAST( Cb );
|
||||
dst[dst_idx] = SAT_CAST( Y );
|
||||
dst[dst_idx + 1] = SAT_CAST( Cr );
|
||||
dst[dst_idx + 2] = SAT_CAST( Cb );
|
||||
}
|
||||
}
|
||||
|
||||
__constant float c_YCrCb2RGBCoeffs_f[4] = { 1.403f, -0.714f, -0.344f, 1.773f };
|
||||
__constant int c_YCrCb2RGBCoeffs_i[4] = { 22987, -11698, -5636, 29049 };
|
||||
|
||||
// YCrCb -> RGB/BGR, one pixel per work-item.
//   cols, rows            : extent of the 2-D range that is processed
//   src_step, dst_step    : row pitch, used directly as an element index
//   src_offset, dst_offset: start offset, used directly as an element index
//   bidx                  : element that receives blue; red goes to bidx^2
// Both buffers are addressed with a stride of four elements per pixel.
__kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
                        int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                        int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items that fall outside the image have nothing to do.
    if (row >= rows || col >= cols)
        return;

    const int elem = col << 2;
    __global const DATA_TYPE* in = src + mad24(row, src_step, src_offset + elem);
    __global DATA_TYPE* out = dst + mad24(row, dst_step, dst_offset + elem);

    const DATA_TYPE luma = in[0], cr = in[1], cb = in[2];

#ifdef DEPTH_5
    // Floating-point path: coefficients are applied directly.
    __constant float * k = c_YCrCb2RGBCoeffs_f;
    float r = luma + k[0] * (cr - HALF_MAX);
    float g = luma + k[1] * (cr - HALF_MAX) + k[2] * (cb - HALF_MAX);
    float b = luma + k[3] * (cb - HALF_MAX);
#else
    // Integer path: fixed-point coefficients, descaled by yuv_shift bits.
    __constant int * k = c_YCrCb2RGBCoeffs_i;
    int r = luma + CV_DESCALE(k[0] * (cr - HALF_MAX), yuv_shift);
    int g = luma + CV_DESCALE(k[1] * (cr - HALF_MAX) + k[2] * (cb - HALF_MAX), yuv_shift);
    int b = luma + CV_DESCALE(k[3] * (cb - HALF_MAX), yuv_shift);
#endif

    out[bidx ^ 2] = SAT_CAST(r);
    out[1] = SAT_CAST(g);
    out[bidx] = SAT_CAST(b);
#if dcn == 4
    // Four-channel destination: alpha is set to the maximum value.
    out[3] = MAX_NUM;
#endif
}
|
||||
|
||||
///////////////////////////////////// RGB <-> XYZ //////////////////////////////////////
|
||||
|
||||
// Applies a 3x3 matrix (row-major in `coeffs`, nine entries) to the first
// three elements of every source pixel and stores the three results.
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel. `bidx` is not read by this kernel.
__kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
                      int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                      int src_offset, int dst_offset, __constant COEFF_TYPE * coeffs)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int elem = col << 2;
    __global const DATA_TYPE* in = src + mad24(row, src_step, src_offset + elem);
    __global DATA_TYPE* out = dst + mad24(row, dst_step, dst_offset + elem);

    const DATA_TYPE r = in[0], g = in[1], b = in[2];

#ifdef DEPTH_5
    float X = r * coeffs[0] + g * coeffs[1] + b * coeffs[2];
    float Y = r * coeffs[3] + g * coeffs[4] + b * coeffs[5];
    float Z = r * coeffs[6] + g * coeffs[7] + b * coeffs[8];
#else
    // Fixed-point coefficients: descale the sums by xyz_shift bits.
    int X = CV_DESCALE(r * coeffs[0] + g * coeffs[1] + b * coeffs[2], xyz_shift);
    int Y = CV_DESCALE(r * coeffs[3] + g * coeffs[4] + b * coeffs[5], xyz_shift);
    int Z = CV_DESCALE(r * coeffs[6] + g * coeffs[7] + b * coeffs[8], xyz_shift);
#endif

    out[0] = SAT_CAST(X);
    out[1] = SAT_CAST(Y);
    out[2] = SAT_CAST(Z);
}
|
||||
|
||||
// Applies a 3x3 matrix (row-major in `coeffs`, nine entries) to the XYZ
// triple of every source pixel. Matrix rows 0, 1, 2 produce destination
// elements 0, 1, 2 respectively. Steps and offsets are used directly as
// element indices; both buffers use a stride of four elements per pixel.
// `bidx` is not read by this kernel.
__kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
                      int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                      int src_offset, int dst_offset, __constant COEFF_TYPE * coeffs)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int elem = col << 2;
    __global const DATA_TYPE* in = src + mad24(row, src_step, src_offset + elem);
    __global DATA_TYPE* out = dst + mad24(row, dst_step, dst_offset + elem);

    const DATA_TYPE X = in[0], Y = in[1], Z = in[2];

#ifdef DEPTH_5
    float c0 = X * coeffs[0] + Y * coeffs[1] + Z * coeffs[2];
    float c1 = X * coeffs[3] + Y * coeffs[4] + Z * coeffs[5];
    float c2 = X * coeffs[6] + Y * coeffs[7] + Z * coeffs[8];
#else
    // Fixed-point coefficients: descale the sums by xyz_shift bits.
    int c0 = CV_DESCALE(X * coeffs[0] + Y * coeffs[1] + Z * coeffs[2], xyz_shift);
    int c1 = CV_DESCALE(X * coeffs[3] + Y * coeffs[4] + Z * coeffs[5], xyz_shift);
    int c2 = CV_DESCALE(X * coeffs[6] + Y * coeffs[7] + Z * coeffs[8], xyz_shift);
#endif

    out[0] = SAT_CAST(c0);
    out[1] = SAT_CAST(c1);
    out[2] = SAT_CAST(c2);
#if dcn == 4
    // Four-channel destination: alpha is set to the maximum value.
    out[3] = MAX_NUM;
#endif
}
|
||||
|
||||
///////////////////////////////////// RGB[A] <-> BGR[A] //////////////////////////////////////
|
||||
|
||||
// Channel copy / swap between RGB[A] and BGR[A] layouts.
// Build-time switches:
//   REVERSE : elements 0 and 2 are exchanged while copying
//   ORDER   : elements 0..2 are copied unchanged
//   dcn == 4: a fourth element is written; it is MAX_NUM when scn == 3,
//             otherwise it is copied from the source
// If neither REVERSE nor ORDER is defined, elements 0..2 are not written.
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel.
__kernel void RGB(int cols, int rows, int src_step, int dst_step,
                  __global const DATA_TYPE * src, __global DATA_TYPE * dst,
                  int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int elem = col << 2;
    __global const DATA_TYPE* in = src + mad24(row, src_step, src_offset + elem);
    __global DATA_TYPE* out = dst + mad24(row, dst_step, dst_offset + elem);

#ifdef REVERSE
    out[0] = in[2];
    out[1] = in[1];
    out[2] = in[0];
#elif defined ORDER
    out[0] = in[0];
    out[1] = in[1];
    out[2] = in[2];
#endif

#if dcn == 4
#if scn == 3
    out[3] = MAX_NUM;
#else
    out[3] = in[3];
#endif
#endif
}
|
||||
|
||||
///////////////////////////////////// RGB5x5 <-> RGB //////////////////////////////////////
|
||||
|
||||
// Unpacks one 16-bit packed pixel per work-item into 8-bit channels.
//   greenbits == 6 : 5-6-5 bit layout, the fourth element (if any) is 255
//   otherwise      : 5-5-5 bit layout, the fourth element (if any) is 255 when
//                    bit 15 of the packed value is set and 0 otherwise
// The low five bits go to element `bidx`, the high field to element bidx^2.
// The source is addressed with one element per pixel, the destination with
// four; steps and offsets are used directly as element indices.
__kernel void RGB5x52RGB(int cols, int rows, int src_step, int dst_step, int bidx,
                         __global const ushort * src, __global uchar * dst,
                         int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const ushort packed = src[mad24(row, src_step, src_offset + col)];
    __global uchar* out = dst + mad24(row, dst_step, dst_offset + (col << 2));

    // The low 5-bit field is expanded the same way for both layouts.
    out[bidx] = (uchar)(packed << 3);

#if greenbits == 6
    out[1] = (uchar)((packed >> 3) & ~3);
    out[bidx ^ 2] = (uchar)((packed >> 8) & ~7);
#if dcn == 4
    out[3] = 255;
#endif
#else
    out[1] = (uchar)((packed >> 2) & ~7);
    out[bidx ^ 2] = (uchar)((packed >> 7) & ~7);
#if dcn == 4
    out[3] = packed & 0x8000 ? 255 : 0;
#endif
#endif
}
|
||||
|
||||
// Packs the 8-bit channels of one pixel per work-item into a 16-bit value.
//   greenbits == 6 : 5-6-5 bit layout
//   otherwise      : 5-5-5 bit layout; unless scn == 3, bit 15 is set when
//                    the fourth source element is non-zero
// Element `bidx` supplies the low field, element bidx^2 the high field.
// The source is addressed with four elements per pixel, the destination with
// one; steps and offsets are used directly as element indices.
__kernel void RGB2RGB5x5(int cols, int rows, int src_step, int dst_step, int bidx,
                         __global const uchar * src, __global ushort * dst,
                         int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    __global const uchar* in = src + mad24(row, src_step, src_offset + (col << 2));
    const int out_idx = mad24(row, dst_step, dst_offset + col);

    const int lo = in[bidx], mid = in[1], hi = in[bidx ^ 2];

#if greenbits == 6
    int packed = (lo >> 3) | ((mid & ~3) << 3) | ((hi & ~7) << 8);
#else
    int packed = (lo >> 3) | ((mid & ~7) << 2) | ((hi & ~7) << 7);
#if scn != 3
    // Source carries alpha: keep it as a single bit.
    packed |= in[3] ? 0x8000 : 0;
#endif
#endif

    dst[out_idx] = (ushort)packed;
}
|
||||
|
||||
///////////////////////////////////// RGB5x5 <-> Gray //////////////////////////////////////
|
||||
|
||||
// Converts one 16-bit packed pixel per work-item to an 8-bit gray value.
// The three bit fields are expanded to 8 bits, weighted with B2Y / G2Y / R2Y
// and descaled by yuv_shift bits. `greenbits == 6` selects the 5-6-5 layout,
// anything else the 5-5-5 layout. `bidx` is not read by this kernel.
// Both buffers are addressed with one element per pixel; steps and offsets
// are used directly as element indices.
__kernel void BGR5x52Gray(int cols, int rows, int src_step, int dst_step, int bidx,
                          __global const ushort * src, __global uchar * dst,
                          int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int packed = src[mad24(row, src_step, src_offset + col)];
    const int out_idx = mad24(row, dst_step, dst_offset + col);

    // Low field is identical in both layouts.
    const int lo = (packed << 3) & 0xf8;
#if greenbits == 6
    const int mid = (packed >> 3) & 0xfc;
    const int hi = (packed >> 8) & 0xf8;
#else
    const int mid = (packed >> 2) & 0xf8;
    const int hi = (packed >> 7) & 0xf8;
#endif

    dst[out_idx] = (uchar)CV_DESCALE(lo*B2Y + mid*G2Y + hi*R2Y, yuv_shift);
}
|
||||
|
||||
// Replicates one 8-bit gray value per work-item into all fields of a 16-bit
// packed pixel. `greenbits == 6` selects the 5-6-5 layout, anything else the
// 5-5-5 layout. `bidx` is not read by this kernel.
// Both buffers are addressed with one element per pixel; steps and offsets
// are used directly as element indices.
__kernel void Gray2BGR5x5(int cols, int rows, int src_step, int dst_step, int bidx,
                          __global const uchar * src, __global ushort * dst,
                          int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int gray = src[mad24(row, src_step, src_offset + col)];
    const int out_idx = mad24(row, dst_step, dst_offset + col);

#if greenbits == 6
    dst[out_idx] = (ushort)((gray >> 3) | ((gray & ~3) << 3) | ((gray & ~7) << 8));
#else
    // 5-5-5: the same five-bit value goes into each of the three fields.
    const int g5 = gray >> 3;
    dst[out_idx] = (ushort)(g5 | (g5 << 5) | (g5 << 10));
#endif
}
|
||||
|
||||
///////////////////////////////////// RGB <-> HSV //////////////////////////////////////
|
||||
|
||||
__constant int sector_data[][3] = { {1, 3, 0}, { 1, 0, 2 }, { 3, 0, 1 }, { 0, 2, 1 }, { 0, 1, 3 }, { 2, 1, 0 } };
|
||||
|
||||
#ifdef DEPTH_0
|
||||
|
||||
// 8-bit RGB/BGR -> HSV using integer arithmetic and two lookup tables.
//   bidx       : source element holding blue; red is read from bidx^2
//   sdiv_table : indexed by V, scales (max - min) into saturation
//   hdiv_table : indexed by (max - min), scales the hue numerator
// Table products are rounded and shifted right by hsv_shift bits. A negative
// hue is wrapped by adding the build-time constant `hrange`.
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel.
__kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx,
                      __global const uchar * src, __global uchar * dst,
                      int src_offset, int dst_offset,
                      __constant int * sdiv_table, __constant int * hdiv_table)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int elem = col << 2;
    __global const uchar* in = src + mad24(row, src_step, src_offset + elem);
    __global uchar* out = dst + mad24(row, dst_step, dst_offset + elem);

    const int b = in[bidx], g = in[1], r = in[bidx ^ 2];

    const int v = max(max(b, g), r);
    const int vmin = min(min(b, g), r);
    const int diff = v - vmin;

    const int s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;

    // Hue numerator depends on which channel is the maximum
    // (red is checked first, then green, otherwise blue).
    int h = (v == r) ? (g - b)
          : (v == g) ? (b - r + 2 * diff)
                     : (r - g + 4 * diff);
    h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
    if (h < 0)
        h += hrange;

    out[0] = convert_uchar_sat_rte(h);
    out[1] = (uchar)s;
    out[2] = (uchar)v;
}
|
||||
|
||||
// 8-bit HSV -> RGB/BGR, one pixel per work-item.
// S and V are scaled by 1/255; H is multiplied by the build-time constant
// `hscale` and wrapped into [0, 6) before it is split into a sector index
// and a fractional part. `sector_data` selects which of four candidate
// values becomes blue, green and red for that sector.
//   bidx : destination element that receives blue; red goes to bidx^2
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel.
__kernel void HSV2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
                      __global const uchar * src, __global uchar * dst,
                      int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int elem = col << 2;
    __global const uchar* in = src + mad24(row, src_step, src_offset + elem);
    __global uchar* out = dst + mad24(row, dst_step, dst_offset + elem);

    float h = in[0];
    const float s = in[1]*(1/255.f);
    const float v = in[2]*(1/255.f);
    float b, g, r;

    if (s == 0)
    {
        // Zero saturation: all three channels equal V.
        b = g = r = v;
    }
    else
    {
        h *= hscale;
        if (h < 0)
        {
            do { h += 6; } while (h < 0);
        }
        else if (h >= 6)
        {
            do { h -= 6; } while (h >= 6);
        }

        int sector = convert_int_sat_rtn(h);
        h -= sector;
        // Keep the table index in range even if wrapping left h at 6.
        if ((unsigned)sector >= 6u)
        {
            sector = 0;
            h = 0.f;
        }

        const float tab[4] = { v,
                               v*(1.f - s),
                               v*(1.f - s*h),
                               v*(1.f - s*(1.f - h)) };

        b = tab[sector_data[sector][0]];
        g = tab[sector_data[sector][1]];
        r = tab[sector_data[sector][2]];
    }

    out[bidx] = convert_uchar_sat_rte(b*255.f);
    out[1] = convert_uchar_sat_rte(g*255.f);
    out[bidx ^ 2] = convert_uchar_sat_rte(r*255.f);
#if dcn == 4
    out[3] = MAX_NUM;
#endif
}
|
||||
|
||||
#elif defined DEPTH_5
|
||||
|
||||
// Float RGB/BGR -> HSV, one pixel per work-item.
//   bidx : source element holding blue; red is read from bidx^2
// Output: element 0 = hue in degrees multiplied by the build-time constant
// `hscale`, element 1 = saturation, element 2 = value (the channel maximum).
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel.
__kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx,
                      __global const float * src, __global float * dst,
                      int src_offset, int dst_offset)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (y < rows && x < cols)
    {
        x <<= 2;
        int src_idx = mad24(y, src_step, src_offset + x);
        int dst_idx = mad24(y, dst_step, dst_offset + x);

        float b = src[src_idx + bidx], g = src[src_idx + 1], r = src[src_idx + (bidx^2)];
        float h, s, v;

        float vmin, diff;

        v = vmin = r;
        if( v < g ) v = g;
        if( v < b ) v = b;
        if( vmin > g ) vmin = g;
        if( vmin > b ) vmin = b;

        diff = v - vmin;
        // FLT_EPSILON keeps both divisions finite for black / gray pixels.
        s = diff/(float)(fabs(v) + FLT_EPSILON);
        // Single-precision literal: a bare `60.` is a double constant and
        // would drag double arithmetic into this float-only kernel.
        diff = 60.f/(diff + FLT_EPSILON);

        // Hue offset depends on which channel is the maximum.
        if( v == r )
            h = (g - b)*diff;
        else if( v == g )
            h = (b - r)*diff + 120.f;
        else
            h = (r - g)*diff + 240.f;

        if( h < 0 ) h += 360.f;

        dst[dst_idx] = h*hscale;
        dst[dst_idx + 1] = s;
        dst[dst_idx + 2] = v;
    }
}
|
||||
|
||||
// Float HSV -> RGB/BGR, one pixel per work-item.
// H is multiplied by the build-time constant `hscale` and wrapped into
// [0, 6) before it is split into a sector index and a fractional part.
// `sector_data` selects which of four candidate values becomes blue, green
// and red for that sector.
//   bidx : destination element that receives blue; red goes to bidx^2
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel.
__kernel void HSV2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
                      __global const float * src, __global float * dst,
                      int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int elem = col << 2;
    __global const float* in = src + mad24(row, src_step, src_offset + elem);
    __global float* out = dst + mad24(row, dst_step, dst_offset + elem);

    float h = in[0];
    const float s = in[1];
    const float v = in[2];
    float b, g, r;

    if (s == 0)
    {
        // Zero saturation: all three channels equal V.
        b = g = r = v;
    }
    else
    {
        h *= hscale;
        if (h < 0)
        {
            do { h += 6; } while (h < 0);
        }
        else if (h >= 6)
        {
            do { h -= 6; } while (h >= 6);
        }

        int sector = convert_int_sat_rtn(h);
        h -= sector;
        // Keep the table index in range even if wrapping left h at 6.
        if ((unsigned)sector >= 6u)
        {
            sector = 0;
            h = 0.f;
        }

        const float tab[4] = { v,
                               v*(1.f - s),
                               v*(1.f - s*h),
                               v*(1.f - s*(1.f - h)) };

        b = tab[sector_data[sector][0]];
        g = tab[sector_data[sector][1]];
        r = tab[sector_data[sector][2]];
    }

    out[bidx] = b;
    out[1] = g;
    out[bidx ^ 2] = r;
#if dcn == 4
    out[3] = MAX_NUM;
#endif
}
|
||||
|
||||
#endif
|
||||
|
||||
///////////////////////////////////// RGB <-> HLS //////////////////////////////////////
|
||||
|
||||
#ifdef DEPTH_0
|
||||
|
||||
// 8-bit RGB/BGR -> HLS, one pixel per work-item.
// Channels are scaled by 1/255 before the conversion. Hue (degrees) is
// multiplied by the build-time constant `hscale`; lightness and saturation
// are scaled by 255. All three are rounded and saturated to uchar.
//   bidx : source element holding blue; red is read from bidx^2
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel.
__kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step, int bidx,
                      __global const uchar * src, __global uchar * dst,
                      int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int elem = col << 2;
    __global const uchar* in = src + mad24(row, src_step, src_offset + elem);
    __global uchar* out = dst + mad24(row, dst_step, dst_offset + elem);

    const float b = in[bidx]*(1/255.f);
    const float g = in[1]*(1/255.f);
    const float r = in[bidx ^ 2]*(1/255.f);

    float vmax = r, vmin = r;
    vmax = vmax < g ? g : vmax;
    vmax = vmax < b ? b : vmax;
    vmin = vmin > g ? g : vmin;
    vmin = vmin > b ? b : vmin;

    float diff = vmax - vmin;
    const float l = (vmax + vmin)*0.5f;
    float h = 0.f, s = 0.f;

    // Hue and saturation stay zero unless the channels actually differ.
    if (diff > FLT_EPSILON)
    {
        s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
        diff = 60.f/diff;

        if (vmax == r)
            h = (g - b)*diff;
        else if (vmax == g)
            h = (b - r)*diff + 120.f;
        else
            h = (r - g)*diff + 240.f;

        if (h < 0.f)
            h += 360.f;
    }

    out[0] = convert_uchar_sat_rte(h*hscale);
    out[1] = convert_uchar_sat_rte(l*255.f);
    out[2] = convert_uchar_sat_rte(s*255.f);
}
|
||||
|
||||
// 8-bit HLS -> RGB/BGR, one pixel per work-item.
// L and S are scaled by 1/255; H is multiplied by the build-time constant
// `hscale` and wrapped into [0, 6) before it is split into a sector index
// and a fractional part. `sector_data` selects which of four candidate
// values becomes blue, green and red for that sector.
//   bidx : destination element that receives blue; red goes to bidx^2
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel.
__kernel void HLS2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
                      __global const uchar * src, __global uchar * dst,
                      int src_offset, int dst_offset)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (y < rows && x < cols)
    {
        x <<= 2;
        int src_idx = mad24(y, src_step, src_offset + x);
        int dst_idx = mad24(y, dst_step, dst_offset + x);

        float h = src[src_idx], l = src[src_idx + 1]*(1.f/255.f), s = src[src_idx + 2]*(1.f/255.f);
        float b, g, r;

        if (s != 0)
        {
            float tab[4];

            float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
            float p1 = 2*l - p2;

            h *= hscale;
            if( h < 0 )
                do h += 6; while( h < 0 );
            else if( h >= 6 )
                do h -= 6; while( h >= 6 );

            int sector = convert_int_sat_rtn(h);
            h -= sector;
            // Same guard as HSV2RGB: never index sector_data outside its
            // six rows, whatever the wrapped hue turned out to be.
            if( (unsigned)sector >= 6u )
            {
                sector = 0;
                h = 0.f;
            }

            tab[0] = p2;
            tab[1] = p1;
            tab[2] = p1 + (p2 - p1)*(1-h);
            tab[3] = p1 + (p2 - p1)*h;

            b = tab[sector_data[sector][0]];
            g = tab[sector_data[sector][1]];
            r = tab[sector_data[sector][2]];
        }
        else
            b = g = r = l; // zero saturation: gray at lightness L

        dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
        dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
        dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
#if dcn == 4
        dst[dst_idx + 3] = MAX_NUM;
#endif
    }
}
|
||||
|
||||
#elif defined DEPTH_5
|
||||
|
||||
// Float RGB/BGR -> HLS, one pixel per work-item.
// Output: element 0 = hue in degrees multiplied by the build-time constant
// `hscale`, element 1 = lightness, element 2 = saturation.
//   bidx : source element holding blue; red is read from bidx^2
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel.
__kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step, int bidx,
                      __global const float * src, __global float * dst,
                      int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int elem = col << 2;
    __global const float* in = src + mad24(row, src_step, src_offset + elem);
    __global float* out = dst + mad24(row, dst_step, dst_offset + elem);

    const float b = in[bidx], g = in[1], r = in[bidx ^ 2];

    float vmax = r, vmin = r;
    vmax = vmax < g ? g : vmax;
    vmax = vmax < b ? b : vmax;
    vmin = vmin > g ? g : vmin;
    vmin = vmin > b ? b : vmin;

    float diff = vmax - vmin;
    const float l = (vmax + vmin)*0.5f;
    float h = 0.f, s = 0.f;

    // Hue and saturation stay zero unless the channels actually differ.
    if (diff > FLT_EPSILON)
    {
        s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
        diff = 60.f/diff;

        if (vmax == r)
            h = (g - b)*diff;
        else if (vmax == g)
            h = (b - r)*diff + 120.f;
        else
            h = (r - g)*diff + 240.f;

        if (h < 0.f)
            h += 360.f;
    }

    out[0] = h*hscale;
    out[1] = l;
    out[2] = s;
}
|
||||
|
||||
// Float HLS -> RGB/BGR, one pixel per work-item.
// H is multiplied by the build-time constant `hscale` and wrapped into
// [0, 6) before it is split into a sector index and a fractional part.
// `sector_data` selects which of four candidate values becomes blue, green
// and red for that sector.
//   bidx : destination element that receives blue; red goes to bidx^2
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel.
__kernel void HLS2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
                      __global const float * src, __global float * dst,
                      int src_offset, int dst_offset)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (y < rows && x < cols)
    {
        x <<= 2;
        int src_idx = mad24(y, src_step, src_offset + x);
        int dst_idx = mad24(y, dst_step, dst_offset + x);

        float h = src[src_idx], l = src[src_idx + 1], s = src[src_idx + 2];
        float b, g, r;

        if (s != 0)
        {
            float tab[4];
            int sector;

            float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
            float p1 = 2*l - p2;

            h *= hscale;
            if( h < 0 )
                do h += 6; while( h < 0 );
            else if( h >= 6 )
                do h -= 6; while( h >= 6 );

            sector = convert_int_sat_rtn(h);
            h -= sector;
            // A tiny negative hue plus 6 rounds to exactly 6.0f, which would
            // give sector 6 and read past the six rows of sector_data.
            // Fold that case back to sector 0, as HSV2RGB does.
            if( (unsigned)sector >= 6u )
            {
                sector = 0;
                h = 0.f;
            }

            tab[0] = p2;
            tab[1] = p1;
            tab[2] = p1 + (p2 - p1)*(1-h);
            tab[3] = p1 + (p2 - p1)*h;

            b = tab[sector_data[sector][0]];
            g = tab[sector_data[sector][1]];
            r = tab[sector_data[sector][2]];
        }
        else
            b = g = r = l; // zero saturation: gray at lightness L

        dst[dst_idx + bidx] = b;
        dst[dst_idx + 1] = g;
        dst[dst_idx + (bidx^2)] = r;
#if dcn == 4
        dst[dst_idx + 3] = MAX_NUM;
#endif
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
/////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
|
||||
|
||||
#ifdef DEPTH_0
|
||||
|
||||
// Premultiplies the three colour elements of each pixel by its alpha
// (element 3): c' = (c * alpha + HALF_MAX) / MAX_NUM. Alpha is copied as is.
// `bidx` is not read by this kernel.
// Steps and offsets are used directly as element indices; both buffers use a
// stride of four elements per pixel.
__kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
                         int bidx, __global const uchar * src, __global uchar * dst,
                         int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (row >= rows || col >= cols)
        return;

    const int elem = col << 2;
    __global const uchar* in = src + mad24(row, src_step, src_offset + elem);
    __global uchar* out = dst + mad24(row, dst_step, dst_offset + elem);

    const uchar c0 = in[0], c1 = in[1], c2 = in[2];
    const uchar alpha = in[3];

    out[0] = (c0 * alpha + HALF_MAX) / MAX_NUM;
    out[1] = (c1 * alpha + HALF_MAX) / MAX_NUM;
    out[2] = (c2 * alpha + HALF_MAX) / MAX_NUM;
    out[3] = alpha;
}
|
||||
|
||||
// Converts 8-bit alpha-premultiplied pixels back to plain four-channel
// pixels; each work-item handles one pixel.
//
// cols, rows   - bounds of the work area; work-items outside it do nothing.
// src_step, dst_step, src_offset, dst_offset
//              - combined with the column offset 4*x to form the element
//                index of the pixel in src / dst.
// bidx         - not used by this kernel.
// src, dst     - input / output pixels, four uchar elements each; the fourth
//                element is the alpha value and is copied through unchanged.
//
// MAX_NUM is defined elsewhere in this program.
__kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step, int bidx,
                         __global const uchar * src, __global uchar * dst,
                         int src_offset, int dst_offset)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Nothing to do for work-items that fall outside the work area.
    if (row >= rows || col >= cols)
        return;

    // Four elements per pixel in both buffers.
    const int px = col << 2;
    __global const uchar * in = src + mad24(row, src_step, src_offset + px);
    __global uchar * out = dst + mad24(row, dst_step, dst_offset + px);

    // Read the whole pixel before writing anything.
    const uchar c0 = in[0], c1 = in[1];
    const uchar c2 = in[2], alpha = in[3];

    if (alpha == 0)
    {
        // A zero alpha cannot be divided out; the colour channels become 0.
        out[0] = 0;
        out[1] = 0;
        out[2] = 0;
    }
    else
    {
        // channel * MAX_NUM / alpha, with alpha / 2 added before the
        // division for rounding.
        const uchar half_alpha = alpha / 2;
        out[0] = (c0 * MAX_NUM + half_alpha) / alpha;
        out[1] = (c1 * MAX_NUM + half_alpha) / alpha;
        out[2] = (c2 * MAX_NUM + half_alpha) / alpha;
    }
    out[3] = alpha;
}
|
||||
|
||||
#endif
|
||||
|
@ -146,7 +146,11 @@
|
||||
#endif
|
||||
|
||||
#if USE_DOUBLE
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define FPTYPE double
|
||||
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
|
||||
#else
|
||||
|
@ -143,7 +143,11 @@
|
||||
#endif
|
||||
|
||||
#if USE_DOUBLE
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define FPTYPE double
|
||||
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
|
||||
#else
|
||||
|
@ -45,8 +45,6 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
// Enter your kernel in this window
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf:enable
|
||||
#define CV_HAAR_FEATURE_MAX 3
|
||||
typedef int sumtype;
|
||||
typedef float sqsumtype;
|
||||
@ -288,8 +286,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
|
||||
int counter = get_global_id(0);
|
||||
int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
|
||||
GpuHidHaarTreeNode t1 = *(orinode + counter);
|
||||
#pragma unroll
|
||||
|
||||
#pragma unroll
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f);
|
||||
@ -300,8 +298,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
|
||||
|
||||
t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]);
|
||||
counter += nodenum;
|
||||
#pragma unroll
|
||||
|
||||
#pragma unroll
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
newnode[counter].p[i][0] = tr_x[i];
|
||||
|
@ -43,11 +43,13 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (__ATI__)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (__NVIDIA__)
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/************************************** convolve **************************************/
|
||||
|
||||
|
@ -34,7 +34,7 @@
|
||||
//
|
||||
//
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
|
@ -43,13 +43,17 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define CONVERT(step) ((step)>>1)
|
||||
#else
|
||||
#define CONVERT(step) ((step))
|
||||
#endif
|
||||
|
||||
#define LSIZE 256
|
||||
#define LSIZE_1 255
|
||||
#define LSIZE_2 254
|
||||
@ -60,17 +64,17 @@
|
||||
#define GET_CONFLICT_OFFSET(lid) ((lid) >> LOG_NUM_BANKS)
|
||||
|
||||
|
||||
kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global float *sqsum,
|
||||
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
|
||||
kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TYPE *sqsum,
|
||||
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step,int dst1_step)
|
||||
{
|
||||
int lid = get_local_id(0);
|
||||
int gid = get_group_id(0);
|
||||
int4 src_t[2], sum_t[2];
|
||||
float4 sqsum_t[2];
|
||||
TYPE4 sqsum_t[2];
|
||||
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
|
||||
__local float4 lm_sqsum[2][LSIZE + LOG_LSIZE];
|
||||
__local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
|
||||
__local int* sum_p;
|
||||
__local float* sqsum_p;
|
||||
__local TYPE* sqsum_p;
|
||||
src_step = src_step >> 2;
|
||||
gid = gid << 1;
|
||||
for(int i = 0; i < rows; i =i + LSIZE_1)
|
||||
@ -79,17 +83,17 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
|
||||
src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : 0);
|
||||
|
||||
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[1] = (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[1] = (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
|
||||
lm_sum[0][bf_loc] = src_t[0];
|
||||
lm_sqsum[0][bf_loc] = convert_float4(src_t[0] * src_t[0]);
|
||||
lm_sqsum[0][bf_loc] = convert_TYPE4(src_t[0] * src_t[0]);
|
||||
|
||||
lm_sum[1][bf_loc] = src_t[1];
|
||||
lm_sqsum[1][bf_loc] = convert_float4(src_t[1] * src_t[1]);
|
||||
lm_sqsum[1][bf_loc] = convert_TYPE4(src_t[1] * src_t[1]);
|
||||
|
||||
int offset = 1;
|
||||
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
|
||||
@ -130,7 +134,8 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
|
||||
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step /4, loc_s1 = loc_s0 + dst_step ;
|
||||
int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE),loc_sq1 = loc_sq0 + CONVERT(dst1_step);
|
||||
if(lid > 0 && (i+lid) <= rows)
|
||||
{
|
||||
lm_sum[0][bf_loc] += sum_t[0];
|
||||
@ -138,20 +143,20 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
|
||||
lm_sqsum[0][bf_loc] += sqsum_t[0];
|
||||
lm_sqsum[1][bf_loc] += sqsum_t[1];
|
||||
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
|
||||
sqsum_p = (__local float*)(&(lm_sqsum[0][bf_loc]));
|
||||
sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
|
||||
for(int k = 0; k < 4; k++)
|
||||
{
|
||||
if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
|
||||
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
|
||||
sqsum[loc_s0 + k * dst_step / 4] = sqsum_p[k];
|
||||
sqsum[loc_sq0 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
|
||||
}
|
||||
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
|
||||
sqsum_p = (__local float*)(&(lm_sqsum[1][bf_loc]));
|
||||
sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
|
||||
for(int k = 0; k < 4; k++)
|
||||
{
|
||||
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
|
||||
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
|
||||
sqsum[loc_s1 + k * dst_step / 4] = sqsum_p[k];
|
||||
sqsum[loc_sq1 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@ -159,30 +164,32 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
|
||||
}
|
||||
|
||||
|
||||
kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__global int *sum ,
|
||||
__global float *sqsum,int rows,int cols,int src_step,int sum_step,
|
||||
kernel void integral_rows_D4(__global int4 *srcsum,__global TYPE4 * srcsqsum,__global int *sum ,
|
||||
__global TYPE *sqsum,int rows,int cols,int src_step,int src1_step,int sum_step,
|
||||
int sqsum_step,int sum_offset,int sqsum_offset)
|
||||
{
|
||||
int lid = get_local_id(0);
|
||||
int gid = get_group_id(0);
|
||||
int4 src_t[2], sum_t[2];
|
||||
float4 sqsrc_t[2],sqsum_t[2];
|
||||
TYPE4 sqsrc_t[2],sqsum_t[2];
|
||||
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
|
||||
__local float4 lm_sqsum[2][LSIZE + LOG_LSIZE];
|
||||
__local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
|
||||
__local int *sum_p;
|
||||
__local float *sqsum_p;
|
||||
__local TYPE *sqsum_p;
|
||||
src_step = src_step >> 4;
|
||||
src1_step = (src1_step / sizeof(TYPE)) >> 2 ;
|
||||
gid <<= 1;
|
||||
for(int i = 0; i < rows; i =i + LSIZE_1)
|
||||
{
|
||||
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (int4)0;
|
||||
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2] : (float4)0;
|
||||
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (int4)0;
|
||||
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
|
||||
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid ] : (int4)0;
|
||||
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid ] : (TYPE4)0;
|
||||
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid + 1] : (int4)0;
|
||||
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid + 1] : (TYPE4)0;
|
||||
|
||||
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[1] = (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[1] = (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
|
||||
@ -238,17 +245,18 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
|
||||
}
|
||||
if(i + lid == 0)
|
||||
{
|
||||
int loc0 = gid * 2 * sum_step;
|
||||
int loc1 = gid * 2 * sqsum_step;
|
||||
int loc0 = gid * sum_step;
|
||||
int loc1 = gid * CONVERT(sqsum_step);
|
||||
for(int k = 1; k <= 8; k++)
|
||||
{
|
||||
if(gid * 8 + k > cols) break;
|
||||
if(gid * 4 + k > cols) break;
|
||||
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
|
||||
sqsum[sqsum_offset + loc1 + k * sqsum_step / 4] = 0;
|
||||
sqsum[sqsum_offset + loc1 + k * sqsum_step / sizeof(TYPE)] = 0;
|
||||
}
|
||||
}
|
||||
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
|
||||
int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
|
||||
int loc_s0 = sum_offset + gid * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
|
||||
int loc_sq0 = sqsum_offset + gid * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
|
||||
|
||||
if(lid > 0 && (i+lid) <= rows)
|
||||
{
|
||||
lm_sum[0][bf_loc] += sum_t[0];
|
||||
@ -256,37 +264,37 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
|
||||
lm_sqsum[0][bf_loc] += sqsum_t[0];
|
||||
lm_sqsum[1][bf_loc] += sqsum_t[1];
|
||||
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
|
||||
sqsum_p = (__local float*)(&(lm_sqsum[0][bf_loc]));
|
||||
sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
|
||||
for(int k = 0; k < 4; k++)
|
||||
{
|
||||
if(gid * 8 + k >= cols) break;
|
||||
if(gid * 4 + k >= cols) break;
|
||||
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
|
||||
sqsum[loc_sq0 + k * sqsum_step / 4] = sqsum_p[k];
|
||||
sqsum[loc_sq0 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
|
||||
}
|
||||
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
|
||||
sqsum_p = (__local float*)(&(lm_sqsum[1][bf_loc]));
|
||||
sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
|
||||
for(int k = 0; k < 4; k++)
|
||||
{
|
||||
if(gid * 8 + 4 + k >= cols) break;
|
||||
if(gid * 4 + 4 + k >= cols) break;
|
||||
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
|
||||
sqsum[loc_sq1 + k * sqsum_step / 4] = sqsum_p[k];
|
||||
sqsum[loc_sq1 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
|
||||
kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global float *sqsum,
|
||||
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
|
||||
kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global TYPE *sqsum,
|
||||
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step, int dst1_step)
|
||||
{
|
||||
int lid = get_local_id(0);
|
||||
int gid = get_group_id(0);
|
||||
float4 src_t[2], sum_t[2];
|
||||
float4 sqsum_t[2];
|
||||
TYPE4 sqsum_t[2];
|
||||
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
|
||||
__local float4 lm_sqsum[2][LSIZE + LOG_LSIZE];
|
||||
__local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
|
||||
__local float* sum_p;
|
||||
__local float* sqsum_p;
|
||||
__local TYPE* sqsum_p;
|
||||
src_step = src_step >> 2;
|
||||
gid = gid << 1;
|
||||
for(int i = 0; i < rows; i =i + LSIZE_1)
|
||||
@ -295,17 +303,17 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
|
||||
src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : (float4)0);
|
||||
|
||||
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[1] = (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[1] = (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
|
||||
lm_sum[0][bf_loc] = src_t[0];
|
||||
lm_sqsum[0][bf_loc] = convert_float4(src_t[0] * src_t[0]);
|
||||
lm_sqsum[0][bf_loc] = convert_TYPE4(src_t[0] * src_t[0]);
|
||||
|
||||
lm_sum[1][bf_loc] = src_t[1];
|
||||
lm_sqsum[1][bf_loc] = convert_float4(src_t[1] * src_t[1]);
|
||||
lm_sqsum[1][bf_loc] = convert_TYPE4(src_t[1] * src_t[1]);
|
||||
|
||||
int offset = 1;
|
||||
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
|
||||
@ -347,6 +355,7 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
|
||||
int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE), loc_sq1 = loc_sq0 + CONVERT(dst1_step);
|
||||
if(lid > 0 && (i+lid) <= rows)
|
||||
{
|
||||
lm_sum[0][bf_loc] += sum_t[0];
|
||||
@ -354,20 +363,20 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
|
||||
lm_sqsum[0][bf_loc] += sqsum_t[0];
|
||||
lm_sqsum[1][bf_loc] += sqsum_t[1];
|
||||
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
|
||||
sqsum_p = (__local float*)(&(lm_sqsum[0][bf_loc]));
|
||||
sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
|
||||
for(int k = 0; k < 4; k++)
|
||||
{
|
||||
if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
|
||||
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
|
||||
sqsum[loc_s0 + k * dst_step / 4] = sqsum_p[k];
|
||||
sqsum[loc_sq0 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
|
||||
}
|
||||
sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
|
||||
sqsum_p = (__local float*)(&(lm_sqsum[1][bf_loc]));
|
||||
sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
|
||||
for(int k = 0; k < 4; k++)
|
||||
{
|
||||
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
|
||||
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
|
||||
sqsum[loc_s1 + k * dst_step / 4] = sqsum_p[k];
|
||||
sqsum[loc_sq1 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@ -375,30 +384,31 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
|
||||
}
|
||||
|
||||
|
||||
kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,__global float *sum ,
|
||||
__global float *sqsum,int rows,int cols,int src_step,int sum_step,
|
||||
kernel void integral_rows_D5(__global float4 *srcsum,__global TYPE4 * srcsqsum,__global float *sum ,
|
||||
__global TYPE *sqsum,int rows,int cols,int src_step,int src1_step, int sum_step,
|
||||
int sqsum_step,int sum_offset,int sqsum_offset)
|
||||
{
|
||||
int lid = get_local_id(0);
|
||||
int gid = get_group_id(0);
|
||||
float4 src_t[2], sum_t[2];
|
||||
float4 sqsrc_t[2],sqsum_t[2];
|
||||
TYPE4 sqsrc_t[2],sqsum_t[2];
|
||||
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
|
||||
__local float4 lm_sqsum[2][LSIZE + LOG_LSIZE];
|
||||
__local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
|
||||
__local float *sum_p;
|
||||
__local float *sqsum_p;
|
||||
__local TYPE *sqsum_p;
|
||||
src_step = src_step >> 4;
|
||||
src1_step = (src1_step / sizeof(TYPE)) >> 2;
|
||||
for(int i = 0; i < rows; i =i + LSIZE_1)
|
||||
{
|
||||
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (float4)0;
|
||||
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2] : (float4)0;
|
||||
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid * 2] : (TYPE4)0;
|
||||
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
|
||||
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
|
||||
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid * 2 + 1] : (TYPE4)0;
|
||||
|
||||
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
|
||||
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[1] = (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
sqsum_t[1] = (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
|
||||
@ -455,16 +465,16 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
|
||||
if(i + lid == 0)
|
||||
{
|
||||
int loc0 = gid * 2 * sum_step;
|
||||
int loc1 = gid * 2 * sqsum_step;
|
||||
int loc1 = gid * 2 * CONVERT(sqsum_step);
|
||||
for(int k = 1; k <= 8; k++)
|
||||
{
|
||||
if(gid * 8 + k > cols) break;
|
||||
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
|
||||
sqsum[sqsum_offset + loc1 + k * sqsum_step / 4] = 0;
|
||||
sqsum[sqsum_offset + loc1 + k * sqsum_step / sizeof(TYPE)] = 0;
|
||||
}
|
||||
}
|
||||
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
|
||||
int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
|
||||
int loc_sq0 = sqsum_offset + gid * 2 * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
|
||||
if(lid > 0 && (i+lid) <= rows)
|
||||
{
|
||||
lm_sum[0][bf_loc] += sum_t[0];
|
||||
@ -472,20 +482,20 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
|
||||
lm_sqsum[0][bf_loc] += sqsum_t[0];
|
||||
lm_sqsum[1][bf_loc] += sqsum_t[1];
|
||||
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
|
||||
sqsum_p = (__local float*)(&(lm_sqsum[0][bf_loc]));
|
||||
sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
|
||||
for(int k = 0; k < 4; k++)
|
||||
{
|
||||
if(gid * 8 + k >= cols) break;
|
||||
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
|
||||
sqsum[loc_sq0 + k * sqsum_step / 4] = sqsum_p[k];
|
||||
sqsum[loc_sq0 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
|
||||
}
|
||||
sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
|
||||
sqsum_p = (__local float*)(&(lm_sqsum[1][bf_loc]));
|
||||
sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
|
||||
for(int k = 0; k < 4; k++)
|
||||
{
|
||||
if(gid * 8 + 4 + k >= cols) break;
|
||||
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
|
||||
sqsum[loc_sq1 + k * sqsum_step / 4] = sqsum_p[k];
|
||||
sqsum[loc_sq1 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,11 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -243,6 +243,60 @@ __kernel void remap_16SC2_16UC1(__global const T * restrict src, __global T * ds
|
||||
|
||||
#elif INTER_LINEAR
|
||||
|
||||
// Linear-interpolation remap driven by a (short2, ushort) map pair; each
// work-item produces one destination element.
//
// src, dst       - source / destination buffers of element type T.
// map1           - per-destination integer source coordinates (x, y).
// map2           - per-destination packed interpolation fractions: after
//                  masking with INTER_TAB_SIZE2 - 1, the bits selected by
//                  INTER_TAB_SIZE - 1 give the x fraction and the bits above
//                  INTER_BITS give the y fraction, both in units of
//                  1 / INTER_TAB_SIZE.
// *_offset, *_step - combined with x and y to form element indices into the
//                  corresponding buffer (see the mad24 calls).
// src_cols, src_rows - not referenced directly in this body; NOTE(review):
//                  presumably consumed by NEED_EXTRAPOLATION / EXTRAPOLATE -
//                  confirm against the macro definitions.
// dst_cols, dst_rows - bounds of the work area.
// nVal           - value the four samples are initialised to before the
//                  in-range load or EXTRAPOLATE runs.
//
// T, WT, WT2, convertToWT, convertToT, NEED_EXTRAPOLATION, EXTRAPOLATE and
// the INTER_* constants are defined elsewhere in this program. The local
// names below are kept stable because those macros may refer to them.
__kernel void remap_16SC2_16UC1(__global T const * restrict src, __global T * dst,
                                __global short2 * restrict map1, __global ushort * restrict map2,
                                int src_offset, int dst_offset, int map1_offset, int map2_offset,
                                int src_step, int dst_step, int map1_step, int map2_step,
                                int src_cols, int src_rows, int dst_cols, int dst_rows, T nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    // Work-items outside the destination have nothing to write.
    if (x >= dst_cols || y >= dst_rows)
        return;

    int dstIdx = mad24(y, dst_step, x + dst_offset);
    int map1Idx = mad24(y, map1_step, x + map1_offset);
    int map2Idx = mad24(y, map2_step, x + map2_offset);

    // The 2x2 neighbourhood: A at the mapped position, B one step in x,
    // C one step in y, D one step in both.
    int2 map_dataA = convert_int2(map1[map1Idx]);
    int2 map_dataB = map_dataA + (int2)(1, 0);
    int2 map_dataC = map_dataA + (int2)(0, 1);
    int2 map_dataD = map_dataA + (int2)(1, 1);

    // Unpack the fractional offsets and scale them by 1 / INTER_TAB_SIZE.
    ushort map2Value = (ushort)(map2[map2Idx] & (INTER_TAB_SIZE2 - 1));
    WT2 u = (WT2)(map2Value & (INTER_TAB_SIZE - 1), map2Value >> INTER_BITS) / (WT2)(INTER_TAB_SIZE);

    WT scalar = convertToWT(nVal);
    WT a = scalar, b = scalar, c = scalar, d = scalar;

    // Each sample is loaded directly when NEED_EXTRAPOLATION is false for
    // its coordinates; otherwise EXTRAPOLATE supplies it.
    if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
    {
        a = convertToWT(src[mad24(map_dataA.y, src_step, map_dataA.x + src_offset)]);
    }
    else
    {
        EXTRAPOLATE(map_dataA, a);
    }

    if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
    {
        b = convertToWT(src[mad24(map_dataB.y, src_step, map_dataB.x + src_offset)]);
    }
    else
    {
        EXTRAPOLATE(map_dataB, b);
    }

    if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
    {
        c = convertToWT(src[mad24(map_dataC.y, src_step, map_dataC.x + src_offset)]);
    }
    else
    {
        EXTRAPOLATE(map_dataC, c);
    }

    if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
    {
        d = convertToWT(src[mad24(map_dataD.y, src_step, map_dataD.x + src_offset)]);
    }
    else
    {
        EXTRAPOLATE(map_dataD, d);
    }

    // Weighted blend of the four samples; the evaluation order is kept as
    // written so floating-point results are unchanged.
    WT dst_data = a * (1 - u.x) * (1 - u.y) +
                  b * (u.x)     * (1 - u.y) +
                  c * (1 - u.x) * (u.y) +
                  d * (u.x)     * (u.y);

    dst[dstIdx] = convertToT(dst_data);
}
|
||||
|
||||
__kernel void remap_2_32FC1(__global T const * restrict src, __global T * dst,
|
||||
__global float * map1, __global float * map2,
|
||||
int src_offset, int dst_offset, int map1_offset, int map2_offset,
|
||||
@ -263,7 +317,7 @@ __kernel void remap_2_32FC1(__global T const * restrict src, __global T * dst,
|
||||
int2 map_dataA = convert_int2_sat_rtn(map_data);
|
||||
int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
|
||||
int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
|
||||
int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
|
||||
int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
|
||||
|
||||
float2 _u = map_data - convert_float2(map_dataA);
|
||||
WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
|
||||
@ -290,10 +344,10 @@ __kernel void remap_2_32FC1(__global T const * restrict src, __global T * dst,
|
||||
else
|
||||
EXTRAPOLATE(map_dataD, d);
|
||||
|
||||
WT dst_data = a * (WT)(1 - u.x) * (WT)(1 - u.y) +
|
||||
b * (WT)(u.x) * (WT)(1 - u.y) +
|
||||
c * (WT)(1 - u.x) * (WT)(u.y) +
|
||||
d * (WT)(u.x) * (WT)(u.y);
|
||||
WT dst_data = a * (1 - u.x) * (1 - u.y) +
|
||||
b * (u.x) * (1 - u.y) +
|
||||
c * (1 - u.x) * (u.y) +
|
||||
d * (u.x) * (u.y);
|
||||
dst[dstIdx] = convertToT(dst_data);
|
||||
}
|
||||
}
|
||||
@ -343,10 +397,10 @@ __kernel void remap_32FC2(__global T const * restrict src, __global T * dst,
|
||||
else
|
||||
EXTRAPOLATE(map_dataD, d);
|
||||
|
||||
WT dst_data = a * (WT)(1 - u.x) * (WT)(1 - u.y) +
|
||||
b * (WT)(u.x) * (WT)(1 - u.y) +
|
||||
c * (WT)(1 - u.x) * (WT)(u.y) +
|
||||
d * (WT)(u.x) * (WT)(u.y);
|
||||
WT dst_data = a * (1 - u.x) * (1 - u.y) +
|
||||
b * (u.x) * (1 - u.y) +
|
||||
c * (1 - u.x) * (u.y) +
|
||||
d * (u.x) * (u.y);
|
||||
dst[dstIdx] = convertToT(dst_data);
|
||||
}
|
||||
}
|
||||
|
@ -45,16 +45,16 @@
|
||||
|
||||
|
||||
// resize kernel
|
||||
// Currently, CV_8UC1 CV_8UC4 CV_32FC1 and CV_32FC4are supported.
|
||||
// Currently, CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 are supported.
|
||||
// We shall support other types later if necessary.
|
||||
|
||||
#if defined DOUBLE_SUPPORT
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#define F double
|
||||
#else
|
||||
#define F float
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define INTER_RESIZE_COEF_BITS 11
|
||||
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
|
||||
@ -62,8 +62,10 @@
|
||||
#define CAST_SCALE (1.0f/(1<<CAST_BITS))
|
||||
#define INC(x,l) ((x+1) >= (l) ? (x):((x)+1))
|
||||
|
||||
#ifdef LN
|
||||
|
||||
__kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restrict src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int dst_offset, int src_offset,int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||
{
|
||||
int gx = get_global_id(0);
|
||||
@ -71,7 +73,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
|
||||
|
||||
float4 sx, u, xf;
|
||||
int4 x, DX;
|
||||
gx = (gx<<2) - (dstoffset_in_pixel&3);
|
||||
gx = (gx<<2) - (dst_offset&3);
|
||||
DX = (int4)(gx, gx+1, gx+2, gx+3);
|
||||
sx = (convert_float4(DX) + 0.5f) * ifx - 0.5f;
|
||||
xf = floor(sx);
|
||||
@ -109,10 +111,10 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
|
||||
int4 val1, val2, val;
|
||||
int4 sdata1, sdata2, sdata3, sdata4;
|
||||
|
||||
int4 pos1 = mad24((int4)y, (int4)srcstep_in_pixel, x+(int4)srcoffset_in_pixel);
|
||||
int4 pos2 = mad24((int4)y, (int4)srcstep_in_pixel, x_+(int4)srcoffset_in_pixel);
|
||||
int4 pos3 = mad24((int4)y_, (int4)srcstep_in_pixel, x+(int4)srcoffset_in_pixel);
|
||||
int4 pos4 = mad24((int4)y_, (int4)srcstep_in_pixel, x_+(int4)srcoffset_in_pixel);
|
||||
int4 pos1 = mad24((int4)y, (int4)src_step, x+(int4)src_offset);
|
||||
int4 pos2 = mad24((int4)y, (int4)src_step, x_+(int4)src_offset);
|
||||
int4 pos3 = mad24((int4)y_, (int4)src_step, x+(int4)src_offset);
|
||||
int4 pos4 = mad24((int4)y_, (int4)src_step, x_+(int4)src_offset);
|
||||
|
||||
sdata1.s0 = src[pos1.s0];
|
||||
sdata1.s1 = src[pos1.s1];
|
||||
@ -140,12 +142,12 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
|
||||
|
||||
val = ((val + (1<<(CAST_BITS-1))) >> CAST_BITS);
|
||||
|
||||
pos4 = mad24(dy, dststep_in_pixel, gx+dstoffset_in_pixel);
|
||||
pos4 = mad24(dy, dst_step, gx+dst_offset);
|
||||
pos4.y++;
|
||||
pos4.z+=2;
|
||||
pos4.w+=3;
|
||||
uchar4 uval = convert_uchar4_sat(val);
|
||||
int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dstoffset_in_pixel&3)==0);
|
||||
int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dst_offset&3)==0);
|
||||
if(con)
|
||||
{
|
||||
*(__global uchar4*)(dst + pos4.x)=uval;
|
||||
@ -172,7 +174,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
|
||||
}
|
||||
|
||||
__kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int dst_offset, int src_offset,int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
@ -198,24 +200,24 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
|
||||
int y_ = INC(y,src_rows);
|
||||
int x_ = INC(x,src_cols);
|
||||
int4 srcpos;
|
||||
srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.x = mad24(y, src_step, x+src_offset);
|
||||
srcpos.y = mad24(y, src_step, x_+src_offset);
|
||||
srcpos.z = mad24(y_, src_step, x+src_offset);
|
||||
srcpos.w = mad24(y_, src_step, x_+src_offset);
|
||||
int4 data0 = convert_int4(src[srcpos.x]);
|
||||
int4 data1 = convert_int4(src[srcpos.y]);
|
||||
int4 data2 = convert_int4(src[srcpos.z]);
|
||||
int4 data3 = convert_int4(src[srcpos.w]);
|
||||
int4 val = mul24((int4)mul24(U1, V1) , data0) + mul24((int4)mul24(U, V1) , data1)
|
||||
+mul24((int4)mul24(U1, V) , data2)+mul24((int4)mul24(U, V) , data3);
|
||||
int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel);
|
||||
int dstpos = mad24(dy, dst_step, dx+dst_offset);
|
||||
uchar4 uval = convert_uchar4((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dstpos] = uval;
|
||||
}
|
||||
|
||||
__kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int dst_offset, int src_offset,int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
@ -235,10 +237,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
|
||||
float u1 = 1.f-u;
|
||||
float v1 = 1.f-v;
|
||||
int4 srcpos;
|
||||
srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.x = mad24(y, src_step, x+src_offset);
|
||||
srcpos.y = mad24(y, src_step, x_+src_offset);
|
||||
srcpos.z = mad24(y_, src_step, x+src_offset);
|
||||
srcpos.w = mad24(y_, src_step, x_+src_offset);
|
||||
float data0 = src[srcpos.x];
|
||||
float data1 = src[srcpos.y];
|
||||
float data2 = src[srcpos.z];
|
||||
@ -248,13 +250,13 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
|
||||
float val2 = u1 * data2 +
|
||||
u * data3;
|
||||
float val = v1 * val1 + v * val2;
|
||||
int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel);
|
||||
int dstpos = mad24(dy, dst_step, dx+dst_offset);
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dstpos] = val;
|
||||
}
|
||||
|
||||
__kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int dst_offset, int src_offset,int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
@ -274,10 +276,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
|
||||
float u1 = 1.f-u;
|
||||
float v1 = 1.f-v;
|
||||
int4 srcpos;
|
||||
srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.x = mad24(y, src_step, x+src_offset);
|
||||
srcpos.y = mad24(y, src_step, x_+src_offset);
|
||||
srcpos.z = mad24(y_, src_step, x+src_offset);
|
||||
srcpos.w = mad24(y_, src_step, x_+src_offset);
|
||||
float4 s_data1, s_data2, s_data3, s_data4;
|
||||
s_data1 = src[srcpos.x];
|
||||
s_data2 = src[srcpos.y];
|
||||
@ -285,129 +287,32 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
|
||||
s_data4 = src[srcpos.w];
|
||||
float4 val = u1 * v1 * s_data1 + u * v1 * s_data2
|
||||
+u1 * v *s_data3 + u * v *s_data4;
|
||||
int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel);
|
||||
int dstpos = mad24(dy, dst_step, dx+dst_offset);
|
||||
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dstpos] = val;
|
||||
}
|
||||
|
||||
__kernel void resizeNN_C1_D0(__global uchar * dst, __global uchar * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
|
||||
#elif defined NN
|
||||
|
||||
__kernel void resizeNN(__global T * dst, __global T * src,
|
||||
int dst_offset, int src_offset,int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify)
|
||||
{
|
||||
int gx = get_global_id(0);
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
gx = (gx<<2) - (dstoffset_in_pixel&3);
|
||||
//int4 GX = (int4)(gx, gx+1, gx+2, gx+3);
|
||||
|
||||
int4 sx;
|
||||
int sy;
|
||||
F ss1 = gx*ifx;
|
||||
F ss2 = (gx+1)*ifx;
|
||||
F ss3 = (gx+2)*ifx;
|
||||
F ss4 = (gx+3)*ifx;
|
||||
F s5 = dy * ify;
|
||||
sx.s0 = min((int)floor(ss1), src_cols-1);
|
||||
sx.s1 = min((int)floor(ss2), src_cols-1);
|
||||
sx.s2 = min((int)floor(ss3), src_cols-1);
|
||||
sx.s3 = min((int)floor(ss4), src_cols-1);
|
||||
sy = min((int)floor(s5), src_rows-1);
|
||||
|
||||
uchar4 val;
|
||||
int4 pos = mad24((int4)sy, (int4)srcstep_in_pixel, sx+(int4)srcoffset_in_pixel);
|
||||
val.s0 = src[pos.s0];
|
||||
val.s1 = src[pos.s1];
|
||||
val.s2 = src[pos.s2];
|
||||
val.s3 = src[pos.s3];
|
||||
|
||||
//__global uchar4* d = (__global uchar4*)(dst + dstoffset_in_pixel + dy * dststep_in_pixel + gx);
|
||||
//uchar4 dVal = *d;
|
||||
pos = mad24(dy, dststep_in_pixel, gx+dstoffset_in_pixel);
|
||||
pos.y++;
|
||||
pos.z+=2;
|
||||
pos.w+=3;
|
||||
|
||||
int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dstoffset_in_pixel&3)==0);
|
||||
if(con)
|
||||
if (dx < dst_cols && dy < dst_rows)
|
||||
{
|
||||
*(__global uchar4*)(dst + pos.x)=val;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(gx >= 0 && gx < dst_cols && dy >= 0 && dy < dst_rows)
|
||||
{
|
||||
dst[pos.x]=val.x;
|
||||
}
|
||||
if(gx+1 >= 0 && gx+1 < dst_cols && dy >= 0 && dy < dst_rows)
|
||||
{
|
||||
dst[pos.y]=val.y;
|
||||
}
|
||||
if(gx+2 >= 0 && gx+2 < dst_cols && dy >= 0 && dy < dst_rows)
|
||||
{
|
||||
dst[pos.z]=val.z;
|
||||
}
|
||||
if(gx+3 >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows)
|
||||
{
|
||||
dst[pos.w]=val.w;
|
||||
}
|
||||
float s1 = dx * ifx, s2 = dy * ify;
|
||||
int sx = min(convert_int_sat_rtn(s1), src_cols - 1);
|
||||
int sy = min(convert_int_sat_rtn(s2), src_rows - 1);
|
||||
|
||||
int dst_index = mad24(dy, dst_step, dx + dst_offset);
|
||||
int src_index = mad24(sy, src_step, sx + src_offset);
|
||||
|
||||
dst[dst_index] = src[src_index];
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
F s1 = dx*ifx;
|
||||
F s2 = dy*ify;
|
||||
int sx = fmin((float)floor(s1), (float)src_cols-1);
|
||||
int sy = fmin((float)floor(s2), (float)src_rows-1);
|
||||
int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel);
|
||||
int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel);
|
||||
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dpos] = src[spos];
|
||||
|
||||
}
|
||||
|
||||
__kernel void resizeNN_C1_D5(__global float * dst, __global float * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
F s1 = dx*ifx;
|
||||
F s2 = dy*ify;
|
||||
int sx = fmin((float)floor(s1), (float)src_cols-1);
|
||||
int sy = fmin((float)floor(s2), (float)src_rows-1);
|
||||
|
||||
int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel);
|
||||
int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel);
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dpos] = src[spos];
|
||||
|
||||
}
|
||||
|
||||
__kernel void resizeNN_C4_D5(__global float4 * dst, __global float4 * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
F s1 = dx*ifx;
|
||||
F s2 = dy*ify;
|
||||
int s_col = floor(s1);
|
||||
int s_row = floor(s2);
|
||||
int sx = min(s_col, src_cols-1);
|
||||
int sy = min(s_row, src_rows-1);
|
||||
int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel);
|
||||
int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel);
|
||||
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dpos] = src[spos];
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -43,7 +43,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
|
@ -47,11 +47,11 @@
|
||||
//warpAffine kernel
|
||||
//support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
typedef double F;
|
||||
typedef double4 F4;
|
||||
|
@ -47,11 +47,11 @@
|
||||
//warpPerspective kernel
|
||||
//support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
typedef double F;
|
||||
typedef double4 F4;
|
||||
|
@ -61,35 +61,6 @@
|
||||
#define my_comp(x,y) ((x) < (y))
|
||||
#endif
|
||||
|
||||
///////////// parallel merge sort ///////////////
|
||||
// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
|
||||
// Linear lower-bound search over data[left, right).
// Returns the index of the first element for which my_comp(element, searchVal)
// is false. If every element in the range compares true the result is `right`;
// if the range is empty (left >= right) the result is `left`.
static uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
{
    uint cursor = left;

    // The number of iterations depends on the data each work-item inspects, so
    // work-items in the same wavefront may exit this loop at different points.
    for( ; cursor < right; ++cursor )
    {
        K_T candidate = data[ cursor ];

        // Stop at the first element that does not order before searchVal.
        if( !my_comp( candidate, searchVal ) )
            break;
    }

    return cursor;
}
|
||||
|
||||
// This implements a binary search routine to look for an 'insertion point' in a sequence, denoted
|
||||
// by a base pointer and left and right index for a particular candidate value. The comparison operator is
|
||||
// passed as a functor parameter my_comp
|
||||
|
@ -42,8 +42,13 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define TYPE double
|
||||
#else
|
||||
#define TYPE float
|
||||
|
@ -43,14 +43,12 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
#define TYPE_IMAGE_SQSUM double
|
||||
#else
|
||||
#define TYPE_IMAGE_SQSUM float
|
||||
|
@ -43,15 +43,19 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////optimized code using vector roi//////////////////////////
|
||||
////////////vector function name format: merge_vector_C(channels number)_D(data type depth)//////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void merge_vector_C2_D0(__global uchar *mat_dst, int dst_step, int dst_offset,
|
||||
__global uchar *mat_src0, int src0_step, int src0_offset,
|
||||
__global uchar *mat_src1, int src1_step, int src1_offset,
|
||||
|
@ -44,11 +44,11 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
typedef double T;
|
||||
#else
|
||||
|
@ -35,8 +35,12 @@
|
||||
//
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
__kernel void convert_to(
|
||||
__global const srcT* restrict srcMat,
|
||||
|
@ -34,11 +34,11 @@
|
||||
//
|
||||
//
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -34,11 +34,11 @@
|
||||
//
|
||||
//
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -34,11 +34,11 @@
|
||||
//
|
||||
//
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -45,8 +45,6 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
#define BUFFER 64
|
||||
#define BUFFER2 BUFFER>>1
|
||||
#ifndef WAVE_SIZE
|
||||
|
@ -38,9 +38,14 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if DATA_DEPTH == 0
|
||||
#define BASE_TYPE uchar
|
||||
|
@ -260,7 +260,6 @@ static float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
|
||||
{
|
||||
unsigned int cache = cols[0];
|
||||
|
||||
#pragma unroll
|
||||
for(int i = 1; i <= winsz; i++)
|
||||
cache += cols[i];
|
||||
|
||||
|
@ -45,13 +45,11 @@
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef T_FLOAT
|
||||
|
@ -44,19 +44,10 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
#ifndef FLT_MAX
|
||||
#define FLT_MAX CL_FLT_MAX
|
||||
#endif
|
||||
|
||||
#ifndef SHRT_MAX
|
||||
#define SHRT_MAX CL_SHORT_MAX
|
||||
#endif
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////get_first_k_initial_global//////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void get_first_k_initial_global_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
|
||||
__global short *ctemp, int h, int w, int nr_plane,
|
||||
int cmsg_step1, int cdisp_step1, int cndisp)
|
||||
@ -91,6 +82,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void get_first_k_initial_global_1(__global float *data_cost_selected_, __global float *selected_disp_pyr,
|
||||
__global float *ctemp, int h, int w, int nr_plane,
|
||||
int cmsg_step1, int cdisp_step1, int cndisp)
|
||||
@ -129,6 +121,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void get_first_k_initial_local_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
|
||||
__global short *ctemp,int h, int w, int nr_plane,
|
||||
int cmsg_step1, int cdisp_step1, int cndisp)
|
||||
@ -248,6 +241,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////////// init data cost ////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
inline float compute_3(__global uchar* left, __global uchar* right,
|
||||
float cdata_weight, float cmax_data_term)
|
||||
{
|
||||
@ -257,6 +251,7 @@ inline float compute_3(__global uchar* left, __global uchar* right,
|
||||
|
||||
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
|
||||
}
|
||||
|
||||
inline float compute_1(__global uchar* left, __global uchar* right,
|
||||
float cdata_weight, float cmax_data_term)
|
||||
{
|
||||
@ -316,6 +311,7 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
|
||||
int h, int w, int level, int channels,
|
||||
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
|
||||
@ -360,9 +356,11 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
|
||||
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
|
||||
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
|
||||
@ -630,6 +628,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
|
||||
///////////////////////////////////////////////////////////////
|
||||
////////////////////// compute data cost //////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
|
||||
__global uchar *cleft, __global uchar *cright,
|
||||
int h, int w, int level, int nr_plane, int channels,
|
||||
@ -680,6 +679,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
|
||||
__global uchar *cleft, __global uchar *cright,
|
||||
int h, int w, int level, int nr_plane, int channels,
|
||||
@ -729,9 +729,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
|
||||
__global uchar *cleft, __global uchar *cright,__local float *smem,
|
||||
int level, int rows, int cols, int h, int nr_plane,
|
||||
@ -1033,41 +1035,6 @@ static void get_first_k_element_increase_0(__global short* u_new, __global short
|
||||
}
|
||||
}
|
||||
|
||||
// Selects nr_plane entries out of nr_plane2 candidates, cheapest first.
//
// For each output slot the candidate with the smallest value in data_cost_new
// (stride cdisp_step1) is located; its data cost, disparity and the four
// message values (u/d/l/r) are copied to the output slot. Outputs are indexed
// with stride cdisp_step1; disparity_selected_cur and the *_cur messages are
// indexed with stride cdisp_step2.
//
// data_cost_new is modified: every chosen entry is overwritten with FLT_MAX so
// that it cannot be chosen again for a later slot.
static void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
                                           __global float *r_new, __global const float *u_cur, __global const float *d_cur,
                                           __global const float *l_cur, __global const float *r_cur,
                                           __global float *data_cost_selected, __global float *disparity_selected_new,
                                           __global float *data_cost_new, __global const float *data_cost_cur,
                                           __global const float *disparity_selected_cur,
                                           int nr_plane, int nr_plane2,
                                           int cdisp_step1, int cdisp_step2)
{
    for(int slot = 0; slot < nr_plane; ++slot)
    {
        // Linear arg-min over the remaining candidates. The strict '<' keeps
        // the lowest index on ties, and index 0 when nothing beats FLT_MAX.
        int best = 0;
        float best_cost = FLT_MAX;

        for(int cand = 0; cand < nr_plane2; ++cand)
        {
            float cost = data_cost_new[cand * cdisp_step1];
            if(!(cost < best_cost))
                continue;

            best_cost = cost;
            best = cand;
        }

        int dst = slot * cdisp_step1;   // output position for this slot
        int src = best * cdisp_step2;   // position of the winner in the *_cur arrays

        data_cost_selected[dst] = data_cost_cur[best * cdisp_step1];
        disparity_selected_new[dst] = disparity_selected_cur[src];

        u_new[dst] = u_cur[src];
        d_new[dst] = d_cur[src];
        l_new[dst] = l_cur[src];
        r_new[dst] = r_cur[src];

        // Retire the winner so the next slot picks the next-cheapest candidate.
        data_cost_new[best * cdisp_step1] = FLT_MAX;
    }
}
|
||||
__kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_,
|
||||
__global short *r_new_, __global short *u_cur_, __global const short *d_cur_,
|
||||
__global const short *l_cur_, __global const short *r_cur_, __global short *ctemp,
|
||||
@ -1118,6 +1085,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
|
||||
cdisp_step1, cdisp_step2);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_,
|
||||
__global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
|
||||
__global const float *l_cur_, __global const float *r_cur_, __global float *ctemp,
|
||||
|
@ -33,11 +33,12 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define TYPE double
|
||||
#else
|
||||
@ -53,7 +54,6 @@
|
||||
#else
|
||||
#define POW(X,Y) X
|
||||
#endif
|
||||
#define FLT_MAX 3.402823466e+38F
|
||||
#define MAX_VAL (FLT_MAX*1e-3)
|
||||
|
||||
__kernel void svm_linear(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols,
|
||||
@ -206,4 +206,4 @@ __kernel void svm_rbf(__global float* src, int src_step, __global float* src2, i
|
||||
dst[row * dst_step + col] = temp1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -44,7 +44,7 @@
|
||||
//M*/
|
||||
|
||||
__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
|
||||
__global float* dx, __global float* dy, int dx_step)
|
||||
__global float* dx, __global float* dy, int dx_step)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
@ -53,13 +53,6 @@ __global float* dx, __global float* dy, int dx_step)
|
||||
{
|
||||
int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
|
||||
int src_x2 = (x - 1) > 0 ? (x -1) : 0;
|
||||
|
||||
//if(src[y * src_step + src_x1] == src[y * src_step+ src_x2])
|
||||
//{
|
||||
// printf("y = %d\n", y);
|
||||
// printf("src_x1 = %d\n", src_x1);
|
||||
// printf("src_x2 = %d\n", src_x2);
|
||||
//}
|
||||
dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]);
|
||||
|
||||
int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
|
||||
@ -97,24 +90,24 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
|
||||
int u2_offset_x,
|
||||
int u2_offset_y)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < I0_col&&y < I0_row)
|
||||
{
|
||||
//const float u1Val = u1(y, x);
|
||||
const float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
|
||||
//const float u2Val = u2(y, x);
|
||||
const float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
|
||||
//float u1Val = u1(y, x);
|
||||
float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
|
||||
//float u2Val = u2(y, x);
|
||||
float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
|
||||
|
||||
const float wx = x + u1Val;
|
||||
const float wy = y + u2Val;
|
||||
float wx = x + u1Val;
|
||||
float wy = y + u2Val;
|
||||
|
||||
const int xmin = ceil(wx - 2.0f);
|
||||
const int xmax = floor(wx + 2.0f);
|
||||
int xmin = ceil(wx - 2.0f);
|
||||
int xmax = floor(wx + 2.0f);
|
||||
|
||||
const int ymin = ceil(wy - 2.0f);
|
||||
const int ymax = floor(wy + 2.0f);
|
||||
int ymin = ceil(wy - 2.0f);
|
||||
int ymax = floor(wy + 2.0f);
|
||||
|
||||
float sum = 0.0f;
|
||||
float sumx = 0.0f;
|
||||
@ -126,7 +119,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
|
||||
{
|
||||
for (int cx = xmin; cx <= xmax; ++cx)
|
||||
{
|
||||
const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
|
||||
float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
|
||||
|
||||
//sum += w * tex2D(tex_I1 , cx, cy);
|
||||
int2 cood = (int2)(cx, cy);
|
||||
@ -140,30 +133,30 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
|
||||
}
|
||||
}
|
||||
|
||||
const float coeff = 1.0f / wsum;
|
||||
float coeff = 1.0f / wsum;
|
||||
|
||||
const float I1wVal = sum * coeff;
|
||||
const float I1wxVal = sumx * coeff;
|
||||
const float I1wyVal = sumy * coeff;
|
||||
float I1wVal = sum * coeff;
|
||||
float I1wxVal = sumx * coeff;
|
||||
float I1wyVal = sumy * coeff;
|
||||
|
||||
I1w[y * I1w_step + x] = I1wVal;
|
||||
I1wx[y * I1w_step + x] = I1wxVal;
|
||||
I1wy[y * I1w_step + x] = I1wyVal;
|
||||
|
||||
const float Ix2 = I1wxVal * I1wxVal;
|
||||
const float Iy2 = I1wyVal * I1wyVal;
|
||||
float Ix2 = I1wxVal * I1wxVal;
|
||||
float Iy2 = I1wyVal * I1wyVal;
|
||||
|
||||
// store the |Grad(I1)|^2
|
||||
grad[y * I1w_step + x] = Ix2 + Iy2;
|
||||
|
||||
// compute the constant part of the rho function
|
||||
const float I0Val = I0[y * I0_step + x];
|
||||
float I0Val = I0[y * I0_step + x];
|
||||
rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static float readImage(__global const float *image, const int x, const int y, const int rows, const int cols, const int elemCntPerRow)
|
||||
static float readImage(__global float *image, int x, int y, int rows, int cols, int elemCntPerRow)
|
||||
{
|
||||
int i0 = clamp(x, 0, cols - 1);
|
||||
int j0 = clamp(y, 0, rows - 1);
|
||||
@ -185,24 +178,24 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
|
||||
int I1_step,
|
||||
int I1x_step)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < I0_col&&y < I0_row)
|
||||
{
|
||||
//const float u1Val = u1(y, x);
|
||||
const float u1Val = u1[y * u1_step + x];
|
||||
//const float u2Val = u2(y, x);
|
||||
const float u2Val = u2[y * u2_step + x];
|
||||
//float u1Val = u1(y, x);
|
||||
float u1Val = u1[y * u1_step + x];
|
||||
//float u2Val = u2(y, x);
|
||||
float u2Val = u2[y * u2_step + x];
|
||||
|
||||
const float wx = x + u1Val;
|
||||
const float wy = y + u2Val;
|
||||
float wx = x + u1Val;
|
||||
float wy = y + u2Val;
|
||||
|
||||
const int xmin = ceil(wx - 2.0f);
|
||||
const int xmax = floor(wx + 2.0f);
|
||||
int xmin = ceil(wx - 2.0f);
|
||||
int xmax = floor(wx + 2.0f);
|
||||
|
||||
const int ymin = ceil(wy - 2.0f);
|
||||
const int ymax = floor(wy + 2.0f);
|
||||
int ymin = ceil(wy - 2.0f);
|
||||
int ymax = floor(wy + 2.0f);
|
||||
|
||||
float sum = 0.0f;
|
||||
float sumx = 0.0f;
|
||||
@ -213,7 +206,7 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
|
||||
{
|
||||
for (int cx = xmin; cx <= xmax; ++cx)
|
||||
{
|
||||
const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
|
||||
float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
|
||||
|
||||
int2 cood = (int2)(cx, cy);
|
||||
sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step);
|
||||
@ -223,24 +216,24 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
|
||||
}
|
||||
}
|
||||
|
||||
const float coeff = 1.0f / wsum;
|
||||
float coeff = 1.0f / wsum;
|
||||
|
||||
const float I1wVal = sum * coeff;
|
||||
const float I1wxVal = sumx * coeff;
|
||||
const float I1wyVal = sumy * coeff;
|
||||
float I1wVal = sum * coeff;
|
||||
float I1wxVal = sumx * coeff;
|
||||
float I1wyVal = sumy * coeff;
|
||||
|
||||
I1w[y * I1w_step + x] = I1wVal;
|
||||
I1wx[y * I1w_step + x] = I1wxVal;
|
||||
I1wy[y * I1w_step + x] = I1wyVal;
|
||||
|
||||
const float Ix2 = I1wxVal * I1wxVal;
|
||||
const float Iy2 = I1wyVal * I1wyVal;
|
||||
float Ix2 = I1wxVal * I1wxVal;
|
||||
float Iy2 = I1wyVal * I1wyVal;
|
||||
|
||||
// store the |Grad(I1)|^2
|
||||
grad[y * I1w_step + x] = Ix2 + Iy2;
|
||||
|
||||
// compute the constant part of the rho function
|
||||
const float I0Val = I0[y * I0_step + x];
|
||||
float I0Val = I0[y * I0_step + x];
|
||||
rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
|
||||
}
|
||||
|
||||
@ -253,38 +246,35 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
|
||||
__global float* p12,
|
||||
__global float* p21,
|
||||
__global float* p22,
|
||||
const float taut,
|
||||
float taut,
|
||||
int u2_step,
|
||||
int u1_offset_x,
|
||||
int u1_offset_y,
|
||||
int u2_offset_x,
|
||||
int u2_offset_y)
|
||||
{
|
||||
|
||||
//const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
//const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if(x < u1_col && y < u1_row)
|
||||
{
|
||||
int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
|
||||
const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
|
||||
float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
|
||||
|
||||
int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
|
||||
const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
|
||||
float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
|
||||
|
||||
int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
|
||||
const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
|
||||
float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
|
||||
|
||||
int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
|
||||
const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
|
||||
float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
|
||||
|
||||
const float g1 = hypot(u1x, u1y);
|
||||
const float g2 = hypot(u2x, u2y);
|
||||
float g1 = hypot(u1x, u1y);
|
||||
float g2 = hypot(u2x, u2y);
|
||||
|
||||
const float ng1 = 1.0f + taut * g1;
|
||||
const float ng2 = 1.0f + taut * g2;
|
||||
float ng1 = 1.0f + taut * g1;
|
||||
float ng2 = 1.0f + taut * g2;
|
||||
|
||||
p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1;
|
||||
p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1;
|
||||
@ -299,8 +289,8 @@ static float divergence(__global const float* v1, __global const float* v2, int
|
||||
|
||||
if (x > 0 && y > 0)
|
||||
{
|
||||
const float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
|
||||
const float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
|
||||
float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
|
||||
float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
|
||||
return v1x + v2y;
|
||||
}
|
||||
else
|
||||
@ -328,30 +318,25 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
|
||||
__global const float* p22, /*int p22_step,*/
|
||||
__global float* u1, int u1_step,
|
||||
__global float* u2,
|
||||
__global float* error, const float l_t, const float theta, int u2_step,
|
||||
__global float* error, float l_t, float theta, int u2_step,
|
||||
int u1_offset_x,
|
||||
int u1_offset_y,
|
||||
int u2_offset_x,
|
||||
int u2_offset_y,
|
||||
char calc_error)
|
||||
{
|
||||
|
||||
//const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
//const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
|
||||
if(x < I1wx_col && y < I1wx_row)
|
||||
{
|
||||
const float I1wxVal = I1wx[y * I1wx_step + x];
|
||||
const float I1wyVal = I1wy[y * I1wx_step + x];
|
||||
const float gradVal = grad[y * I1wx_step + x];
|
||||
const float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
|
||||
const float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
|
||||
float I1wxVal = I1wx[y * I1wx_step + x];
|
||||
float I1wyVal = I1wy[y * I1wx_step + x];
|
||||
float gradVal = grad[y * I1wx_step + x];
|
||||
float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
|
||||
float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
|
||||
|
||||
const float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
|
||||
float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
|
||||
|
||||
// estimate the values of the variable (v1, v2) (thresholding operator TH)
|
||||
|
||||
@ -370,31 +355,31 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
|
||||
}
|
||||
else if (gradVal > 1.192092896e-07f)
|
||||
{
|
||||
const float fi = -rho / gradVal;
|
||||
float fi = -rho / gradVal;
|
||||
d1 = fi * I1wxVal;
|
||||
d2 = fi * I1wyVal;
|
||||
}
|
||||
|
||||
const float v1 = u1OldVal + d1;
|
||||
const float v2 = u2OldVal + d2;
|
||||
float v1 = u1OldVal + d1;
|
||||
float v2 = u2OldVal + d2;
|
||||
|
||||
// compute the divergence of the dual variable (p1, p2)
|
||||
|
||||
const float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
|
||||
const float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
|
||||
float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
|
||||
float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
|
||||
|
||||
// estimate the values of the optical flow (u1, u2)
|
||||
|
||||
const float u1NewVal = v1 + theta * div_p1;
|
||||
const float u2NewVal = v2 + theta * div_p2;
|
||||
float u1NewVal = v1 + theta * div_p1;
|
||||
float u2NewVal = v2 + theta * div_p2;
|
||||
|
||||
u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
|
||||
u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
|
||||
|
||||
if(calc_error)
|
||||
{
|
||||
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
|
||||
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
|
||||
float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
|
||||
float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
|
||||
error[y * I1wx_step + x] = n1 + n2;
|
||||
}
|
||||
}
|
||||
|
@ -249,7 +249,8 @@ namespace cv
|
||||
&& devInfo.deviceType == CVCL_DEVICE_TYPE_CPU
|
||||
&& devInfo.platform->platformVendor.find("Intel") != std::string::npos
|
||||
&& (devInfo.deviceVersion.find("Build 56860") != std::string::npos
|
||||
|| devInfo.deviceVersion.find("Build 76921") != std::string::npos))
|
||||
|| devInfo.deviceVersion.find("Build 76921") != std::string::npos
|
||||
|| devInfo.deviceVersion.find("Build 78712") != std::string::npos))
|
||||
build_options = build_options + " -D BYPASS_VSTORE=true";
|
||||
|
||||
size_t globalThreads[3] = { divUp(src.cols, VEC_SIZE), src.rows, 1 };
|
||||
|
@ -49,9 +49,8 @@ using namespace cv;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace
|
||||
{
|
||||
using namespace testing;
|
||||
using namespace cv;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// cvtColor
|
||||
@ -62,20 +61,20 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
|
||||
bool use_roi;
|
||||
|
||||
// src mat
|
||||
cv::Mat src1;
|
||||
cv::Mat dst1;
|
||||
Mat src;
|
||||
Mat dst;
|
||||
|
||||
// src mat with roi
|
||||
cv::Mat src1_roi;
|
||||
cv::Mat dst1_roi;
|
||||
Mat src_roi;
|
||||
Mat dst_roi;
|
||||
|
||||
// ocl dst mat for testing
|
||||
cv::ocl::oclMat gsrc1_whole;
|
||||
cv::ocl::oclMat gdst1_whole;
|
||||
ocl::oclMat gsrc_whole;
|
||||
ocl::oclMat gdst_whole;
|
||||
|
||||
// ocl mat with roi
|
||||
cv::ocl::oclMat gsrc1_roi;
|
||||
cv::ocl::oclMat gdst1_roi;
|
||||
ocl::oclMat gsrc_roi;
|
||||
ocl::oclMat gdst_roi;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
@ -90,99 +89,188 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
|
||||
|
||||
Size roiSize = randomSize(1, MAX_VALUE);
|
||||
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(src1, src1_roi, roiSize, srcBorder, srcType, 2, 100);
|
||||
randomSubMat(src, src_roi, roiSize, srcBorder, srcType, 2, 100);
|
||||
|
||||
Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(dst1, dst1_roi, roiSize, dst1Border, dstType, 5, 16);
|
||||
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(dst, dst_roi, roiSize, dstBorder, dstType, 5, 16);
|
||||
|
||||
generateOclMat(gsrc1_whole, gsrc1_roi, src1, roiSize, srcBorder);
|
||||
generateOclMat(gdst1_whole, gdst1_roi, dst1, roiSize, dst1Border);
|
||||
generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
|
||||
generateOclMat(gdst_whole, gdst_roi, dst, roiSize, dstBorder);
|
||||
}
|
||||
|
||||
void Near(double threshold = 1e-3)
|
||||
void Near(double threshold)
|
||||
{
|
||||
EXPECT_MAT_NEAR(dst1, gdst1_whole, threshold);
|
||||
EXPECT_MAT_NEAR(dst1_roi, gdst1_roi, threshold);
|
||||
Mat whole, roi;
|
||||
gdst_whole.download(whole);
|
||||
gdst_roi.download(roi);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, whole, threshold);
|
||||
EXPECT_MAT_NEAR(dst_roi, roi, threshold);
|
||||
}
|
||||
|
||||
void doTest(int channelsIn, int channelsOut, int code)
|
||||
void doTest(int channelsIn, int channelsOut, int code, double threshold = 1e-3)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi(channelsIn, channelsOut);
|
||||
|
||||
cv::cvtColor(src1_roi, dst1_roi, code);
|
||||
cv::ocl::cvtColor(gsrc1_roi, gdst1_roi, code);
|
||||
cvtColor(src_roi, dst_roi, code, channelsOut);
|
||||
ocl::cvtColor(gsrc_roi, gdst_roi, code, channelsOut);
|
||||
|
||||
Near();
|
||||
Near(threshold);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#define CVTCODE(name) cv::COLOR_ ## name
|
||||
#define CVTCODE(name) COLOR_ ## name
|
||||
|
||||
OCL_TEST_P(CvtColor, RGB2GRAY)
|
||||
{
|
||||
doTest(3, 1, CVTCODE(RGB2GRAY));
|
||||
}
|
||||
OCL_TEST_P(CvtColor, GRAY2RGB)
|
||||
{
|
||||
doTest(1, 3, CVTCODE(GRAY2RGB));
|
||||
};
|
||||
// RGB[A] <-> BGR[A]
|
||||
|
||||
OCL_TEST_P(CvtColor, BGR2GRAY)
|
||||
{
|
||||
doTest(3, 1, CVTCODE(BGR2GRAY));
|
||||
}
|
||||
OCL_TEST_P(CvtColor, GRAY2BGR)
|
||||
{
|
||||
doTest(1, 3, CVTCODE(GRAY2BGR));
|
||||
};
|
||||
OCL_TEST_P(CvtColor, BGR2BGRA) { doTest(3, 4, CVTCODE(BGR2BGRA)); }
|
||||
OCL_TEST_P(CvtColor, RGB2RGBA) { doTest(3, 4, CVTCODE(RGB2RGBA)); }
|
||||
OCL_TEST_P(CvtColor, BGRA2BGR) { doTest(4, 3, CVTCODE(BGRA2BGR)); }
|
||||
OCL_TEST_P(CvtColor, RGBA2RGB) { doTest(4, 3, CVTCODE(RGBA2RGB)); }
|
||||
OCL_TEST_P(CvtColor, BGR2RGBA) { doTest(3, 4, CVTCODE(BGR2RGBA)); }
|
||||
OCL_TEST_P(CvtColor, RGB2BGRA) { doTest(3, 4, CVTCODE(RGB2BGRA)); }
|
||||
OCL_TEST_P(CvtColor, RGBA2BGR) { doTest(4, 3, CVTCODE(RGBA2BGR)); }
|
||||
OCL_TEST_P(CvtColor, BGRA2RGB) { doTest(4, 3, CVTCODE(BGRA2RGB)); }
|
||||
OCL_TEST_P(CvtColor, BGR2RGB) { doTest(3, 3, CVTCODE(BGR2RGB)); }
|
||||
OCL_TEST_P(CvtColor, RGB2BGR) { doTest(3, 3, CVTCODE(RGB2BGR)); }
|
||||
OCL_TEST_P(CvtColor, BGRA2RGBA) { doTest(4, 4, CVTCODE(BGRA2RGBA)); }
|
||||
OCL_TEST_P(CvtColor, RGBA2BGRA) { doTest(4, 4, CVTCODE(RGBA2BGRA)); }
|
||||
|
||||
OCL_TEST_P(CvtColor, RGBA2GRAY)
|
||||
{
|
||||
doTest(3, 1, CVTCODE(RGBA2GRAY));
|
||||
}
|
||||
OCL_TEST_P(CvtColor, GRAY2RGBA)
|
||||
{
|
||||
doTest(1, 3, CVTCODE(GRAY2RGBA));
|
||||
};
|
||||
// RGB <-> Gray
|
||||
|
||||
OCL_TEST_P(CvtColor, BGRA2GRAY)
|
||||
{
|
||||
doTest(3, 1, CVTCODE(BGRA2GRAY));
|
||||
}
|
||||
OCL_TEST_P(CvtColor, GRAY2BGRA)
|
||||
{
|
||||
doTest(1, 3, CVTCODE(GRAY2BGRA));
|
||||
};
|
||||
OCL_TEST_P(CvtColor, RGB2GRAY) { doTest(3, 1, CVTCODE(RGB2GRAY)); }
|
||||
OCL_TEST_P(CvtColor, GRAY2RGB) { doTest(1, 3, CVTCODE(GRAY2RGB)); }
|
||||
OCL_TEST_P(CvtColor, BGR2GRAY) { doTest(3, 1, CVTCODE(BGR2GRAY)); }
|
||||
OCL_TEST_P(CvtColor, GRAY2BGR) { doTest(1, 3, CVTCODE(GRAY2BGR)); }
|
||||
OCL_TEST_P(CvtColor, RGBA2GRAY) { doTest(4, 1, CVTCODE(RGBA2GRAY)); }
|
||||
OCL_TEST_P(CvtColor, GRAY2RGBA) { doTest(1, 4, CVTCODE(GRAY2RGBA)); }
|
||||
OCL_TEST_P(CvtColor, BGRA2GRAY) { doTest(4, 1, CVTCODE(BGRA2GRAY)); }
|
||||
OCL_TEST_P(CvtColor, GRAY2BGRA) { doTest(1, 4, CVTCODE(GRAY2BGRA)); }
|
||||
|
||||
OCL_TEST_P(CvtColor, RGB2YUV)
|
||||
{
|
||||
doTest(3, 3, CVTCODE(RGB2YUV));
|
||||
}
|
||||
OCL_TEST_P(CvtColor, BGR2YUV)
|
||||
{
|
||||
doTest(3, 3, CVTCODE(BGR2YUV));
|
||||
}
|
||||
OCL_TEST_P(CvtColor, YUV2RGB)
|
||||
{
|
||||
doTest(3, 3, CVTCODE(YUV2RGB));
|
||||
}
|
||||
OCL_TEST_P(CvtColor, YUV2BGR)
|
||||
{
|
||||
doTest(3, 3, CVTCODE(YUV2BGR));
|
||||
}
|
||||
OCL_TEST_P(CvtColor, RGB2YCrCb)
|
||||
{
|
||||
doTest(3, 3, CVTCODE(RGB2YCrCb));
|
||||
}
|
||||
OCL_TEST_P(CvtColor, BGR2YCrCb)
|
||||
{
|
||||
doTest(3, 3, CVTCODE(BGR2YCrCb));
|
||||
}
|
||||
// RGB <-> YUV
|
||||
|
||||
struct CvtColor_YUV420 : CvtColor
|
||||
OCL_TEST_P(CvtColor, RGB2YUV) { doTest(3, 3, CVTCODE(RGB2YUV)); }
|
||||
OCL_TEST_P(CvtColor, BGR2YUV) { doTest(3, 3, CVTCODE(BGR2YUV)); }
|
||||
OCL_TEST_P(CvtColor, RGBA2YUV) { doTest(4, 3, CVTCODE(RGB2YUV)); }
|
||||
OCL_TEST_P(CvtColor, BGRA2YUV) { doTest(4, 3, CVTCODE(BGR2YUV)); }
|
||||
OCL_TEST_P(CvtColor, YUV2RGB) { doTest(3, 3, CVTCODE(YUV2RGB)); }
|
||||
OCL_TEST_P(CvtColor, YUV2BGR) { doTest(3, 3, CVTCODE(YUV2BGR)); }
|
||||
OCL_TEST_P(CvtColor, YUV2RGBA) { doTest(3, 4, CVTCODE(YUV2RGB)); }
|
||||
OCL_TEST_P(CvtColor, YUV2BGRA) { doTest(3, 4, CVTCODE(YUV2BGR)); }
|
||||
|
||||
// RGB <-> YCrCb
|
||||
|
||||
OCL_TEST_P(CvtColor, RGB2YCrCb) { doTest(3, 3, CVTCODE(RGB2YCrCb)); }
|
||||
OCL_TEST_P(CvtColor, BGR2YCrCb) { doTest(3, 3, CVTCODE(BGR2YCrCb)); }
|
||||
OCL_TEST_P(CvtColor, RGBA2YCrCb) { doTest(4, 3, CVTCODE(RGB2YCrCb)); }
|
||||
OCL_TEST_P(CvtColor, BGRA2YCrCb) { doTest(4, 3, CVTCODE(BGR2YCrCb)); }
|
||||
OCL_TEST_P(CvtColor, YCrCb2RGB) { doTest(3, 3, CVTCODE(YCrCb2RGB)); }
|
||||
OCL_TEST_P(CvtColor, YCrCb2BGR) { doTest(3, 3, CVTCODE(YCrCb2BGR)); }
|
||||
OCL_TEST_P(CvtColor, YCrCb2RGBA) { doTest(3, 4, CVTCODE(YCrCb2RGB)); }
|
||||
OCL_TEST_P(CvtColor, YCrCb2BGRA) { doTest(3, 4, CVTCODE(YCrCb2BGR)); }
|
||||
|
||||
// RGB <-> XYZ
|
||||
|
||||
OCL_TEST_P(CvtColor, RGB2XYZ) { doTest(3, 3, CVTCODE(RGB2XYZ)); }
|
||||
OCL_TEST_P(CvtColor, BGR2XYZ) { doTest(3, 3, CVTCODE(BGR2XYZ)); }
|
||||
OCL_TEST_P(CvtColor, RGBA2XYZ) { doTest(4, 3, CVTCODE(RGB2XYZ)); }
|
||||
OCL_TEST_P(CvtColor, BGRA2XYZ) { doTest(4, 3, CVTCODE(BGR2XYZ)); }
|
||||
|
||||
OCL_TEST_P(CvtColor, XYZ2RGB) { doTest(3, 3, CVTCODE(XYZ2RGB)); }
|
||||
OCL_TEST_P(CvtColor, XYZ2BGR) { doTest(3, 3, CVTCODE(XYZ2BGR)); }
|
||||
OCL_TEST_P(CvtColor, XYZ2RGBA) { doTest(3, 4, CVTCODE(XYZ2RGB)); }
|
||||
OCL_TEST_P(CvtColor, XYZ2BGRA) { doTest(3, 4, CVTCODE(XYZ2BGR)); }
|
||||
|
||||
// RGB <-> HSV
|
||||
|
||||
typedef CvtColor CvtColor8u32f;
|
||||
|
||||
OCL_TEST_P(CvtColor8u32f, RGB2HSV) { doTest(3, 3, CVTCODE(RGB2HSV)); }
|
||||
OCL_TEST_P(CvtColor8u32f, BGR2HSV) { doTest(3, 3, CVTCODE(BGR2HSV)); }
|
||||
OCL_TEST_P(CvtColor8u32f, RGBA2HSV) { doTest(4, 3, CVTCODE(RGB2HSV)); }
|
||||
OCL_TEST_P(CvtColor8u32f, BGRA2HSV) { doTest(4, 3, CVTCODE(BGR2HSV)); }
|
||||
|
||||
OCL_TEST_P(CvtColor8u32f, RGB2HSV_FULL) { doTest(3, 3, CVTCODE(RGB2HSV_FULL)); }
|
||||
OCL_TEST_P(CvtColor8u32f, BGR2HSV_FULL) { doTest(3, 3, CVTCODE(BGR2HSV_FULL)); }
|
||||
OCL_TEST_P(CvtColor8u32f, RGBA2HSV_FULL) { doTest(4, 3, CVTCODE(RGB2HSV_FULL)); }
|
||||
OCL_TEST_P(CvtColor8u32f, BGRA2HSV_FULL) { doTest(4, 3, CVTCODE(BGR2HSV_FULL)); }
|
||||
|
||||
OCL_TEST_P(CvtColor8u32f, HSV2RGB) { doTest(3, 3, CVTCODE(HSV2RGB), depth == CV_8U ? 1 : 4e-1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HSV2BGR) { doTest(3, 3, CVTCODE(HSV2BGR), depth == CV_8U ? 1 : 4e-1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HSV2RGBA) { doTest(3, 4, CVTCODE(HSV2RGB), depth == CV_8U ? 1 : 4e-1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HSV2BGRA) { doTest(3, 4, CVTCODE(HSV2BGR), depth == CV_8U ? 1 : 4e-1); }
|
||||
|
||||
OCL_TEST_P(CvtColor8u32f, HSV2RGB_FULL) { doTest(3, 3, CVTCODE(HSV2RGB_FULL), depth == CV_8U ? 1 : 4e-1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HSV2BGR_FULL) { doTest(3, 3, CVTCODE(HSV2BGR_FULL), depth == CV_8U ? 1 : 4e-1); }
|
||||
// RGB-ordered 4-channel output: must use HSV2RGB_FULL (the BGR ordering is covered by HSV2BGRA_FULL).
OCL_TEST_P(CvtColor8u32f, HSV2RGBA_FULL) { doTest(3, 4, CVTCODE(HSV2RGB_FULL), depth == CV_8U ? 1 : 4e-1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HSV2BGRA_FULL) { doTest(3, 4, CVTCODE(HSV2BGR_FULL), depth == CV_8U ? 1 : 4e-1); }
|
||||
|
||||
// RGB <-> HLS
|
||||
|
||||
OCL_TEST_P(CvtColor8u32f, RGB2HLS) { doTest(3, 3, CVTCODE(RGB2HLS), depth == CV_8U ? 1 : 1e-3); }
|
||||
OCL_TEST_P(CvtColor8u32f, BGR2HLS) { doTest(3, 3, CVTCODE(BGR2HLS), depth == CV_8U ? 1 : 1e-3); }
|
||||
OCL_TEST_P(CvtColor8u32f, RGBA2HLS) { doTest(4, 3, CVTCODE(RGB2HLS), depth == CV_8U ? 1 : 1e-3); }
|
||||
OCL_TEST_P(CvtColor8u32f, BGRA2HLS) { doTest(4, 3, CVTCODE(BGR2HLS), depth == CV_8U ? 1 : 1e-3); }
|
||||
|
||||
OCL_TEST_P(CvtColor8u32f, RGB2HLS_FULL) { doTest(3, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
|
||||
OCL_TEST_P(CvtColor8u32f, BGR2HLS_FULL) { doTest(3, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
|
||||
OCL_TEST_P(CvtColor8u32f, RGBA2HLS_FULL) { doTest(4, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
|
||||
OCL_TEST_P(CvtColor8u32f, BGRA2HLS_FULL) { doTest(4, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
|
||||
|
||||
OCL_TEST_P(CvtColor8u32f, HLS2RGB) { doTest(3, 3, CVTCODE(HLS2RGB), 1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HLS2BGR) { doTest(3, 3, CVTCODE(HLS2BGR), 1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HLS2RGBA) { doTest(3, 4, CVTCODE(HLS2RGB), 1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HLS2BGRA) { doTest(3, 4, CVTCODE(HLS2BGR), 1); }
|
||||
|
||||
OCL_TEST_P(CvtColor8u32f, HLS2RGB_FULL) { doTest(3, 3, CVTCODE(HLS2RGB_FULL), 1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HLS2BGR_FULL) { doTest(3, 3, CVTCODE(HLS2BGR_FULL), 1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HLS2RGBA_FULL) { doTest(3, 4, CVTCODE(HLS2RGB_FULL), 1); }
|
||||
OCL_TEST_P(CvtColor8u32f, HLS2BGRA_FULL) { doTest(3, 4, CVTCODE(HLS2BGR_FULL), 1); }
|
||||
|
||||
// RGB5x5 <-> RGB
|
||||
|
||||
typedef CvtColor CvtColor8u;
|
||||
|
||||
OCL_TEST_P(CvtColor8u, BGR5652BGR) { doTest(2, 3, CVTCODE(BGR5652BGR)); }
|
||||
OCL_TEST_P(CvtColor8u, BGR5652RGB) { doTest(2, 3, CVTCODE(BGR5652RGB)); }
|
||||
OCL_TEST_P(CvtColor8u, BGR5652BGRA) { doTest(2, 4, CVTCODE(BGR5652BGRA)); }
|
||||
OCL_TEST_P(CvtColor8u, BGR5652RGBA) { doTest(2, 4, CVTCODE(BGR5652RGBA)); }
|
||||
|
||||
OCL_TEST_P(CvtColor8u, BGR5552BGR) { doTest(2, 3, CVTCODE(BGR5552BGR)); }
|
||||
OCL_TEST_P(CvtColor8u, BGR5552RGB) { doTest(2, 3, CVTCODE(BGR5552RGB)); }
|
||||
OCL_TEST_P(CvtColor8u, BGR5552BGRA) { doTest(2, 4, CVTCODE(BGR5552BGRA)); }
|
||||
OCL_TEST_P(CvtColor8u, BGR5552RGBA) { doTest(2, 4, CVTCODE(BGR5552RGBA)); }
|
||||
|
||||
OCL_TEST_P(CvtColor8u, BGR2BGR565) { doTest(3, 2, CVTCODE(BGR2BGR565)); }
|
||||
OCL_TEST_P(CvtColor8u, RGB2BGR565) { doTest(3, 2, CVTCODE(RGB2BGR565)); }
|
||||
OCL_TEST_P(CvtColor8u, BGRA2BGR565) { doTest(4, 2, CVTCODE(BGRA2BGR565)); }
|
||||
OCL_TEST_P(CvtColor8u, RGBA2BGR565) { doTest(4, 2, CVTCODE(RGBA2BGR565)); }
|
||||
|
||||
OCL_TEST_P(CvtColor8u, BGR2BGR555) { doTest(3, 2, CVTCODE(BGR2BGR555)); }
|
||||
OCL_TEST_P(CvtColor8u, RGB2BGR555) { doTest(3, 2, CVTCODE(RGB2BGR555)); }
|
||||
OCL_TEST_P(CvtColor8u, BGRA2BGR555) { doTest(4, 2, CVTCODE(BGRA2BGR555)); }
|
||||
OCL_TEST_P(CvtColor8u, RGBA2BGR555) { doTest(4, 2, CVTCODE(RGBA2BGR555)); }
|
||||
|
||||
// RGB5x5 <-> Gray
|
||||
|
||||
OCL_TEST_P(CvtColor8u, BGR5652GRAY) { doTest(2, 1, CVTCODE(BGR5652GRAY)); }
|
||||
OCL_TEST_P(CvtColor8u, BGR5552GRAY) { doTest(2, 1, CVTCODE(BGR5552GRAY)); }
|
||||
|
||||
OCL_TEST_P(CvtColor8u, GRAY2BGR565) { doTest(1, 2, CVTCODE(GRAY2BGR565)); }
|
||||
OCL_TEST_P(CvtColor8u, GRAY2BGR555) { doTest(1, 2, CVTCODE(GRAY2BGR555)); }
|
||||
|
||||
// RGBA <-> mRGBA
|
||||
|
||||
OCL_TEST_P(CvtColor8u, RGBA2mRGBA) { doTest(4, 4, CVTCODE(RGBA2mRGBA)); }
|
||||
OCL_TEST_P(CvtColor8u, mRGBA2RGBA) { doTest(4, 4, CVTCODE(mRGBA2RGBA)); }
|
||||
|
||||
// YUV -> RGBA_NV12
|
||||
|
||||
struct CvtColor_YUV420 :
|
||||
public CvtColor
|
||||
{
|
||||
void random_roi(int channelsIn, int channelsOut)
|
||||
{
|
||||
@ -193,50 +281,36 @@ struct CvtColor_YUV420 : CvtColor
|
||||
roiSize.width *= 2;
|
||||
roiSize.height *= 3;
|
||||
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(src1, src1_roi, roiSize, srcBorder, srcType, 2, 100);
|
||||
randomSubMat(src, src_roi, roiSize, srcBorder, srcType, 2, 100);
|
||||
|
||||
Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(dst1, dst1_roi, roiSize, dst1Border, dstType, 5, 16);
|
||||
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(dst, dst_roi, roiSize, dstBorder, dstType, 5, 16);
|
||||
|
||||
generateOclMat(gsrc1_whole, gsrc1_roi, src1, roiSize, srcBorder);
|
||||
generateOclMat(gdst1_whole, gdst1_roi, dst1, roiSize, dst1Border);
|
||||
generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
|
||||
generateOclMat(gdst_whole, gdst_roi, dst, roiSize, dstBorder);
|
||||
}
|
||||
};
|
||||
|
||||
OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12)
|
||||
{
|
||||
doTest(1, 4, COLOR_YUV2RGBA_NV12);
|
||||
};
|
||||
OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12) { doTest(1, 4, COLOR_YUV2RGBA_NV12); }
|
||||
OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) { doTest(1, 4, COLOR_YUV2BGRA_NV12); }
|
||||
OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) { doTest(1, 3, COLOR_YUV2RGB_NV12); }
|
||||
OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { doTest(1, 3, COLOR_YUV2BGR_NV12); }
|
||||
|
||||
OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12)
|
||||
{
|
||||
doTest(1, 4, COLOR_YUV2BGRA_NV12);
|
||||
};
|
||||
|
||||
OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12)
|
||||
{
|
||||
doTest(1, 3, COLOR_YUV2RGB_NV12);
|
||||
};
|
||||
|
||||
OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12)
|
||||
{
|
||||
doTest(1, 3, COLOR_YUV2BGR_NV12);
|
||||
};
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor8u,
|
||||
testing::Combine(testing::Values(MatDepth(CV_8U)), Bool()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor8u32f,
|
||||
testing::Combine(testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), Bool()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor,
|
||||
testing::Combine(
|
||||
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),
|
||||
Bool()
|
||||
)
|
||||
);
|
||||
Bool()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor_YUV420,
|
||||
testing::Combine(
|
||||
testing::Values(MatDepth(CV_8U)),
|
||||
Bool()
|
||||
)
|
||||
);
|
||||
Bool()));
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -275,23 +275,33 @@ OCL_TEST_P(CornerHarris, Mat)
|
||||
|
||||
//////////////////////////////////integral/////////////////////////////////////////////////
|
||||
|
||||
typedef ImgprocTestBase Integral;
|
||||
struct Integral :
|
||||
public ImgprocTestBase
|
||||
{
|
||||
int sdepth;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
blockSize = GET_PARAM(1);
|
||||
sdepth = GET_PARAM(2);
|
||||
useRoi = GET_PARAM(3);
|
||||
}
|
||||
};
|
||||
OCL_TEST_P(Integral, Mat1)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
ocl::integral(gsrc_roi, gdst_roi);
|
||||
integral(src_roi, dst_roi);
|
||||
ocl::integral(gsrc_roi, gdst_roi, sdepth);
|
||||
integral(src_roi, dst_roi, sdepth);
|
||||
|
||||
Near();
|
||||
}
|
||||
}
|
||||
|
||||
// TODO wrong output type
|
||||
OCL_TEST_P(Integral, DISABLED_Mat2)
|
||||
OCL_TEST_P(Integral, Mat2)
|
||||
{
|
||||
Mat dst1;
|
||||
ocl::oclMat gdst1;
|
||||
@ -300,10 +310,12 @@ OCL_TEST_P(Integral, DISABLED_Mat2)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
integral(src_roi, dst1, dst_roi);
|
||||
ocl::integral(gsrc_roi, gdst1, gdst_roi);
|
||||
integral(src_roi, dst_roi, dst1, sdepth);
|
||||
ocl::integral(gsrc_roi, gdst_roi, gdst1, sdepth);
|
||||
|
||||
Near();
|
||||
if(gdst1.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE))
|
||||
EXPECT_MAT_NEAR(dst1, Mat(gdst1), 0.);
|
||||
}
|
||||
}
|
||||
|
||||
@ -543,7 +555,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
|
||||
INSTANTIATE_TEST_CASE_P(Imgproc, Integral, Combine(
|
||||
Values((MatType)CV_8UC1), // TODO does not work with CV_32F, CV_64F
|
||||
Values(0), // not used
|
||||
Values(0), // not used
|
||||
Values((MatType)CV_32SC1, (MatType)CV_32FC1),
|
||||
Bool()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine(
|
||||
|
@ -156,6 +156,114 @@ OCL_TEST_P(WarpPerspective, Mat)
|
||||
}
|
||||
}
|
||||
|
||||
// buildWarpPerspectiveMaps
|
||||
|
||||
// Fixture for ocl::buildWarpPerspectiveMaps.
// Parameter 0 selects the `inverse` flag passed to the map builders,
// parameter 1 selects whether the maps get a random border (ROI mode).
PARAM_TEST_CASE(BuildWarpPerspectiveMaps, bool, bool)
{
    bool useRoi;
    bool mapInverse;
    Size dsize;

    // host-side maps: whole matrix and the ROI view into it
    Mat xmap_whole, ymap_whole;
    Mat xmap_roi, ymap_roi;

    // device-side counterparts
    ocl::oclMat gxmap_whole, gymap_whole;
    ocl::oclMat gxmap_roi, gymap_roi;

    void SetUp()
    {
        mapInverse = GET_PARAM(0);
        useRoi = GET_PARAM(1);
    }

    // Pick a random map size, allocate both host maps (with optional border)
    // and create matching oclMat whole/ROI pairs from them.
    void random_roi()
    {
        const int maxBorder = useRoi ? MAX_VALUE : 0;

        dsize = randomSize(1, MAX_VALUE);

        Border xmapBorder = randomBorder(0, maxBorder);
        randomSubMat(xmap_whole, xmap_roi, dsize, xmapBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);

        Border ymapBorder = randomBorder(0, maxBorder);
        randomSubMat(ymap_whole, ymap_roi, dsize, ymapBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);

        generateOclMat(gxmap_whole, gxmap_roi, xmap_whole, dsize, xmapBorder);
        generateOclMat(gymap_whole, gymap_roi, ymap_whole, dsize, ymapBorder);
    }

    // Compare the x map (whole and ROI) against the downloaded device result.
    void Near(double threshold = 0.0)
    {
        Mat whole;
        Mat roi;
        gxmap_whole.download(whole);
        gxmap_roi.download(roi);

        EXPECT_MAT_NEAR(xmap_whole, whole, threshold);
        EXPECT_MAT_NEAR(xmap_roi, roi, threshold);
    }

    // Same comparison as Near(), but for the y map.
    void Near1(double threshold = 0.0)
    {
        Mat whole;
        Mat roi;
        gymap_whole.download(whole);
        gymap_roi.download(roi);

        EXPECT_MAT_NEAR(ymap_whole, whole, threshold);
        EXPECT_MAT_NEAR(ymap_roi, roi, threshold);
    }
};
|
||||
|
||||
static void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, Mat &xmap, Mat &ymap)
|
||||
{
|
||||
CV_Assert(M.rows == 3 && M.cols == 3);
|
||||
CV_Assert(dsize.area() > 0);
|
||||
|
||||
xmap.create(dsize, CV_32FC1);
|
||||
ymap.create(dsize, CV_32FC1);
|
||||
|
||||
float coeffs[3 * 3];
|
||||
Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
|
||||
|
||||
if (inverse)
|
||||
M.convertTo(coeffsMat, coeffsMat.type());
|
||||
else
|
||||
{
|
||||
cv::Mat iM;
|
||||
invert(M, iM);
|
||||
iM.convertTo(coeffsMat, coeffsMat.type());
|
||||
}
|
||||
|
||||
for (int y = 0; y < dsize.height; ++y)
|
||||
{
|
||||
float * const xmap_ptr = xmap.ptr<float>(y);
|
||||
float * const ymap_ptr = ymap.ptr<float>(y);
|
||||
|
||||
for (int x = 0; x < dsize.width; ++x)
|
||||
{
|
||||
float coeff = 1.0f / (x * coeffs[6] + y * coeffs[7] + coeffs[8]);
|
||||
xmap_ptr[x] = (x * coeffs[0] + y * coeffs[1] + coeffs[2]) * coeff;
|
||||
ymap_ptr[x] = (x * coeffs[3] + y * coeffs[4] + coeffs[5]) * coeff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a random perspective transform per iteration and checks that the
// OpenCL maps match the host reference within 5e-3 for both x and y maps.
OCL_TEST_P(BuildWarpPerspectiveMaps, Mat)
{
    // Extents of the source quad; they do not change between iterations.
    const float cols = static_cast<float>(MAX_VALUE);
    const float rows = static_cast<float>(MAX_VALUE);
    const float cols2 = cols / 2.0f;
    const float rows2 = rows / 2.0f;

    for (int iter = 0; iter < LOOP_TIMES; ++iter)
    {
        random_roi();

        // Source corners are fixed; each destination corner is drawn from
        // its own quadrant of the [0, cols] x [0, rows] rectangle.
        Point2f sp[] = { Point2f(0.0f, 0.0f), Point2f(cols, 0.0f), Point2f(0.0f, rows), Point2f(cols, rows) };
        Point2f dp[] = { Point2f(rng.uniform(0.0f, cols2), rng.uniform(0.0f, rows2)),
                         Point2f(rng.uniform(cols2, cols), rng.uniform(0.0f, rows2)),
                         Point2f(rng.uniform(0.0f, cols2), rng.uniform(rows2, rows)),
                         Point2f(rng.uniform(cols2, cols), rng.uniform(rows2, rows)) };
        Mat M = getPerspectiveTransform(sp, dp);

        buildWarpPerspectiveMaps(M, mapInverse, dsize, xmap_roi, ymap_roi);
        ocl::buildWarpPerspectiveMaps(M, mapInverse, dsize, gxmap_roi, gymap_roi);

        Near(5e-3);
        Near1(5e-3);
    }
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// remap
|
||||
|
||||
@ -205,7 +313,12 @@ PARAM_TEST_CASE(Remap, MatDepth, Channels, pair<MatType, MatType>, Border, bool)
|
||||
|
||||
Border map2Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
||||
if (map2Type != noType)
|
||||
randomSubMat(map2, map2_roi, dstROISize, map2Border, map2Type, -mapMaxValue, mapMaxValue);
|
||||
{
|
||||
int mapMinValue = -mapMaxValue;
|
||||
if (map2Type == CV_16UC1 || map2Type == CV_16SC1)
|
||||
mapMinValue = 0, mapMaxValue = INTER_TAB_SIZE2;
|
||||
randomSubMat(map2, map2_roi, dstROISize, map2Border, map2Type, mapMinValue, mapMaxValue);
|
||||
}
|
||||
|
||||
generateOclMat(gsrc, gsrc_roi, src, srcROISize, srcBorder);
|
||||
generateOclMat(gdst, gdst_roi, dst, dstROISize, dstBorder);
|
||||
@ -338,10 +451,13 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine(
|
||||
Bool(),
|
||||
Bool()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ImgprocWarp, BuildWarpPerspectiveMaps, Combine(Bool(), Bool()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine(
|
||||
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
|
||||
Values(1, 2, 3, 4),
|
||||
Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
|
||||
pair<MatType, MatType>((MatType)CV_16SC2, (MatType)CV_16UC1),
|
||||
pair<MatType, MatType>((MatType)CV_32FC2, noType)),
|
||||
Values((Border)BORDER_CONSTANT,
|
||||
(Border)BORDER_REPLICATE,
|
||||
|
@ -237,6 +237,7 @@ typedef struct CV_EXPORTS performance_metrics
|
||||
TERM_TIME = 1,
|
||||
TERM_INTERRUPT = 2,
|
||||
TERM_EXCEPTION = 3,
|
||||
TERM_SKIP_TEST = 4, // there are some limitations and test should be skipped
|
||||
TERM_UNKNOWN = -1
|
||||
};
|
||||
|
||||
@ -273,6 +274,8 @@ public:
|
||||
static enum PERF_STRATEGY getPerformanceStrategy();
|
||||
static enum PERF_STRATEGY setPerformanceStrategy(enum PERF_STRATEGY strategy);
|
||||
|
||||
// Thrown out of PerfTestBody() to signal that the test should be skipped;
// RunPerfTestBody() catches it and records TERM_SKIP_TEST as the termination reason.
class PerfSkipTestException: public cv::Exception {};
|
||||
|
||||
protected:
|
||||
virtual void PerfTestBody() = 0;
|
||||
|
||||
|
@ -1174,7 +1174,14 @@ void TestBase::reportMetrics(bool toJUnitXML)
|
||||
{
|
||||
performance_metrics& m = calcMetrics();
|
||||
|
||||
if (toJUnitXML)
|
||||
if (m.terminationReason == performance_metrics::TERM_SKIP_TEST)
|
||||
{
|
||||
if (toJUnitXML)
|
||||
{
|
||||
RecordProperty("custom_status", "skipped");
|
||||
}
|
||||
}
|
||||
else if (toJUnitXML)
|
||||
{
|
||||
RecordProperty("bytesIn", (int)m.bytesIn);
|
||||
RecordProperty("bytesOut", (int)m.bytesOut);
|
||||
@ -1266,21 +1273,30 @@ void TestBase::SetUp()
|
||||
|
||||
void TestBase::TearDown()
|
||||
{
|
||||
if (!HasFailure() && !verified)
|
||||
ADD_FAILURE() << "The test has no sanity checks. There should be at least one check at the end of performance test.";
|
||||
|
||||
validateMetrics();
|
||||
if (HasFailure())
|
||||
reportMetrics(false);
|
||||
if (metrics.terminationReason == performance_metrics::TERM_SKIP_TEST)
|
||||
{
|
||||
LOGI("\tTest was skipped");
|
||||
GTEST_SUCCEED() << "Test was skipped";
|
||||
}
|
||||
else
|
||||
{
|
||||
const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info();
|
||||
const char* type_param = test_info->type_param();
|
||||
const char* value_param = test_info->value_param();
|
||||
if (value_param) printf("[ VALUE ] \t%s\n", value_param), fflush(stdout);
|
||||
if (type_param) printf("[ TYPE ] \t%s\n", type_param), fflush(stdout);
|
||||
reportMetrics(true);
|
||||
if (!HasFailure() && !verified)
|
||||
ADD_FAILURE() << "The test has no sanity checks. There should be at least one check at the end of performance test.";
|
||||
|
||||
validateMetrics();
|
||||
if (HasFailure())
|
||||
{
|
||||
reportMetrics(false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info();
|
||||
const char* type_param = test_info->type_param();
|
||||
const char* value_param = test_info->value_param();
|
||||
if (value_param) printf("[ VALUE ] \t%s\n", value_param), fflush(stdout);
|
||||
if (type_param) printf("[ TYPE ] \t%s\n", type_param), fflush(stdout);
|
||||
reportMetrics(true);
|
||||
}
|
||||
|
||||
std::string TestBase::getDataPath(const std::string& relativePath)
|
||||
@ -1330,6 +1346,11 @@ void TestBase::RunPerfTestBody()
|
||||
{
|
||||
this->PerfTestBody();
|
||||
}
|
||||
catch(PerfSkipTestException&)
|
||||
{
|
||||
metrics.terminationReason = performance_metrics::TERM_SKIP_TEST;
|
||||
return;
|
||||
}
|
||||
catch(PerfEarlyExitException&)
|
||||
{
|
||||
metrics.terminationReason = performance_metrics::TERM_INTERRUPT;
|
||||
|
@ -1,10 +1,10 @@
|
||||
|
||||
//============================================================================
|
||||
// Name : HighDynamicRange_RetinaCompression.cpp
|
||||
// Name : OpenEXRimages_HDR_Retina_toneMapping.cpp
|
||||
// Author : Alexandre Benoit (benoit.alexandre.vision@gmail.com)
|
||||
// Version : 0.1
|
||||
// Copyright : Alexandre Benoit, LISTIC Lab, july 2011
|
||||
// Description : HighDynamicRange compression (tone mapping) with the help of the Gipsa/Listic's retina in C++, Ansi-style
|
||||
// Description : HighDynamicRange retina tone mapping with the help of the Gipsa/Listic's retina in C++, Ansi-style
|
||||
//============================================================================
|
||||
|
||||
#include <iostream>
|
||||
@ -71,7 +71,7 @@ static void drawPlot(const cv::Mat curve, const std::string figureTitle, const i
|
||||
{
|
||||
cv::Mat rgbIntImg;
|
||||
outputMat.convertTo(rgbIntImg, CV_8UC3);
|
||||
cv::cvtColor(rgbIntImg, intGrayImage, cv::COLOR_BGR2GRAY);
|
||||
cvtColor(rgbIntImg, intGrayImage, cv::COLOR_BGR2GRAY);
|
||||
}
|
||||
|
||||
// get histogram density probability in order to cut values under above edges limits (here 5-95%)... usefull for HDR pixel errors cancellation
|
||||
|
@ -4,7 +4,7 @@
|
||||
// Author : Alexandre Benoit (benoit.alexandre.vision@gmail.com)
|
||||
// Version : 0.2
|
||||
// Copyright : Alexandre Benoit, LISTIC Lab, december 2011
|
||||
// Description : HighDynamicRange compression (tone mapping) for image sequences with the help of the Gipsa/Listic's retina in C++, Ansi-style
|
||||
// Description : HighDynamicRange retina tone mapping for image sequences with the help of the Gipsa/Listic's retina in C++, Ansi-style
|
||||
// Known issues: the input OpenEXR sequences can have bad computed pixels that should be removed
|
||||
// => a simple method consists of cutting histogram edges (a slider for this on the UI is provided)
|
||||
// => however, in image sequences, this histogramm cut must be done in an elegant way from frame to frame... still not done...
|
||||
@ -94,7 +94,7 @@ static void rescaleGrayLevelMat(const cv::Mat &inputMat, cv::Mat &outputMat, con
|
||||
{
|
||||
cv::Mat rgbIntImg;
|
||||
normalisedImage.convertTo(rgbIntImg, CV_8UC3);
|
||||
cv::cvtColor(rgbIntImg, intGrayImage, cv::COLOR_BGR2GRAY);
|
||||
cvtColor(rgbIntImg, intGrayImage, cv::COLOR_BGR2GRAY);
|
||||
}
|
||||
|
||||
// get histogram density probability in order to cut values under above edges limits (here 5-95%)... usefull for HDR pixel errors cancellation
|
||||
|
@ -4,15 +4,11 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/core/core_c.h>
|
||||
#include <opencv2/imgproc/imgproc_c.h>
|
||||
#include <opencv2/legacy/compat.hpp>
|
||||
#include <opencv2/calib3d/calib3d_c.h>
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/calib3d.hpp>
|
||||
#include <opencv2/legacy/compat.hpp>
|
||||
|
||||
#if defined WIN32 || defined _WIN32 || defined WINCE
|
||||
#include <windows.h>
|
||||
@ -116,19 +112,16 @@ static void initPOSIT(std::vector<CvPoint3D32f> *modelPoints)
|
||||
modelPoints->push_back(cvPoint3D32f(0.0f, CUBE_SIZE, 0.0f));
|
||||
}
|
||||
|
||||
static void foundCorners(vector<CvPoint2D32f> *srcImagePoints,IplImage* source, IplImage* grayImage)
|
||||
static void foundCorners(vector<CvPoint2D32f> *srcImagePoints, const Mat& source, Mat& grayImage)
|
||||
{
|
||||
cvCvtColor(source,grayImage,CV_RGB2GRAY);
|
||||
cvSmooth( grayImage, grayImage,CV_GAUSSIAN,11);
|
||||
cvNormalize(grayImage, grayImage, 0, 255, CV_MINMAX);
|
||||
cvThreshold( grayImage, grayImage, 26, 255, CV_THRESH_BINARY_INV);//25
|
||||
cvtColor(source, grayImage, COLOR_RGB2GRAY);
|
||||
GaussianBlur(grayImage, grayImage, Size(11,11), 0, 0);
|
||||
normalize(grayImage, grayImage, 0, 255, NORM_MINMAX);
|
||||
threshold(grayImage, grayImage, 26, 255, THRESH_BINARY_INV); //25
|
||||
|
||||
Mat MgrayImage = cv::cvarrToMat(grayImage);
|
||||
//For debug
|
||||
//MgrayImage = MgrayImage.clone();//deep copy
|
||||
vector<vector<Point> > contours;
|
||||
vector<Vec4i> hierarchy;
|
||||
findContours(MgrayImage, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_NONE);
|
||||
findContours(grayImage, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_NONE);
|
||||
|
||||
Point p;
|
||||
vector<CvPoint2D32f> srcImagePoints_temp(4,cvPoint2D32f(0,0));
|
||||
@ -189,17 +182,17 @@ static void foundCorners(vector<CvPoint2D32f> *srcImagePoints,IplImage* source,
|
||||
}
|
||||
srcImagePoints->at(3) = srcImagePoints_temp.at(index);
|
||||
|
||||
Mat Msource = cv::cvarrToMat(source);
|
||||
Mat Msource = source;
|
||||
stringstream ss;
|
||||
for(size_t i = 0 ; i<srcImagePoints_temp.size(); i++ )
|
||||
{
|
||||
ss<<i;
|
||||
circle(Msource,srcImagePoints->at(i),5,CV_RGB(255,0,0));
|
||||
putText( Msource, ss.str(), srcImagePoints->at(i),CV_FONT_HERSHEY_SIMPLEX,1,CV_RGB(255,0,0));
|
||||
circle(Msource,srcImagePoints->at(i),5,Scalar(0,0,255));
|
||||
putText(Msource,ss.str(),srcImagePoints->at(i),FONT_HERSHEY_SIMPLEX,1,Scalar(0,0,255));
|
||||
ss.str("");
|
||||
|
||||
//new coordinate system in the middle of the frame and reversed (camera coordinate system)
|
||||
srcImagePoints->at(i) = cvPoint2D32f(srcImagePoints_temp.at(i).x-source->width/2,source->height/2-srcImagePoints_temp.at(i).y);
|
||||
srcImagePoints->at(i) = cvPoint2D32f(srcImagePoints_temp.at(i).x-source.cols/2,source.rows/2-srcImagePoints_temp.at(i).y);
|
||||
}
|
||||
}
|
||||
|
||||
@ -232,15 +225,14 @@ int main(void)
|
||||
VideoCapture video("cube4.avi");
|
||||
CV_Assert(video.isOpened());
|
||||
|
||||
Mat frame; video >> frame;
|
||||
Mat source, grayImage;
|
||||
|
||||
IplImage* grayImage = cvCreateImage(frame.size(),8,1);
|
||||
video >> source;
|
||||
|
||||
namedWindow("original", WINDOW_AUTOSIZE | WINDOW_FREERATIO);
|
||||
namedWindow("POSIT", WINDOW_AUTOSIZE | WINDOW_FREERATIO);
|
||||
displayOverlay("POSIT", "We lost the 4 corners' detection quite often (the red circles disappear). This demo is only to illustrate how to use OpenGL callback.\n -- Press ESC to exit.", 10000);
|
||||
//For debug
|
||||
//cvNamedWindow("tempGray",CV_WINDOW_AUTOSIZE);
|
||||
|
||||
float OpenGLMatrix[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
||||
setOpenGlDrawCallback("POSIT",on_opengl,OpenGLMatrix);
|
||||
|
||||
@ -259,25 +251,20 @@ int main(void)
|
||||
|
||||
while(waitKey(33) != 27)
|
||||
{
|
||||
video >> frame;
|
||||
imshow("original", frame);
|
||||
video >> source;
|
||||
imshow("original",source);
|
||||
|
||||
IplImage source = frame;
|
||||
foundCorners(&srcImagePoints, &source, grayImage);
|
||||
foundCorners(&srcImagePoints, source, grayImage);
|
||||
cvPOSIT( positObject, &srcImagePoints[0], FOCAL_LENGTH, criteria, rotation_matrix, translation_vector );
|
||||
createOpenGLMatrixFrom(OpenGLMatrix,rotation_matrix,translation_vector);
|
||||
|
||||
imshow("POSIT", frame);
|
||||
//For debug
|
||||
//cvShowImage("tempGray",grayImage);
|
||||
imshow("POSIT",source);
|
||||
|
||||
if (video.get(CAP_PROP_POS_AVI_RATIO) > 0.99)
|
||||
video.set(CAP_PROP_POS_AVI_RATIO, 0);
|
||||
}
|
||||
|
||||
destroyAllWindows();
|
||||
cvReleaseImage(&grayImage);
|
||||
video.release();
|
||||
cvReleasePOSITObject(&positObject);
|
||||
|
||||
return 0;
|
||||
|
@ -54,10 +54,6 @@ static void help(char** argv)
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static void makeDir( const string& dir )
|
||||
{
|
||||
#if defined WIN32 || defined _WIN32
|
||||
|
@ -208,7 +208,7 @@ static void doIteration( const Mat& img1, Mat& img2, bool isWarpPerspective,
|
||||
matchesMask[i1] = 1;
|
||||
}
|
||||
// draw inliers
|
||||
drawMatches( img1, keypoints1, img2, keypoints2, filteredMatches, drawImg, Scalar(0, 255, 0), Scalar(0, 0, 255), matchesMask
|
||||
drawMatches( img1, keypoints1, img2, keypoints2, filteredMatches, drawImg, Scalar(0, 255, 0), Scalar(255, 0, 0), matchesMask
|
||||
#if DRAW_RICH_KEYPOINTS_MODE
|
||||
, DrawMatchesFlags::DRAW_RICH_KEYPOINTS
|
||||
#endif
|
||||
@ -218,7 +218,7 @@ static void doIteration( const Mat& img1, Mat& img2, bool isWarpPerspective,
|
||||
// draw outliers
|
||||
for( size_t i1 = 0; i1 < matchesMask.size(); i1++ )
|
||||
matchesMask[i1] = !matchesMask[i1];
|
||||
drawMatches( img1, keypoints1, img2, keypoints2, filteredMatches, drawImg, Scalar(0, 0, 255), Scalar(255, 0, 0), matchesMask,
|
||||
drawMatches( img1, keypoints1, img2, keypoints2, filteredMatches, drawImg, Scalar(255, 0, 0), Scalar(0, 0, 255), matchesMask,
|
||||
DrawMatchesFlags::DRAW_OVER_OUTIMG | DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
|
||||
#endif
|
||||
|
||||
|
@ -130,7 +130,7 @@ int main( int argc, const char** argv )
|
||||
// Call to update the view
|
||||
onTrackbar(0, 0);
|
||||
|
||||
int c = waitKey() & 255;
|
||||
int c = waitKey(0) & 255;
|
||||
|
||||
if( c == 27 )
|
||||
break;
|
||||
|
@ -59,7 +59,7 @@ int main( int /*argc*/, char** /*argv*/ )
|
||||
params.cov_mat_type = CvEM::COV_MAT_DIAGONAL;
|
||||
params.start_step = CvEM::START_E_STEP;
|
||||
params.means = em_model.get_means();
|
||||
params.covs = (const CvMat**)em_model.get_covs();
|
||||
params.covs = em_model.get_covs();
|
||||
params.weights = em_model.get_weights();
|
||||
|
||||
em_model2.train( samples, Mat(), params, &labels );
|
||||
|
@ -80,7 +80,7 @@ Mat DrawCorrespondences(const Mat& img1, const vector<KeyPoint>& features1, cons
|
||||
|
||||
for (size_t i = 0; i < features1.size(); i++)
|
||||
{
|
||||
circle(img_corr, features1[i].pt, 3, Scalar(255, 0, 0));
|
||||
circle(img_corr, features1[i].pt, 3, Scalar(0, 0, 255));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < features2.size(); i++)
|
||||
|
@ -296,15 +296,15 @@ int main( int argc, char** argv )
|
||||
help();
|
||||
|
||||
const string winName = "image";
|
||||
namedWindow( winName.c_str(), WINDOW_AUTOSIZE );
|
||||
setMouseCallback( winName.c_str(), on_mouse, 0 );
|
||||
namedWindow( winName, WINDOW_AUTOSIZE );
|
||||
setMouseCallback( winName, on_mouse, 0 );
|
||||
|
||||
gcapp.setImageAndWinName( image, winName );
|
||||
gcapp.showImage();
|
||||
|
||||
for(;;)
|
||||
{
|
||||
int c = waitKey();
|
||||
int c = waitKey(0);
|
||||
switch( (char) c )
|
||||
{
|
||||
case '\x1b':
|
||||
@ -331,6 +331,6 @@ int main( int argc, char** argv )
|
||||
}
|
||||
|
||||
exit_main:
|
||||
destroyWindow( winName.c_str() );
|
||||
destroyWindow( winName );
|
||||
return 0;
|
||||
}
|
||||
|
@ -77,7 +77,7 @@ int main( int argc, char** argv )
|
||||
|
||||
OpenClose(open_close_pos, 0);
|
||||
ErodeDilate(erode_dilate_pos, 0);
|
||||
c = waitKey();
|
||||
c = waitKey(0);
|
||||
|
||||
if( (char)c == 27 )
|
||||
break;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user