Merge remote-tracking branch 'upstream/master'
This commit is contained in:
		| @@ -147,6 +147,8 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri | |||||||
|     __local int best_disp[2]; |     __local int best_disp[2]; | ||||||
|     __local int best_cost[2]; |     __local int best_cost[2]; | ||||||
|     best_cost[nthread] = MAX_VAL; |     best_cost[nthread] = MAX_VAL; | ||||||
|  |     best_disp[nthread] = MAX_VAL; | ||||||
|  |     barrier(CLK_LOCAL_MEM_FENCE); | ||||||
|  |  | ||||||
|     short costbuf[wsz]; |     short costbuf[wsz]; | ||||||
|     int head = 0; |     int head = 0; | ||||||
| @@ -159,7 +161,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri | |||||||
|     int costIdx = calcLocalIdx(lx, ly, d, sizeY); |     int costIdx = calcLocalIdx(lx, ly, d, sizeY); | ||||||
|     cost = costFunc + costIdx; |     cost = costFunc + costIdx; | ||||||
|  |  | ||||||
|     short tempcost = 0; |     int tempcost = 0; | ||||||
|     if(x < cols-wsz2-mindisp && y < rows-wsz2) |     if(x < cols-wsz2-mindisp && y < rows-wsz2) | ||||||
|     { |     { | ||||||
|         int shift = 1*nthread + cols*(1-nthread); |         int shift = 1*nthread + cols*(1-nthread); | ||||||
| @@ -191,7 +193,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri | |||||||
|     barrier(CLK_LOCAL_MEM_FENCE); |     barrier(CLK_LOCAL_MEM_FENCE); | ||||||
|  |  | ||||||
|     if(best_cost[1] == tempcost) |     if(best_cost[1] == tempcost) | ||||||
|         best_disp[1] = ndisp - d - 1; |         atomic_min(best_disp + 1, ndisp - d - 1); | ||||||
|     barrier(CLK_LOCAL_MEM_FENCE); |     barrier(CLK_LOCAL_MEM_FENCE); | ||||||
|  |  | ||||||
|     int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short)); |     int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short)); | ||||||
| @@ -209,6 +211,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri | |||||||
|         y = (ly < sizeY) ? gy + shiftY + ly : rows; |         y = (ly < sizeY) ? gy + shiftY + ly : rows; | ||||||
|  |  | ||||||
|         best_cost[nthread] = MAX_VAL; |         best_cost[nthread] = MAX_VAL; | ||||||
|  |         best_disp[nthread] = MAX_VAL; | ||||||
|         barrier(CLK_LOCAL_MEM_FENCE); |         barrier(CLK_LOCAL_MEM_FENCE); | ||||||
|  |  | ||||||
|         costIdx = calcLocalIdx(lx, ly, d, sizeY); |         costIdx = calcLocalIdx(lx, ly, d, sizeY); | ||||||
| @@ -227,12 +230,11 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri | |||||||
|         barrier(CLK_LOCAL_MEM_FENCE); |         barrier(CLK_LOCAL_MEM_FENCE); | ||||||
|  |  | ||||||
|         if(best_cost[nthread] == tempcost) |         if(best_cost[nthread] == tempcost) | ||||||
|             best_disp[nthread] = ndisp - d - 1; |             atomic_min(best_disp + nthread, ndisp - d - 1); | ||||||
|         barrier(CLK_LOCAL_MEM_FENCE); |         barrier(CLK_LOCAL_MEM_FENCE); | ||||||
|  |  | ||||||
|         int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short)); |         int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short)); | ||||||
|         disp = (__global short *)(dispptr + dispIdx); |         disp = (__global short *)(dispptr + dispIdx); | ||||||
|  |  | ||||||
|         calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY, |         calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY, | ||||||
|             best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2); |             best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2); | ||||||
|         barrier(CLK_LOCAL_MEM_FENCE); |         barrier(CLK_LOCAL_MEM_FENCE); | ||||||
|   | |||||||
| @@ -414,24 +414,23 @@ const String& getBuildInformation() | |||||||
|  |  | ||||||
|  | // Builds a String from a printf-style format string and its variadic arguments. | ||||||
|  | // Formatting starts in an AutoBuffer<char, 1024>; whenever vsnprintf reports that the | ||||||
|  | // output did not fit, the buffer is enlarged and the call is repeated. | ||||||
| String format( const char* fmt, ... ) | String format( const char* fmt, ... ) | ||||||
| { | { | ||||||
|     char buf[1024]; |     AutoBuffer<char, 1024> buf; | ||||||
|  |  | ||||||
|  |     for ( ; ; ) | ||||||
|  |     { | ||||||
|  |         // va is started and ended on every pass because each vsnprintf call consumes it. | ||||||
|         va_list va; |         va_list va; | ||||||
|         va_start(va, fmt); |         va_start(va, fmt); | ||||||
|     int len = vsnprintf(buf, sizeof(buf), fmt, va); |         int bsize = static_cast<int>(buf.size()), | ||||||
|  |                 len = vsnprintf((char *)buf, bsize, fmt, va); | ||||||
|         va_end(va); |         va_end(va); | ||||||
|  |  | ||||||
|  |         // Retry with at least twice the current size, or len + 1 when that is larger. | ||||||
|  |         // NOTE(review): a negative len is retried exactly like truncation; if vsnprintf keeps | ||||||
|  |         // returning a negative value the buffer keeps doubling - confirm whether a cap is needed. | ||||||
|     if (len >= (int)sizeof(buf)) |         if (len < 0 || len >= bsize) | ||||||
|         { |         { | ||||||
|         String s(len, '\0'); |             buf.resize(std::max(bsize << 1, len + 1)); | ||||||
|         va_start(va, fmt); |             continue; | ||||||
|         len = vsnprintf((char*)s.c_str(), len + 1, fmt, va); |         } | ||||||
|         (void)len; |         return String((char *)buf, len); | ||||||
|         va_end(va); |  | ||||||
|         return s; |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return String(buf, len); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| String tempfile( const char* suffix ) | String tempfile( const char* suffix ) | ||||||
|   | |||||||
| @@ -795,4 +795,176 @@ TEST(UMat, ReadBufferRect) | |||||||
|     EXPECT_MAT_NEAR(t, t2, 0); |     EXPECT_MAT_NEAR(t, t2, 0); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Use iGPU or OPENCV_OPENCL_DEVICE=:CPU: to catch problem | ||||||
|  | // | ||||||
|  | // Runs parallel_for_ over cv::Range(0, 2). Each invocation of the loop body maps its UMat | ||||||
|  | // member with getMat(ACCESS_READ) and releases the mapping again, ten times in a row, | ||||||
|  | // printing every step. Any exception escaping parallel_for_ fails the test. | ||||||
|  | TEST(UMat, DISABLED_synchronization_map_unmap) | ||||||
|  | { | ||||||
|  |     class TestParallelLoopBody : public cv::ParallelLoopBody | ||||||
|  |     { | ||||||
|  |         UMat u_; // copy of the UMat handed to the constructor | ||||||
|  |     public: | ||||||
|  |         TestParallelLoopBody(const UMat& u) : u_(u) { } | ||||||
|  |         // Maps and unmaps u_ ten times, logging progress tagged with range.start. | ||||||
|  |         void operator() (const cv::Range& range) const | ||||||
|  |         { | ||||||
|  |             printf("range: %d, %d -- begin\n", range.start, range.end); | ||||||
|  |             for (int i = 0; i < 10; i++) | ||||||
|  |             { | ||||||
|  |                 printf("%d: %d map...\n", range.start, i); | ||||||
|  |                 Mat m = u_.getMat(cv::ACCESS_READ); | ||||||
|  |  | ||||||
|  |                 printf("%d: %d unmap...\n", range.start, i); | ||||||
|  |                 m.release(); | ||||||
|  |             } | ||||||
|  |             printf("range: %d, %d -- end\n", range.start, range.end); | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |     try | ||||||
|  |     { | ||||||
|  |         UMat u(1000, 1000, CV_32FC1); | ||||||
|  |         parallel_for_(cv::Range(0, 2), TestParallelLoopBody(u)); | ||||||
|  |     } | ||||||
|  |     catch (const cv::Exception& e) | ||||||
|  |     { | ||||||
|  |         // FAIL() is fatal and returns from the test body, so nothing may follow it. | ||||||
|  |         FAIL() << "Exception: " << e.what(); | ||||||
|  |     } | ||||||
|  |     catch (...) | ||||||
|  |     { | ||||||
|  |         FAIL() << "Exception!"; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| } } // namespace cvtest::ocl | } } // namespace cvtest::ocl | ||||||
|  |  | ||||||
|  | // Repeats the same sequence 20 times: wrap a 1000x1000 CV_8UC1 Mat as a UMat with | ||||||
|  | // ACCESS_READ, call add() on it, then release the UMat before the Mat. | ||||||
|  | // An exception in any iteration is printed with the iteration index and recorded as a | ||||||
|  | // non-fatal failure, so the remaining iterations still run. | ||||||
|  | TEST(UMat, DISABLED_bug_with_unmap) | ||||||
|  | { | ||||||
|  |     for (int i = 0; i < 20; i++) | ||||||
|  |     { | ||||||
|  |         try | ||||||
|  |         { | ||||||
|  |             Mat m = Mat(1000, 1000, CV_8UC1); | ||||||
|  |             UMat u = m.getUMat(ACCESS_READ); | ||||||
|  |             UMat dst; | ||||||
|  |             add(u, Scalar::all(0), dst); // start async operation | ||||||
|  |             // Release order under test: the UMat first, then the Mat it was created from. | ||||||
|  |             u.release(); | ||||||
|  |             m.release(); | ||||||
|  |         } | ||||||
|  |         catch (const cv::Exception& e) | ||||||
|  |         { | ||||||
|  |             printf("i = %d... %s\n", i, e.what()); | ||||||
|  |             ADD_FAILURE(); | ||||||
|  |         } | ||||||
|  |         catch (...) | ||||||
|  |         { | ||||||
|  |             printf("i = %d...\n", i); | ||||||
|  |             ADD_FAILURE(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Keeps a UMat obtained from a local Mat alive after that Mat has gone out of scope. | ||||||
|  | // Logic::processData() converts its input to CV_32FC1 into a local Mat, stores | ||||||
|  | // dst.getUMat(ACCESS_READ) in a member, and then lets the local Mat be destroyed. | ||||||
|  | // Any exception is printed and recorded as a non-fatal failure. | ||||||
|  | TEST(UMat, DISABLED_bug_with_unmap_in_class) | ||||||
|  | { | ||||||
|  |     class Logic | ||||||
|  |     { | ||||||
|  |     public: | ||||||
|  |         Logic() {} | ||||||
|  |         // Converts input to CV_32FC1 and keeps a UMat view of the result in intermediateResult. | ||||||
|  |         void processData(InputArray input) | ||||||
|  |         { | ||||||
|  |             Mat m = input.getMat(); | ||||||
|  |             { | ||||||
|  |                 Mat dst; | ||||||
|  |                 m.convertTo(dst, CV_32FC1); | ||||||
|  |                 // some additional CPU-based per-pixel processing into dst | ||||||
|  |                 intermediateResult = dst.getUMat(ACCESS_READ); | ||||||
|  |                 std::cout << "data processed..." << std::endl; | ||||||
|  |             } // problem is here: dst::~Mat() | ||||||
|  |             std::cout << "leave ProcessData()" << std::endl; | ||||||
|  |         } | ||||||
|  |         // Returns the UMat stored by processData(). | ||||||
|  |         UMat getResult() const { return intermediateResult; } | ||||||
|  |     protected: | ||||||
|  |         UMat intermediateResult; | ||||||
|  |     }; | ||||||
|  |     try | ||||||
|  |     { | ||||||
|  |         Mat m = Mat(1000, 1000, CV_8UC1); | ||||||
|  |         Logic l; | ||||||
|  |         l.processData(m); | ||||||
|  |         UMat result = l.getResult(); | ||||||
|  |     } | ||||||
|  |     catch (const cv::Exception& e) | ||||||
|  |     { | ||||||
|  |         printf("exception... %s\n", e.what()); | ||||||
|  |         ADD_FAILURE(); | ||||||
|  |     } | ||||||
|  |     catch (...) | ||||||
|  |     { | ||||||
|  |         printf("exception... \n"); | ||||||
|  |         ADD_FAILURE(); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Holds a Mat obtained with getMat(ACCESS_READ) on u while add() reads u and writes | ||||||
|  | // into a separate dst. The test expects that no exception is thrown. | ||||||
|  | TEST(UMat, Test_same_behaviour_read_and_read) | ||||||
|  | { | ||||||
|  |     bool exceptionDetected = false; | ||||||
|  |     try | ||||||
|  |     { | ||||||
|  |         UMat u(Size(10, 10), CV_8UC1); | ||||||
|  |         Mat m = u.getMat(ACCESS_READ); // m stays alive until the end of the try block | ||||||
|  |         UMat dst; | ||||||
|  |         add(u, Scalar::all(1), dst); | ||||||
|  |     } | ||||||
|  |     catch (...) | ||||||
|  |     { | ||||||
|  |         exceptionDetected = true; | ||||||
|  |     } | ||||||
|  |     ASSERT_FALSE(exceptionDetected); // no data race, 2+ reads are valid | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // VP: this test (and probably others from same_behaviour series) is not valid in my opinion. | ||||||
|  | // | ||||||
|  | // Holds a Mat obtained with getMat(ACCESS_READ) on u while add() writes its result back | ||||||
|  | // into u. The test expects an exception to be thrown. | ||||||
|  | TEST(UMat, DISABLED_Test_same_behaviour_read_and_write) | ||||||
|  | { | ||||||
|  |     bool exceptionDetected = false; | ||||||
|  |     try | ||||||
|  |     { | ||||||
|  |         UMat u(Size(10, 10), CV_8UC1); | ||||||
|  |         Mat m = u.getMat(ACCESS_READ); // m stays alive until the end of the try block | ||||||
|  |         add(u, Scalar::all(1), u); | ||||||
|  |     } | ||||||
|  |     catch (...) | ||||||
|  |     { | ||||||
|  |         exceptionDetected = true; | ||||||
|  |     } | ||||||
|  |     ASSERT_TRUE(exceptionDetected); // data race | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Holds a Mat obtained with getMat(ACCESS_WRITE) on u while add() reads u and writes | ||||||
|  | // into a separate dst. The test expects an exception to be thrown. | ||||||
|  | TEST(UMat, DISABLED_Test_same_behaviour_write_and_read) | ||||||
|  | { | ||||||
|  |     bool exceptionDetected = false; | ||||||
|  |     try | ||||||
|  |     { | ||||||
|  |         UMat u(Size(10, 10), CV_8UC1); | ||||||
|  |         Mat m = u.getMat(ACCESS_WRITE); // m stays alive until the end of the try block | ||||||
|  |         UMat dst; | ||||||
|  |         add(u, Scalar::all(1), dst); | ||||||
|  |     } | ||||||
|  |     catch (...) | ||||||
|  |     { | ||||||
|  |         exceptionDetected = true; | ||||||
|  |     } | ||||||
|  |     ASSERT_TRUE(exceptionDetected); // data race | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Holds a Mat obtained with getMat(ACCESS_WRITE) on u while add() writes its result back | ||||||
|  | // into u. The test expects an exception to be thrown. | ||||||
|  | TEST(UMat, DISABLED_Test_same_behaviour_write_and_write) | ||||||
|  | { | ||||||
|  |     bool exceptionDetected = false; | ||||||
|  |     try | ||||||
|  |     { | ||||||
|  |         UMat u(Size(10, 10), CV_8UC1); | ||||||
|  |         Mat m = u.getMat(ACCESS_WRITE); // m stays alive until the end of the try block | ||||||
|  |         add(u, Scalar::all(1), u); | ||||||
|  |     } | ||||||
|  |     catch (...) | ||||||
|  |     { | ||||||
|  |         exceptionDetected = true; | ||||||
|  |     } | ||||||
|  |     ASSERT_TRUE(exceptionDetected); // data race | ||||||
|  | } | ||||||
|   | |||||||
| @@ -616,10 +616,10 @@ protected: | |||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| class CV_EXPORTS DenseFeatureDetector : public FeatureDetector | class CV_EXPORTS_W DenseFeatureDetector : public FeatureDetector | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     explicit DenseFeatureDetector( float initFeatureScale=1.f, int featureScaleLevels=1, |     CV_WRAP explicit DenseFeatureDetector( float initFeatureScale=1.f, int featureScaleLevels=1, | ||||||
|                                            float featureScaleMul=0.1f, |                                            float featureScaleMul=0.1f, | ||||||
|                                            int initXyStep=6, int initImgBound=0, |                                            int initXyStep=6, int initImgBound=0, | ||||||
|                                            bool varyXyStepWithScale=true, |                                            bool varyXyStepWithScale=true, | ||||||
|   | |||||||
| @@ -216,6 +216,7 @@ enum { IMREAD_UNCHANGED  = -1, // 8bit, color or not | |||||||
|      }; |      }; | ||||||
|  |  | ||||||
| enum { IMWRITE_JPEG_QUALITY     = 1, | enum { IMWRITE_JPEG_QUALITY     = 1, | ||||||
|  |        IMWRITE_JPEG_PROGRESSIVE = 2, | ||||||
|        IMWRITE_PNG_COMPRESSION  = 16, |        IMWRITE_PNG_COMPRESSION  = 16, | ||||||
|        IMWRITE_PNG_STRATEGY     = 17, |        IMWRITE_PNG_STRATEGY     = 17, | ||||||
|        IMWRITE_PNG_BILEVEL      = 18, |        IMWRITE_PNG_BILEVEL      = 18, | ||||||
|   | |||||||
| @@ -220,6 +220,7 @@ CVAPI(CvMat*) cvLoadImageM( const char* filename, int iscolor CV_DEFAULT(CV_LOAD | |||||||
| enum | enum | ||||||
| { | { | ||||||
|     CV_IMWRITE_JPEG_QUALITY =1, |     CV_IMWRITE_JPEG_QUALITY =1, | ||||||
|  |     CV_IMWRITE_JPEG_PROGRESSIVE =2, | ||||||
|     CV_IMWRITE_PNG_COMPRESSION =16, |     CV_IMWRITE_PNG_COMPRESSION =16, | ||||||
|     CV_IMWRITE_PNG_STRATEGY =17, |     CV_IMWRITE_PNG_STRATEGY =17, | ||||||
|     CV_IMWRITE_PNG_BILEVEL =18, |     CV_IMWRITE_PNG_BILEVEL =18, | ||||||
|   | |||||||
| @@ -598,6 +598,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params ) | |||||||
|         cinfo.in_color_space = channels > 1 ? JCS_RGB : JCS_GRAYSCALE; |         cinfo.in_color_space = channels > 1 ? JCS_RGB : JCS_GRAYSCALE; | ||||||
|  |  | ||||||
|         int quality = 95; |         int quality = 95; | ||||||
|  |         int progressive = 0; | ||||||
|  |  | ||||||
|         for( size_t i = 0; i < params.size(); i += 2 ) |         for( size_t i = 0; i < params.size(); i += 2 ) | ||||||
|         { |         { | ||||||
| @@ -606,11 +607,18 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params ) | |||||||
|                 quality = params[i+1]; |                 quality = params[i+1]; | ||||||
|                 quality = MIN(MAX(quality, 0), 100); |                 quality = MIN(MAX(quality, 0), 100); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|  |             if( params[i] == CV_IMWRITE_JPEG_PROGRESSIVE ) | ||||||
|  |             { | ||||||
|  |                 progressive = params[i+1]; | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         jpeg_set_defaults( &cinfo ); |         jpeg_set_defaults( &cinfo ); | ||||||
|         jpeg_set_quality( &cinfo, quality, |         jpeg_set_quality( &cinfo, quality, | ||||||
|                           TRUE /* limit to baseline-JPEG values */ ); |                           TRUE /* limit to baseline-JPEG values */ ); | ||||||
|  |         if( progressive ) | ||||||
|  |             jpeg_simple_progression( &cinfo ); | ||||||
|         jpeg_start_compress( &cinfo, TRUE ); |         jpeg_start_compress( &cinfo, TRUE ); | ||||||
|  |  | ||||||
|         if( channels > 1 ) |         if( channels > 1 ) | ||||||
|   | |||||||
| @@ -386,6 +386,30 @@ TEST(Highgui_Jpeg, encode_empty) | |||||||
|  |  | ||||||
|     ASSERT_THROW(cv::imencode(".jpg", img, jpegImg), cv::Exception); |     ASSERT_THROW(cv::imencode(".jpg", img, jpegImg), cv::Exception); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Writes lena.png once with IMWRITE_JPEG_PROGRESSIVE set to 1 and once with default | ||||||
|  | // parameters, reads both JPEG files back and expects the decoded images to be identical | ||||||
|  | // (NORM_INF distance of 0). | ||||||
|  | TEST(Highgui_Jpeg, encode_decode_progressive_jpeg) | ||||||
|  | { | ||||||
|  |     cvtest::TS& ts = *cvtest::TS::ptr(); | ||||||
|  |     string input = string(ts.get_data_path()) + "../cv/shared/lena.png"; | ||||||
|  |     cv::Mat img = cv::imread(input); | ||||||
|  |     ASSERT_FALSE(img.empty()); | ||||||
|  |  | ||||||
|  |     // imwrite parameters are passed as (id, value) pairs. | ||||||
|  |     std::vector<int> params; | ||||||
|  |     params.push_back(IMWRITE_JPEG_PROGRESSIVE); | ||||||
|  |     params.push_back(1); | ||||||
|  |  | ||||||
|  |     string output_progressive = cv::tempfile(".jpg"); | ||||||
|  |     EXPECT_NO_THROW(cv::imwrite(output_progressive, img, params)); | ||||||
|  |     cv::Mat img_jpg_progressive = cv::imread(output_progressive); | ||||||
|  |  | ||||||
|  |     string output_normal = cv::tempfile(".jpg"); | ||||||
|  |     EXPECT_NO_THROW(cv::imwrite(output_normal, img)); | ||||||
|  |     cv::Mat img_jpg_normal = cv::imread(output_normal); | ||||||
|  |  | ||||||
|  |     EXPECT_EQ(0, cv::norm(img_jpg_progressive, img_jpg_normal, NORM_INF)); | ||||||
|  |  | ||||||
|  |     // Clean up both temporary files created above. | ||||||
|  |     remove(output_progressive.c_str()); | ||||||
|  |     remove(output_normal.c_str()); | ||||||
|  | } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -95,6 +95,34 @@ OCL_PERF_TEST_P(CalcHistFixture, CalcHist, OCL_TEST_SIZES) | |||||||
|     SANITY_CHECK(hist); |     SANITY_CHECK(hist); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | ///////////// calcBackProject //////////////////////// | ||||||
|  |  | ||||||
|  | typedef TestBaseWithParam<Size> CalcBackProjFixture; | ||||||
|  |  | ||||||
|  | // Performance test for cv::calcBackProject on a single-channel CV_8UC1 UMat. | ||||||
|  | // A 256-bin histogram of the source is computed once up front; only the | ||||||
|  | // back-projection call is placed inside the timed cycle. | ||||||
|  | OCL_PERF_TEST_P(CalcBackProjFixture, CalcBackProj, OCL_TEST_SIZES) | ||||||
|  | { | ||||||
|  |     const Size imageSize = GetParam(); | ||||||
|  |  | ||||||
|  |     // Channel 0, one dimension with 256 bins, range bounds 0 and 256. | ||||||
|  |     const std::vector<int> channels(1, 0); | ||||||
|  |     std::vector<int> histSize(1, 256); | ||||||
|  |     std::vector<float> ranges; | ||||||
|  |     ranges.push_back(0.f); | ||||||
|  |     ranges.push_back(256.f); | ||||||
|  |  | ||||||
|  |     checkDeviceMaxMemoryAllocSize(imageSize, CV_8UC1); | ||||||
|  |  | ||||||
|  |     UMat src(imageSize, CV_8UC1); | ||||||
|  |     UMat hist(256, 1, CV_32FC1); | ||||||
|  |     UMat dst(imageSize, CV_8UC1); | ||||||
|  |  | ||||||
|  |     // Untimed preparation: fill the source and build the histogram to project back. | ||||||
|  |     declare.in(src, WARMUP_RNG).out(hist); | ||||||
|  |     cv::calcHist(std::vector<UMat>(1, src), channels, noArray(), hist, histSize, ranges, false); | ||||||
|  |  | ||||||
|  |     // Timed part. | ||||||
|  |     declare.in(src, WARMUP_RNG).out(dst); | ||||||
|  |     OCL_TEST_CYCLE() cv::calcBackProject(std::vector<UMat>(1,src), channels, hist, dst, ranges, 1); | ||||||
|  |  | ||||||
|  |     SANITY_CHECK_NOTHING(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /////////// CopyMakeBorder ////////////////////// | /////////// CopyMakeBorder ////////////////////// | ||||||
|  |  | ||||||
| CV_ENUM(Border, BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101) | CV_ENUM(Border, BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101) | ||||||
|   | |||||||
| @@ -42,7 +42,6 @@ | |||||||
|  |  | ||||||
| #include "precomp.hpp" | #include "precomp.hpp" | ||||||
| #include "opencl_kernels.hpp" | #include "opencl_kernels.hpp" | ||||||
| #include <sstream> |  | ||||||
|  |  | ||||||
| /****************************************************************************************\ | /****************************************************************************************\ | ||||||
|                                     Base Image Filter |                                     Base Image Filter | ||||||
| @@ -3197,6 +3196,8 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, | |||||||
|     size_t tryWorkItems = maxWorkItemSizes[0]; |     size_t tryWorkItems = maxWorkItemSizes[0]; | ||||||
|     char cvt[2][40]; |     char cvt[2][40]; | ||||||
|  |  | ||||||
|  |     String kerStr = ocl::kernelToStr(kernelMatDataFloat, CV_32F); | ||||||
|  |  | ||||||
|     for ( ; ; ) |     for ( ; ; ) | ||||||
|     { |     { | ||||||
|         size_t BLOCK_SIZE = tryWorkItems; |         size_t BLOCK_SIZE = tryWorkItems; | ||||||
| @@ -3226,14 +3227,14 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, | |||||||
|  |  | ||||||
|         String opts = format("-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d " |         String opts = format("-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d " | ||||||
|                              "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d " |                              "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d " | ||||||
|                              "-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s " |                              "-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s%s " | ||||||
|                              "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s " |                              "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s " | ||||||
|                              "-D convertToWT=%s -D convertToDstT=%s", |                              "-D convertToWT=%s -D convertToDstT=%s", | ||||||
|                              (int)BLOCK_SIZE, (int)BLOCK_SIZE_Y, cn, anchor.x, anchor.y, |                              (int)BLOCK_SIZE, (int)BLOCK_SIZE_Y, cn, anchor.x, anchor.y, | ||||||
|                              ksize.width, ksize.height, kernel_size_y2_aligned, borderMap[borderType], |                              ksize.width, ksize.height, kernel_size_y2_aligned, borderMap[borderType], | ||||||
|                              extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", |                              extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", | ||||||
|                              isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", |                              isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", | ||||||
|                              doubleSupport ? " -D DOUBLE_SUPPORT" : "", |                              doubleSupport ? " -D DOUBLE_SUPPORT" : "", kerStr.c_str(), | ||||||
|                              ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype), |                              ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype), | ||||||
|                              ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth), |                              ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth), | ||||||
|                              ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), |                              ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), | ||||||
| @@ -3255,7 +3256,7 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     _dst.create(sz, dtype); |     _dst.create(sz, dtype); | ||||||
|     UMat dst = _dst.getUMat(), kernalDataUMat(kernelMatDataFloat, true); |     UMat dst = _dst.getUMat(); | ||||||
|  |  | ||||||
|     int srcOffsetX = (int)((src.offset % src.step) / src.elemSize()); |     int srcOffsetX = (int)((src.offset % src.step) / src.elemSize()); | ||||||
|     int srcOffsetY = (int)(src.offset / src.step); |     int srcOffsetY = (int)(src.offset / src.step); | ||||||
| @@ -3263,8 +3264,7 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, | |||||||
|     int srcEndY = (isolated ? (srcOffsetY + sz.height) : wholeSize.height); |     int srcEndY = (isolated ? (srcOffsetY + sz.height) : wholeSize.height); | ||||||
|  |  | ||||||
|     k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffsetX, srcOffsetY, |     k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffsetX, srcOffsetY, | ||||||
|            srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst), |            srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst), (float)delta); | ||||||
|            ocl::KernelArg::PtrReadOnly(kernalDataUMat), (float)delta); |  | ||||||
|  |  | ||||||
|     return k.run(2, globalsize, localsize, false); |     return k.run(2, globalsize, localsize, false); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -200,8 +200,11 @@ inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, co | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // DIG(a) expands to "a," so a run of DIG(...) items forms a comma-separated initializer list. | ||||||
|  | // NOTE(review): COEFF is not defined in this view; presumably it is supplied through the | ||||||
|  | // -D build options as a sequence of DIG(x) items - confirm against the host code. | ||||||
|  | #define DIG(a) a, | ||||||
|  | __constant WT1 kernelData[] = { COEFF }; | ||||||
|  |  | ||||||
| __kernel void filter2D(__global const uchar * srcptr, int src_step, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY, | __kernel void filter2D(__global const uchar * srcptr, int src_step, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY, | ||||||
|                        __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, __constant WT1 * kernelData, float delta) |                        __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, float delta) | ||||||
| { | { | ||||||
|     const struct RectCoords srcCoords = { srcOffsetX, srcOffsetY, srcEndX, srcEndY }; // for non-isolated border: offsetX, offsetY, wholeX, wholeY |     const struct RectCoords srcCoords = { srcOffsetX, srcOffsetY, srcEndX, srcEndY }; // for non-isolated border: offsetX, offsetY, wholeX, wholeY | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Tony
					Tony