Merge pull request #3088 from vbystricky:ocl_enableNormEtc

2014-08-14 14:34:40 +00:00
parent 95a4943762 942ff5be57
commit 52ac61d87c
4 changed files with 61 additions and 22 deletions
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -1765,13 +1765,12 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
        kercn = ocl::predictOptimalVectorWidth(_src, _dst), rowsPerWI = d.isIntel() ? 4 : 1;
    bool doubleSupport = d.doubleFPConfig() > 0;

-    if (depth == CV_32F || depth == CV_64F)
+    if (!doubleSupport && depth == CV_64F)
        return false;

    char cvt[2][50];
    int wdepth = std::max(depth, CV_32F);
-    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
-                  format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s"
+    String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s"
                         " -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s"
                         " -D workT1=%s -D rowsPerWI=%d%s",
                         ocl::typeToStr(CV_8UC(kercn)),
@@ -1780,7 +1779,8 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
                         ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
                         ocl::convertTypeStr(wdepth, CV_8U, kercn, cvt[1]),
                         ocl::typeToStr(wdepth), rowsPerWI,
-                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : "");
+    ocl::Kernel k("KF", ocl::core::arithm_oclsrc, build_opt);
    if (k.empty())
        return false;

--- a/modules/core/src/opencl/minmaxloc.cl
+++ b/modules/core/src/opencl/minmaxloc.cl
@@ -98,7 +98,7 @@

 #ifdef OP_CALC2
 #define CALC_MAX2(p) \
-    maxval2 = MAX(maxval2, temp.p);
+    maxval2 = MAX(maxval2, temp2.p);
 #else
 #define CALC_MAX2(p)
 #endif
@@ -196,7 +196,7 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off

 #ifdef HAVE_SRC2
 #ifdef HAVE_SRC2_CONT
-            src2_index = mul24(id, srcTSIZE);
+            src2_index = id * srcTSIZE; //mul24(id, srcTSIZE);
 #else
            src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE));
 #endif
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@@ -1444,7 +1444,10 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
    bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(),
        haveSrc2 = _src2.kind() != _InputArray::NONE;
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
-            kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src));
+            kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2));
+
+    if (haveMask && dev.isAMD())
+        return false;

    CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) ||
              (cn >= 1 && !minLoc && !maxLoc) );
@@ -1536,7 +1539,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
    }

    size_t globalsize = groupnum * wgs;
-    if (!k.run(1, &globalsize, &wgs, false))
+    if (!k.run(1, &globalsize, &wgs, true))
        return false;

    static const getMinMaxResFunc functab[7] =
@@ -2190,9 +2193,6 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double &
         (!doubleSupport && depth == CV_64F))
        return false;

-    if( depth == CV_32F && (!_mask.empty() || normType == NORM_INF) )
-        return false;
-
    UMat src = _src.getUMat();

    if (normType == NORM_INF)
@@ -2548,9 +2548,6 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr
    normType &= ~NORM_RELATIVE;
    bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;

-    if ( !normsum || !_mask.empty() )
-        return false;
-
    if (normsum)
    {
        if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?