moments work now and work more or less fast
This commit is contained in:
		@@ -2262,6 +2262,12 @@ void _OutputArray::release() const
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    if( k == UMAT )
 | 
			
		||||
    {
 | 
			
		||||
        ((UMat*)obj)->release();
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if( k == GPU_MAT )
 | 
			
		||||
    {
 | 
			
		||||
        ((cuda::GpuMat*)obj)->release();
 | 
			
		||||
 
 | 
			
		||||
@@ -363,36 +363,31 @@ Moments::Moments( double _m00, double _m10, double _m01, double _m20, double _m1
 | 
			
		||||
    nu30 = mu30*s3; nu21 = mu21*s3; nu12 = mu12*s3; nu03 = mu03*s3;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static const int OCL_TILE_SIZE = 32;
 | 
			
		||||
    
 | 
			
		||||
static bool ocl_moments( InputArray _src, Moments& m, bool binary )
 | 
			
		||||
static bool ocl_moments( InputArray _src, Moments& m)
 | 
			
		||||
{
 | 
			
		||||
    printf("!!!!!!!!!!!!!!!!!! ocl moments !!!!!!!!!!!!!!!!!!!\n");
 | 
			
		||||
    const int TILE_SIZE = 16;
 | 
			
		||||
    const int K = 10;
 | 
			
		||||
    ocl::Kernel k("moments", ocl::imgproc::moments_oclsrc, binary ? "-D BINARY_MOMENTS" : "");
 | 
			
		||||
    ocl::Kernel k("moments", ocl::imgproc::moments_oclsrc, format("-D TILE_SIZE=%d", TILE_SIZE));
 | 
			
		||||
    if( k.empty() )
 | 
			
		||||
        return false;
 | 
			
		||||
    
 | 
			
		||||
    UMat src = _src.getUMat();
 | 
			
		||||
    Size sz = src.size();
 | 
			
		||||
    int xtiles = (sz.width + OCL_TILE_SIZE-1)/OCL_TILE_SIZE;
 | 
			
		||||
    int ytiles = (sz.height + OCL_TILE_SIZE-1)/OCL_TILE_SIZE;
 | 
			
		||||
    int xtiles = (sz.width + TILE_SIZE-1)/TILE_SIZE;
 | 
			
		||||
    int ytiles = (sz.height + TILE_SIZE-1)/TILE_SIZE;
 | 
			
		||||
    int ntiles = xtiles*ytiles;
 | 
			
		||||
    UMat umbuf(1, ntiles*K, CV_32S);
 | 
			
		||||
    umbuf.setTo(Scalar::all(0));
 | 
			
		||||
    
 | 
			
		||||
    size_t globalsize[] = {xtiles, ytiles};
 | 
			
		||||
    size_t localsize[] = {1, 1};
 | 
			
		||||
    bool ok = k.args(ocl::KernelArg::ReadOnly(src),
 | 
			
		||||
                     ocl::KernelArg::PtrWriteOnly(umbuf),
 | 
			
		||||
                     OCL_TILE_SIZE, xtiles, ytiles).run(2, globalsize, localsize, false);
 | 
			
		||||
                     xtiles).run(2, globalsize, 0, true);
 | 
			
		||||
    if(!ok)
 | 
			
		||||
        return false;
 | 
			
		||||
    Mat mbuf;
 | 
			
		||||
    umbuf.copyTo(mbuf);
 | 
			
		||||
    Mat mbuf = umbuf.getMat(ACCESS_READ);
 | 
			
		||||
    for( int i = 0; i < ntiles; i++ )
 | 
			
		||||
    {
 | 
			
		||||
        double x = (i % xtiles)*OCL_TILE_SIZE, y = (i / xtiles)*OCL_TILE_SIZE;
 | 
			
		||||
        double x = (i % xtiles)*TILE_SIZE, y = (i / xtiles)*TILE_SIZE;
 | 
			
		||||
        const int* mom = mbuf.ptr<int>() + i*K;
 | 
			
		||||
        double xm = x * mom[0], ym = y * mom[0];
 | 
			
		||||
        
 | 
			
		||||
@@ -452,10 +447,8 @@ cv::Moments cv::moments( InputArray _src, bool binary )
 | 
			
		||||
    if( size.width <= 0 || size.height <= 0 )
 | 
			
		||||
        return m;
 | 
			
		||||
    
 | 
			
		||||
    if( ocl::useOpenCL() && depth == CV_8U &&
 | 
			
		||||
        size.width >= OCL_TILE_SIZE &&
 | 
			
		||||
        size.height >= OCL_TILE_SIZE &&
 | 
			
		||||
        /*_src.isUMat() &&*/ ocl_moments(_src, m, binary) )
 | 
			
		||||
    if( ocl::useOpenCL() && depth == CV_8U && !binary &&
 | 
			
		||||
        _src.isUMat() && ocl_moments(_src, m) )
 | 
			
		||||
        ;
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,110 +1,70 @@
 | 
			
		||||
/* See LICENSE file in the root OpenCV directory */
 | 
			
		||||
 | 
			
		||||
#ifdef BINARY_MOMENTS
 | 
			
		||||
#define READ_PIX(ref) (ref != 0)
 | 
			
		||||
#else
 | 
			
		||||
#define READ_PIX(ref) ref
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
__kernel void moments(__global const uchar* src, int src_step, int src_offset,
 | 
			
		||||
                      int src_rows, int src_cols, __global int* mom0,
 | 
			
		||||
                      int tile_size, int xtiles, int ytiles)
 | 
			
		||||
                      int src_rows, int src_cols, __global int* mom0, int xtiles)
 | 
			
		||||
{
 | 
			
		||||
    int x = get_global_id(0);
 | 
			
		||||
    int y = get_global_id(1);
 | 
			
		||||
    int x_min = x*tile_size;
 | 
			
		||||
    int y_min = y*tile_size;
 | 
			
		||||
    int x_min = x*TILE_SIZE;
 | 
			
		||||
    int y_min = y*TILE_SIZE;
 | 
			
		||||
 | 
			
		||||
    if( x_min < src_cols && y_min < src_rows )
 | 
			
		||||
    {
 | 
			
		||||
        int x_max = src_cols - x_min;
 | 
			
		||||
        int y_max = src_rows - y_min;
 | 
			
		||||
        int m[10]={0,0,0,0,0,0,0,0,0,0};
 | 
			
		||||
        __global const uchar* ptr = (src + src_offset);// + y_min*src_step + x_min;
 | 
			
		||||
        int x_max = min(src_cols - x_min, TILE_SIZE);
 | 
			
		||||
        int y_max = min(src_rows - y_min, TILE_SIZE);
 | 
			
		||||
        int m00=0, m10=0, m01=0, m20=0, m11=0, m02=0, m30=0, m21=0, m12=0, m03=0;
 | 
			
		||||
        __global const uchar* ptr = src + src_offset + y_min*src_step + x_min;
 | 
			
		||||
        __global int* mom = mom0 + (xtiles*y + x)*10;
 | 
			
		||||
 | 
			
		||||
        x_max = x_max < tile_size ? x_max : tile_size;
 | 
			
		||||
        y_max = y_max < tile_size ? y_max : tile_size;
 | 
			
		||||
 | 
			
		||||
        for( y = 0; y < y_max; y++ )
 | 
			
		||||
        for( y = 0; y < y_max; y++, ptr += src_step )
 | 
			
		||||
        {
 | 
			
		||||
            int x00, x10, x20, x30;
 | 
			
		||||
            int sx, sy, p;
 | 
			
		||||
            x00 = x10 = x20 = x30 = 0;
 | 
			
		||||
            sy = y*y;
 | 
			
		||||
            int4 S = (int4)(0,0,0,0);
 | 
			
		||||
 | 
			
		||||
            for( x = 0; x < x_max; x++ )
 | 
			
		||||
            for( x = 0; x <= x_max - 4; x += 4 )
 | 
			
		||||
            {
 | 
			
		||||
                p = ptr[0];//READ_PIX(ptr[x]);
 | 
			
		||||
                sx = x*x;
 | 
			
		||||
                x00 += p;
 | 
			
		||||
                x10 += x*p;
 | 
			
		||||
                x20 += sx*p;
 | 
			
		||||
                x30 += x*sx*p;
 | 
			
		||||
                int4 p = convert_int4(vload4(0, ptr + x));
 | 
			
		||||
                #define SUM_ELEM(elem, ofs) \
 | 
			
		||||
                    (int4)(elem, (x+ofs)*elem, (x+ofs)*(x+ofs)*elem, (x+ofs)*(x+ofs)*(x+ofs)*elem)
 | 
			
		||||
                S += SUM_ELEM(p.s0, 0) + SUM_ELEM(p.s1, 1) + SUM_ELEM(p.s2, 2) + SUM_ELEM(p.s3, 3);
 | 
			
		||||
            }
 | 
			
		||||
            if( x < x_max )
 | 
			
		||||
            {
 | 
			
		||||
                int ps = ptr[x];
 | 
			
		||||
                S += SUM_ELEM(ps, 0);
 | 
			
		||||
                if( x+1 < x_max )
 | 
			
		||||
                {
 | 
			
		||||
                    ps = ptr[x+1];
 | 
			
		||||
                    S += SUM_ELEM(ps, 1);
 | 
			
		||||
                    if( x+2 < x_max )
 | 
			
		||||
                    {
 | 
			
		||||
                        ps = ptr[x+2];
 | 
			
		||||
                        S += SUM_ELEM(ps, 2);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            
 | 
			
		||||
            m[0] += x00;
 | 
			
		||||
            m[1] += x10;
 | 
			
		||||
            m[2] += y*x00;
 | 
			
		||||
            m[3] += x20;
 | 
			
		||||
            m[4] += y*x10;
 | 
			
		||||
            m[5] += sy*x00;
 | 
			
		||||
            m[6] += x30;
 | 
			
		||||
            m[7] += y*x20;
 | 
			
		||||
            m[8] += sy*x10;
 | 
			
		||||
            m[9] += y*sy*x00;
 | 
			
		||||
            //ptr += src_step;
 | 
			
		||||
            int sy = y*y;
 | 
			
		||||
            m00 += S.s0;
 | 
			
		||||
            m10 += S.s1;
 | 
			
		||||
            m01 += y*S.s0;
 | 
			
		||||
            m20 += S.s2;
 | 
			
		||||
            m11 += y*S.s1;
 | 
			
		||||
            m02 += sy*S.s0;
 | 
			
		||||
            m30 += S.s3;
 | 
			
		||||
            m21 += y*S.s2;
 | 
			
		||||
            m12 += sy*S.s1;
 | 
			
		||||
            m03 += y*sy*S.s0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        mom[0] = m[0];
 | 
			
		||||
 | 
			
		||||
        mom[1] = m[1];
 | 
			
		||||
        mom[2] = m[2];
 | 
			
		||||
 | 
			
		||||
        mom[3] = m[3];
 | 
			
		||||
        mom[4] = m[4];
 | 
			
		||||
        mom[5] = m[5];
 | 
			
		||||
 | 
			
		||||
        mom[6] = m[6];
 | 
			
		||||
        mom[7] = m[7];
 | 
			
		||||
        mom[8] = m[8];
 | 
			
		||||
        mom[9] = m[9];
 | 
			
		||||
        mom[0] = m00;
 | 
			
		||||
        mom[1] = m10;
 | 
			
		||||
        mom[2] = m01;
 | 
			
		||||
        mom[3] = m20;
 | 
			
		||||
        mom[4] = m11;
 | 
			
		||||
        mom[5] = m02;
 | 
			
		||||
        mom[6] = m30;
 | 
			
		||||
        mom[7] = m21;
 | 
			
		||||
        mom[8] = m12;
 | 
			
		||||
        mom[9] = m03;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*__kernel void moments(__global const uchar* src, int src_step, int src_offset,
 | 
			
		||||
                     int src_rows, int src_cols, __global float* mom0,
 | 
			
		||||
                     int tile_size, int xtiles, int ytiles)
 | 
			
		||||
{
 | 
			
		||||
    int x = get_global_id(0);
 | 
			
		||||
    int y = get_global_id(1);
 | 
			
		||||
    if( x < xtiles && y < ytiles )
 | 
			
		||||
    {
 | 
			
		||||
        //int x_min = x*tile_size;
 | 
			
		||||
        //int y_min = y*tile_size;
 | 
			
		||||
        //int x_max = src_cols - x_min;
 | 
			
		||||
        //int y_max = src_rows - y_min;
 | 
			
		||||
        __global const uchar* ptr = src + src_offset;// + src_step*y_min + x_min;
 | 
			
		||||
        __global float* mom = mom0;// + (y*xtiles + x)*16;
 | 
			
		||||
        //int x00, x10, x20, x30, m00=0;
 | 
			
		||||
        //x_max = min(x_max, tile_size);
 | 
			
		||||
        //y_max = min(y_max, tile_size);
 | 
			
		||||
        //int m00 = 0;
 | 
			
		||||
        
 | 
			
		||||
        //for( y = 0; y < y_max; y++, ptr += src_step )
 | 
			
		||||
        //{
 | 
			
		||||
            //int x00 = 0, x10 = 0, x20 = 0, x30 = 0;
 | 
			
		||||
            //for( x = 0; x < x_max; x++ )
 | 
			
		||||
            //{
 | 
			
		||||
                int p = ptr[x];
 | 
			
		||||
                //m00 = p;
 | 
			
		||||
                //x10 += x*p;
 | 
			
		||||
                /*x20 += x*x*p;
 | 
			
		||||
                x30 += x*x*x*p;
 | 
			
		||||
            //}
 | 
			
		||||
            //m00 = m00 + x00;
 | 
			
		||||
        //}
 | 
			
		||||
        mom[0] = p;
 | 
			
		||||
    }
 | 
			
		||||
}*/
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -60,6 +60,7 @@ protected:
 | 
			
		||||
    void run_func();
 | 
			
		||||
    int coi;
 | 
			
		||||
    bool is_binary;
 | 
			
		||||
    bool try_umat;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -102,20 +103,25 @@ void CV_MomentsTest::get_test_array_types_and_sizes( int test_case_idx,
 | 
			
		||||
{
 | 
			
		||||
    RNG& rng = ts->get_rng();
 | 
			
		||||
    cvtest::ArrayTest::get_test_array_types_and_sizes( test_case_idx, sizes, types );
 | 
			
		||||
    int cn = cvtest::randInt(rng) % 4 + 1;
 | 
			
		||||
    int cn = (cvtest::randInt(rng) % 4) + 1;
 | 
			
		||||
    int depth = cvtest::randInt(rng) % 4;
 | 
			
		||||
    depth = depth == 0 ? CV_8U : depth == 1 ? CV_16U : depth == 2 ? CV_16S : CV_32F;
 | 
			
		||||
    if( cn == 2 )
 | 
			
		||||
    
 | 
			
		||||
    is_binary = cvtest::randInt(rng) % 2 != 0;
 | 
			
		||||
    if( depth == 0 && !is_binary )
 | 
			
		||||
        try_umat = cvtest::randInt(rng) % 5 != 0;
 | 
			
		||||
    else
 | 
			
		||||
        try_umat = cvtest::randInt(rng) % 2 != 0;
 | 
			
		||||
    
 | 
			
		||||
    if( cn == 2 || try_umat )
 | 
			
		||||
        cn = 1;
 | 
			
		||||
 | 
			
		||||
    sizes[INPUT][0].height = sizes[INPUT][0].width;
 | 
			
		||||
    types[INPUT][0] = CV_MAKETYPE(depth, cn);
 | 
			
		||||
    types[OUTPUT][0] = types[REF_OUTPUT][0] = CV_64FC1;
 | 
			
		||||
    sizes[OUTPUT][0] = sizes[REF_OUTPUT][0] = cvSize(MOMENT_COUNT,1);
 | 
			
		||||
    if(CV_MAT_DEPTH(types[INPUT][0])>=CV_32S)
 | 
			
		||||
        sizes[INPUT][0].width = MAX(sizes[INPUT][0].width, 3);
 | 
			
		||||
    
 | 
			
		||||
    is_binary = cvtest::randInt(rng) % 2 != 0;
 | 
			
		||||
    coi = 0;
 | 
			
		||||
    cvmat_allowed = true;
 | 
			
		||||
    if( cn > 1 )
 | 
			
		||||
@@ -150,7 +156,16 @@ void CV_MomentsTest::run_func()
 | 
			
		||||
{
 | 
			
		||||
    CvMoments* m = (CvMoments*)test_mat[OUTPUT][0].ptr<double>();
 | 
			
		||||
    double* others = (double*)(m + 1);
 | 
			
		||||
    if( try_umat )
 | 
			
		||||
    {
 | 
			
		||||
        UMat u;
 | 
			
		||||
        test_mat[INPUT][0].clone().copyTo(u);
 | 
			
		||||
        Moments new_m = moments(u, is_binary != 0);
 | 
			
		||||
        *m = new_m;
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
        cvMoments( test_array[INPUT][0], m, is_binary );
 | 
			
		||||
    
 | 
			
		||||
    others[0] = cvGetNormalizedCentralMoment( m, 2, 0 );
 | 
			
		||||
    others[1] = cvGetNormalizedCentralMoment( m, 1, 1 );
 | 
			
		||||
    others[2] = cvGetNormalizedCentralMoment( m, 0, 2 );
 | 
			
		||||
@@ -275,10 +290,6 @@ void CV_MomentsTest::prepare_to_validation( int /*test_case_idx*/ )
 | 
			
		||||
        mdata[6] = m.mu03 * s3;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    test_mat[REF_OUTPUT][0].copyTo(test_mat[OUTPUT][0]);
 | 
			
		||||
    cout << "ref moments: " << test_mat[REF_OUTPUT][0] << "\n";
 | 
			
		||||
    cout << "fun moments: " << test_mat[OUTPUT][0] << "\n";
 | 
			
		||||
    
 | 
			
		||||
    double* a = test_mat[REF_OUTPUT][0].ptr<double>();
 | 
			
		||||
    double* b = test_mat[OUTPUT][0].ptr<double>();
 | 
			
		||||
    for( i = 0; i < MOMENT_COUNT; i++ )
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user