From 0630e7010e52218dc0d1deb4f6ffce9e63b35d58 Mon Sep 17 00:00:00 2001 From: Victoria Zhislina Date: Tue, 21 Feb 2012 11:31:23 +0000 Subject: [PATCH] CV_USE_UNROLLED for imgproc --- modules/imgproc/perf/perf_filter2d.cpp | 2 +- modules/imgproc/src/_list.h | 2 +- modules/imgproc/src/accum.cpp | 12 ++++-- modules/imgproc/src/filter.cpp | 56 ++++++++++++++++++++------ modules/imgproc/src/gabor.cpp | 4 +- modules/imgproc/src/geometry.cpp | 6 +-- modules/imgproc/src/imgwarp.cpp | 12 ++++-- modules/imgproc/src/morph.cpp | 24 ++++++----- modules/imgproc/src/segmentation.cpp | 3 +- modules/imgproc/src/thresh.cpp | 18 ++++++--- 10 files changed, 96 insertions(+), 43 deletions(-) diff --git a/modules/imgproc/perf/perf_filter2d.cpp b/modules/imgproc/perf/perf_filter2d.cpp index 5aa000c49..c87f31c8d 100644 --- a/modules/imgproc/perf/perf_filter2d.cpp +++ b/modules/imgproc/perf/perf_filter2d.cpp @@ -32,7 +32,7 @@ PERF_TEST_P( TestFilter2d, Filter2d, Mat kernel(kSize, kSize, CV_32FC1); randu(kernel, -3, 10); - float s = (float)fabs( sum(kernel)[0] ); + float s = fabs( sum(kernel)[0] ); if(s > 1e-3) kernel /= s; declare.in(src, WARMUP_RNG).out(dst).time(20); diff --git a/modules/imgproc/src/_list.h b/modules/imgproc/src/_list.h index a19f7e2af..b2b63e9cb 100644 --- a/modules/imgproc/src/_list.h +++ b/modules/imgproc/src/_list.h @@ -345,7 +345,7 @@ void prefix##remove_at_##type(_CVLIST* l, CVPOS pos)\ void prefix##set_##type(CVPOS pos, type* data)\ {\ ELEMENT_##type* element = ((ELEMENT_##type*)(pos.m_pos));\ - memcpy(&(element->m_data), data, sizeof(*data));\ + memcpy(&(element->m_data), data, sizeof(data));\ }\ type* prefix##get_##type(CVPOS pos)\ {\ diff --git a/modules/imgproc/src/accum.cpp b/modules/imgproc/src/accum.cpp index 4a588f8fc..3c2f0e522 100644 --- a/modules/imgproc/src/accum.cpp +++ b/modules/imgproc/src/accum.cpp @@ -53,6 +53,7 @@ acc_( const T* src, AT* dst, const uchar* mask, int len, int cn ) if( !mask ) { len *= cn; + #if CV_ENABLE_UNROLLED for( ; i <= len - 4; i += 4 ) { AT t0, t1; @@ -64,7 +65,7 @@ acc_( const T* src, AT* dst, const uchar* mask, int len, int cn ) t1 = src[i+3] + dst[i+3]; dst[i+2] = t0; dst[i+3] = t1; } - + #endif for( ; i < len; i++ ) dst[i] += src[i]; } @@ -110,6 +111,7 @@ accSqr_( const T* src, AT* dst, const uchar* mask, int len, int cn ) if( !mask ) { len *= cn; + #if CV_ENABLE_UNROLLED for( ; i <= len - 4; i += 4 ) { AT t0, t1; @@ -121,7 +123,7 @@ accSqr_( const T* src, AT* dst, const uchar* mask, int len, int cn ) t1 = (AT)src[i+3]*src[i+3] + dst[i+3]; dst[i+2] = t0; dst[i+3] = t1; } - + #endif for( ; i < len; i++ ) dst[i] += (AT)src[i]*src[i]; } @@ -167,6 +169,7 @@ accProd_( const T* src1, const T* src2, AT* dst, const uchar* mask, int len, int if( !mask ) { len *= cn; + #if CV_ENABLE_UNROLLED for( ; i <= len - 4; i += 4 ) { AT t0, t1; @@ -178,7 +181,7 @@ accProd_( const T* src1, const T* src2, AT* dst, const uchar* mask, int len, int t1 = (AT)src1[i+3]*src2[i+3] + dst[i+3]; dst[i+2] = t0; dst[i+3] = t1; } - + #endif for( ; i < len; i++ ) dst[i] += (AT)src1[i]*src2[i]; } @@ -225,6 +228,7 @@ accW_( const T* src, AT* dst, const uchar* mask, int len, int cn, double alpha ) if( !mask ) { len *= cn; + #if CV_ENABLE_UNROLLED for( ; i <= len - 4; i += 4 ) { AT t0, t1; @@ -236,7 +240,7 @@ accW_( const T* src, AT* dst, const uchar* mask, int len, int cn, double alpha ) t1 = src[i+3]*a + dst[i+3]*b; dst[i+2] = t0; dst[i+3] = t1; } - + #endif for( ; i < len; i++ ) dst[i] = src[i]*a + dst[i]*b; } diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 01a54574e..efe552a39 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -2227,7 +2227,7 @@ template struct RowFilter : public BaseRo i = vecOp(src, dst, width, cn); width *= cn; - + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { S = (const ST*)src + i; @@ -2245,7 +2245,7 @@ template struct RowFilter : public BaseRo D[i] = s0; D[i+1] = s1; D[i+2] = s2; D[i+3] = s3; } - + #endif for( ; i < width; i++ ) { S = (const ST*)src + i; @@ -2426,6 +2426,7 @@ template struct ColumnFilter : public BaseColumnFilte { DT* D = (DT*)dst; i = vecOp(src, dst, width); + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { ST f = ky[0]; @@ -2443,7 +2444,7 @@ template struct ColumnFilter : public BaseColumnFilte D[i] = castOp(s0); D[i+1] = castOp(s1); D[i+2] = castOp(s2); D[i+3] = castOp(s3); } - + #endif for( ; i < width; i++ ) { ST s0 = ky[0]*((const ST*)src[0])[i] + _delta; @@ -2492,7 +2493,7 @@ template struct SymmColumnFilter : public ColumnFilte { DT* D = (DT*)dst; i = (this->vecOp)(src, dst, width); - + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { ST f = ky[0]; @@ -2514,7 +2515,7 @@ template struct SymmColumnFilter : public ColumnFilte D[i] = castOp(s0); D[i+1] = castOp(s1); D[i+2] = castOp(s2); D[i+3] = castOp(s3); } - + #endif for( ; i < width; i++ ) { ST s0 = ky[0]*((const ST*)src[0])[i] + _delta; @@ -2530,7 +2531,7 @@ template struct SymmColumnFilter : public ColumnFilte { DT* D = (DT*)dst; i = this->vecOp(src, dst, width); - + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { ST f = ky[0]; @@ -2551,7 +2552,7 @@ template struct SymmColumnFilter : public ColumnFilte D[i] = castOp(s0); D[i+1] = castOp(s1); D[i+2] = castOp(s2); D[i+3] = castOp(s3); } - + #endif for( ; i < width; i++ ) { ST s0 = _delta; @@ -2608,6 +2609,7 @@ struct SymmColumnSmallFilter : public SymmColumnFilter { if( is_1_2_1 ) { + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { ST s0 = S0[i] + S1[i]*2 + S2[i] + _delta; @@ -2620,9 +2622,17 @@ struct SymmColumnSmallFilter : public SymmColumnFilter D[i+2] = castOp(s0); D[i+3] = castOp(s1); } + #else + for( ; i < width; i ++ ) + { + ST s0 = S0[i] + S1[i]*2 + S2[i] + _delta; + D[i] = castOp(s0); + } + #endif } else if( is_1_m2_1 ) { + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { ST s0 = S0[i] - S1[i]*2 + S2[i] + _delta; @@ -2635,9 +2645,17 @@ struct SymmColumnSmallFilter : public SymmColumnFilter D[i+2] = castOp(s0); D[i+3] = castOp(s1); } + #else + for( ; i < width; i ++ ) + { + ST s0 = S0[i] - S1[i]*2 + S2[i] + _delta; + D[i] = castOp(s0); + } + #endif } else { + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { ST s0 = (S0[i] + S2[i])*f1 + S1[i]*f0 + _delta; @@ -2650,8 +2668,14 @@ struct SymmColumnSmallFilter : public SymmColumnFilter D[i+2] = castOp(s0); D[i+3] = castOp(s1); } + #else + for( ; i < width; i ++ ) + { + ST s0 = (S0[i] + S2[i])*f1 + S1[i]*f0 + _delta; + D[i] = castOp(s0); + } + #endif } - for( ; i < width; i++ ) D[i] = castOp((S0[i] + S2[i])*f1 + S1[i]*f0 + _delta); } @@ -2661,7 +2685,7 @@ struct SymmColumnSmallFilter : public SymmColumnFilter { if( f1 < 0 ) std::swap(S0, S2); - + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { ST s0 = S2[i] - S0[i] + _delta; @@ -2674,12 +2698,19 @@ struct SymmColumnSmallFilter : public SymmColumnFilter D[i+2] = castOp(s0); D[i+3] = castOp(s1); } - + #else + for( ; i < width; i ++ ) + { + ST s0 = S2[i] - S0[i] + _delta; + D[i] = castOp(s0); + } + #endif if( f1 < 0 ) std::swap(S0, S2); } else { + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { ST s0 = (S2[i] - S0[i])*f1 + _delta; @@ -2692,6 +2723,7 @@ struct SymmColumnSmallFilter : public SymmColumnFilter D[i+2] = castOp(s0); D[i+3] = castOp(s1); } + #endif } for( ; i < width; i++ ) @@ -3043,7 +3075,7 @@ template struct Filter2D : public BaseFi kp[k] = (const ST*)src[pt[k].y] + pt[k].x*cn; i = vecOp((const uchar**)kp, dst, width); - + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { KT s0 = _delta, s1 = _delta, s2 = _delta, s3 = _delta; @@ -3061,7 +3093,7 @@ template struct Filter2D : public BaseFi D[i] = castOp(s0); D[i+1] = castOp(s1); D[i+2] = castOp(s2); D[i+3] = castOp(s3); } - + #endif for( ; i < width; i++ ) { KT s0 = _delta; diff --git a/modules/imgproc/src/gabor.cpp b/modules/imgproc/src/gabor.cpp index 867fa1a65..5a81312f7 100644 --- a/modules/imgproc/src/gabor.cpp +++ b/modules/imgproc/src/gabor.cpp @@ -60,12 +60,12 @@ cv::Mat cv::getGaborKernel( Size ksize, double sigma, double theta, if( ksize.width > 0 ) xmax = ksize.width/2; else - xmax = (int)std::max(fabs(nstds*sigma_x*c), fabs(nstds*sigma_y*s)); + xmax = std::max(fabs(nstds*sigma_x*c), fabs(nstds*sigma_y*s)); if( ksize.height > 0 ) ymax = ksize.height/2; else - ymax = (int)std::max(fabs(nstds*sigma_x*s), fabs(nstds*sigma_y*c)); + ymax = std::max(fabs(nstds*sigma_x*s), fabs(nstds*sigma_y*c)); xmin = -xmax; ymin = -ymax; diff --git a/modules/imgproc/src/geometry.cpp b/modules/imgproc/src/geometry.cpp index 63bab60cb..66d029185 100644 --- a/modules/imgproc/src/geometry.cpp +++ b/modules/imgproc/src/geometry.cpp @@ -439,8 +439,8 @@ static char segSegInt( Point2f a, Point2f b, Point2f c, Point2f d, Point2f& p, P (0.0 > t) || (t > 1.0) ) code = '0'; - p.x = (float)(a.x + s * ( b.x - a.x )); - p.y = (float)(a.y + s * ( b.y - a.y )); + p.x = a.x + s * ( b.x - a.x ); + p.y = a.y + s * ( b.y - a.y ); return code; } @@ -652,7 +652,7 @@ float cv::intersectConvexConvex( InputArray _p1, InputArray _p2, OutputArray _p1 _p12.release(); return 0.f; } - area = (float)contourArea(_InputArray(result, nr), false); + area = contourArea(_InputArray(result, nr), false); } if( _p12.needed() ) diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 62be35a85..7f84a6abc 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -877,6 +877,7 @@ struct VResizeLinear VecOp vecOp; int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width); + #if CV_ENABLE_UNROLLED for( ; x <= width - 4; x += 4 ) { WT t0, t1; @@ -887,7 +888,7 @@ struct VResizeLinear t1 = S0[x+3]*b0 + S1[x+3]*b1; dst[x+2] = castOp(t0); dst[x+3] = castOp(t1); } - + #endif for( ; x < width; x++ ) dst[x] = castOp(S0[x]*b0 + S1[x]*b1); } @@ -1033,7 +1034,7 @@ struct VResizeLanczos4 CastOp castOp; VecOp vecOp; int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width); - + #if CV_ENABLE_UNROLLED for( ; x <= width - 4; x += 4 ) { WT b = beta[0]; @@ -1050,7 +1051,7 @@ struct VResizeLanczos4 dst[x] = castOp(s0); dst[x+1] = castOp(s1); dst[x+2] = castOp(s2); dst[x+3] = castOp(s3); } - + #endif for( ; x < width; x++ ) { dst[x] = castOp(src[0][x]*beta[0] + src[1][x]*beta[1] + @@ -1161,8 +1162,11 @@ static void resizeAreaFast_( const Mat& src, Mat& dst, const int* ofs, const int { const T* S = (const T*)(src.data + src.step*sy0) + xofs[dx]; WT sum = 0; - for( k = 0; k <= area - 4; k += 4 ) + k=0; + #if CV_ENABLE_UNROLLED + for( ; k <= area - 4; k += 4 ) sum += S[ofs[k]] + S[ofs[k+1]] + S[ofs[k+2]] + S[ofs[k+3]]; + #endif for( ; k < area; k++ ) sum += S[ofs[k]]; diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index 432d02603..98052f7a9 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -700,7 +700,9 @@ template struct MorphColumnFilter : public BaseColumnFilt for( ; _ksize > 1 && count > 1; count -= 2, D += dststep*2, src += 2 ) { - for( i = i0; i <= width - 4; i += 4 ) + i = i0; + #if CV_ENABLE_UNROLLED + for( ; i <= width - 4; i += 4 ) { const T* sptr = src[1] + i; T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3]; @@ -724,7 +726,7 @@ template struct MorphColumnFilter : public BaseColumnFilt D[i+dststep+2] = op(s2, sptr[2]); D[i+dststep+3] = op(s3, sptr[3]); } - + #endif for( ; i < width; i++ ) { T s0 = src[1][i]; @@ -739,7 +741,9 @@ template struct MorphColumnFilter : public BaseColumnFilt for( ; count > 0; count--, D += dststep, src++ ) { - for( i = i0; i <= width - 4; i += 4 ) + i = i0; + #if CV_ENABLE_UNROLLED + for( ; i <= width - 4; i += 4 ) { const T* sptr = src[0] + i; T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3]; @@ -754,7 +758,7 @@ template struct MorphColumnFilter : public BaseColumnFilt D[i] = s0; D[i+1] = s1; D[i+2] = s2; D[i+3] = s3; } - + #endif for( ; i < width; i++ ) { T s0 = src[0][i]; @@ -801,7 +805,7 @@ template struct MorphFilter : BaseFilter kp[k] = (const T*)src[pt[k].y] + pt[k].x*cn; i = vecOp(&ptrs[0], nz, dst, width); - + #if CV_ENABLE_UNROLLED for( ; i <= width - 4; i += 4 ) { const T* sptr = kp[0] + i; @@ -817,7 +821,7 @@ template struct MorphFilter : BaseFilter D[i] = s0; D[i+1] = s1; D[i+2] = s2; D[i+3] = s3; } - + #endif for( ; i < width; i++ ) { T s0 = kp[0][i]; @@ -1074,8 +1078,10 @@ public: { int row0 = min(cvRound(range.begin() * src.rows / nStripes), src.rows); int row1 = min(cvRound(range.end() * src.rows / nStripes), src.rows); - - //printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", src.rows, src.cols, range.begin(), range.end(), row0, row1); + + if(0) + printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", + src.rows, src.cols, range.begin(), range.end(), row0, row1); Mat srcStripe = src.rowRange(row0, row1); Mat dstStripe = dst.rowRange(row0, row1); @@ -1099,7 +1105,7 @@ private: Point anchor; int rowBorderType; int columnBorderType; - Scalar borderValue; + const Scalar& borderValue; }; static void morphOp( int op, InputArray _src, OutputArray _dst, diff --git a/modules/imgproc/src/segmentation.cpp b/modules/imgproc/src/segmentation.cpp index cb335e209..2a0a101e1 100644 --- a/modules/imgproc/src/segmentation.cpp +++ b/modules/imgproc/src/segmentation.cpp @@ -454,6 +454,7 @@ cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr, { int row_count = 0; x = minx; + #if CV_ENABLE_UNROLLED for( ; x + 3 <= maxx; x += 4, ptr += 12 ) { int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; @@ -481,7 +482,7 @@ cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr, sx += x+3; row_count++; } } - + #endif for( ; x <= maxx; x++, ptr += 3 ) { int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp index f20089032..0e36117b0 100644 --- a/modules/imgproc/src/thresh.cpp +++ b/modules/imgproc/src/thresh.cpp @@ -248,8 +248,9 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type ) { const uchar* src = (const uchar*)(_src.data + _src.step*i); uchar* dst = (uchar*)(_dst.data + _dst.step*i); - - for( j = j_scalar; j <= roi.width - 4; j += 4 ) + j = j_scalar; + #if CV_ENABLE_UNROLLED + for( ; j <= roi.width - 4; j += 4 ) { uchar t0 = tab[src[j]]; uchar t1 = tab[src[j+1]]; @@ -263,7 +264,7 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type ) dst[j+2] = t0; dst[j+3] = t1; } - + #endif for( ; j < roi.width; j++ ) dst[j] = tab[src[j]]; } @@ -619,13 +620,16 @@ getThreshVal_Otsu_8u( const Mat& _src ) for( i = 0; i < size.height; i++ ) { const uchar* src = _src.data + _src.step*i; - for( j = 0; j <= size.width - 4; j += 4 ) + j = 0; + #if CV_ENABLE_UNROLLED + for( ; j <= size.width - 4; j += 4 ) { int v0 = src[j], v1 = src[j+1]; h[v0]++; h[v1]++; v0 = src[j+2]; v1 = src[j+3]; h[v0]++; h[v1]++; } + #endif for( ; j < size.width; j++ ) h[src[j]]++; } @@ -682,8 +686,10 @@ public: { int row0 = std::min(cvRound(range.begin() * src.rows / nStripes), src.rows); int row1 = std::min(cvRound(range.end() * src.rows / nStripes), src.rows); - - //printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", src.rows, src.cols, range.begin(), range.end(), row0, row1); + + if(0) + printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", + src.rows, src.cols, range.begin(), range.end(), row0, row1); Mat srcStripe = src.rowRange(row0, row1); Mat dstStripe = dst.rowRange(row0, row1);