From 6f05a250bef653d8398d5ee855f6cef43194c031 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 30 Sep 2014 11:32:57 -0700 Subject: [PATCH] optimization of cv::CLAHE (~3x) --- modules/imgproc/src/clahe.cpp | 65 ++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/modules/imgproc/src/clahe.cpp b/modules/imgproc/src/clahe.cpp index c329148f2..1e5ecc3a7 100644 --- a/modules/imgproc/src/clahe.cpp +++ b/modules/imgproc/src/clahe.cpp @@ -233,6 +233,31 @@ namespace CLAHE_Interpolation_Body(const cv::Mat& src, const cv::Mat& dst, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& tilesY) : src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY) { + buf.allocate(src.cols << 2); + ind1_p = (int *)buf; + ind2_p = ind1_p + src.cols; + xa_p = (float *)(ind2_p + src.cols); + xa1_p = xa_p + src.cols; + + int lut_step = static_cast(lut_.step / sizeof(T)); + float inv_tw = 1.0f / tileSize_.width; + + for (int x = 0; x < src.cols; ++x) + { + float txf = x * inv_tw - 0.5f; + + int tx1 = cvFloor(txf); + int tx2 = tx1 + 1; + + xa_p[x] = txf - tx1; + xa1_p[x] = 1.0f - xa_p[x]; + + tx1 = std::max(tx1, 0); + tx2 = std::min(tx2, tilesX_ - 1); + + ind1_p[x] = tx1 * lut_step; + ind2_p[x] = tx2 * lut_step; + } } void operator ()(const cv::Range& range) const; @@ -245,24 +270,28 @@ namespace cv::Size tileSize_; int tilesX_; int tilesY_; + + cv::AutoBuffer buf; + int * ind1_p, * ind2_p; + float * xa_p, * xa1_p; }; template void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const { - const size_t lut_step = lut_.step / sizeof(T); + float inv_th = 1.0f / tileSize_.height; for (int y = range.start; y < range.end; ++y) { const T* srcRow = src_.ptr(y); T* dstRow = dst_.ptr(y); - const float tyf = (static_cast(y) / tileSize_.height) - 0.5f; + float tyf = y * inv_th - 0.5f; int ty1 = cvFloor(tyf); int ty2 = ty1 + 1; - const float ya = tyf - ty1; + float ya = tyf - ty1, ya1 = 1.0f - ya; ty1 = std::max(ty1, 0); ty2 = std::min(ty2, tilesY_ - 1); @@ -272,27 +301,13 @@ namespace for (int x = 0; x < src_.cols; ++x) { - const float txf = (static_cast(x) / tileSize_.width) - 0.5f; + int srcVal = srcRow[x]; - int tx1 = cvFloor(txf); - int tx2 = tx1 + 1; + int ind1 = ind1_p[x] + srcVal; + int ind2 = ind2_p[x] + srcVal; - const float xa = txf - tx1; - - tx1 = std::max(tx1, 0); - tx2 = std::min(tx2, tilesX_ - 1); - - const int srcVal = srcRow[x]; - - const size_t ind1 = tx1 * lut_step + srcVal; - const size_t ind2 = tx2 * lut_step + srcVal; - - float res = 0; - - res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya)); - res += lutPlane1[ind2] * ((xa) * (1.0f - ya)); - res += lutPlane2[ind1] * ((1.0f - xa) * (ya)); - res += lutPlane2[ind2] * ((xa) * (ya)); + float res = (lutPlane1[ind1] * xa1_p[x] + lutPlane1[ind2] * xa_p[x]) * ya1 + + (lutPlane2[ind1] * xa1_p[x] + lutPlane2[ind2] * xa_p[x]) * ya; dstRow[x] = cv::saturate_cast(res); } @@ -403,7 +418,9 @@ namespace calcLutBody = cv::makePtr >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale); else if (_src.type() == CV_16UC1) calcLutBody = cv::makePtr >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale); - CV_Assert(!calcLutBody.empty()); + else + CV_Error( CV_StsBadArg, "Unsupported type" ); + cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), *calcLutBody); cv::Ptr interpolationBody; @@ -411,7 +428,7 @@ namespace interpolationBody = cv::makePtr >(src, dst, lut_, tileSize, tilesX_, tilesY_); else if (_src.type() == CV_16UC1) interpolationBody = cv::makePtr >(src, dst, lut_, tileSize, tilesX_, tilesY_); - CV_Assert(!interpolationBody.empty()); + cv::parallel_for_(cv::Range(0, src.rows), *interpolationBody); }