diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp
index 5472acf85..b261ad086 100644
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -5,27 +5,32 @@
 //////////////////////////////////////////////////////////////////////
 // Remap
 
-GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation, BorderMode)
+GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
     int interpolation = GET_PARAM(3);
     int borderMode = GET_PARAM(4);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
-    cv::Mat xmap_host(size, CV_32FC1);
-    cv::Mat ymap_host(size, CV_32FC1);
+    fill(src_host, 0, 255);
 
-    declare.in(src_host, xmap_host, ymap_host, WARMUP_RNG);
+    cv::Mat xmap_host(size, CV_32FC1);
+    fill(xmap_host, 0, size.width);
+
+    cv::Mat ymap_host(size, CV_32FC1);
+    fill(ymap_host, 0, size.height);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat xmap(xmap_host);
     cv::gpu::GpuMat ymap(ymap_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::remap(src, dst, xmap, ymap, interpolation, borderMode);
+
     declare.time(3.0);
 
     TEST_CYCLE()
@@ -35,11 +40,450 @@ GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, Remap, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC),
-                        testing::Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_CONSTANT)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
+
+
+//////////////////////////////////////////////////////////////////////
+// Resize
+
+IMPLEMENT_PARAM_CLASS(Scale, double)
+
+GPU_PERF_TEST(Resize, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, Scale) // perf case for cv::gpu::resize
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    // Remaining test parameters: source size, matrix type, interpolation flag, scale factor.
+    const cv::Size srcSize = GET_PARAM(1);
+    const int srcType = GET_PARAM(2);
+    const int interp = GET_PARAM(3);
+    const double factor = GET_PARAM(4);
+    // Host-side input, populated through the fill() helper.
+    cv::Mat h_src(srcSize, srcType);
+    fill(h_src, 0, 255);
+    // Device-side matrices; the source is constructed from the host matrix.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::resize(d_src, d_dst, cv::Size(), factor, factor, interp);
+
+    declare.time(1.0);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::resize(d_src, d_dst, cv::Size(), factor, factor, interp);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine( // one Resize instance per combination of the generators below
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), // 8-bit unsigned, 1/3/4 channels
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), // 16-bit unsigned, 1/3/4 channels
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), // 32-bit float, 1/3/4 channels
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR),
+                    Interpolation(cv::INTER_CUBIC),   Interpolation(cv::INTER_AREA)),
+    testing::Values(Scale(0.5), Scale(0.3), Scale(2.0)))); // two factors below 1 and one above 1
+
+//////////////////////////////////////////////////////////////////////
+// WarpAffine
+
+GPU_PERF_TEST(WarpAffine, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode) // perf case for cv::gpu::warpAffine
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+    // Remaining test parameters: source size, matrix type, interpolation flag, border mode.
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int interpolation = GET_PARAM(3);
+    int borderMode = GET_PARAM(4);
+    // Host-side input populated through fill(); the device source below is constructed from it.
+    cv::Mat src_host(size, type);
+    fill(src_host, 0, 255);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+    // 2x3 matrix: rotation by pi/4 with an x offset of src.cols / 2 (integer division, as before).
+    const double alpha = CV_PI / 4;
+    double mat[2][3] = { {std::cos(alpha), -std::sin(alpha), static_cast<double>(src.cols / 2)}, // explicit cast: int -> double inside braces is a narrowing conversion
+                         {std::sin(alpha),  std::cos(alpha), 0.0}};
+    cv::Mat M(2, 3, CV_64F, (void*) mat); // M refers to the local array above, so mat must stay alive while M is used
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::warpAffine(src, dst, M, size, interpolation, borderMode);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::warpAffine(src, dst, M, size, interpolation, borderMode);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine( // one WarpAffine instance per combination of the generators below
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), // 8-bit unsigned, 1/3/4 channels
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), // 16-bit unsigned, 1/3/4 channels
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), // 32-bit float, 1/3/4 channels
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP)))); // five border modes
+
+//////////////////////////////////////////////////////////////////////
+// WarpPerspective
+
+GPU_PERF_TEST(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode) // perf case for cv::gpu::warpPerspective
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+    // Remaining test parameters: source size, matrix type, interpolation flag, border mode.
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int interpolation = GET_PARAM(3);
+    int borderMode = GET_PARAM(4);
+    // Host-side input populated through fill(); the device source below is constructed from it.
+    cv::Mat src_host(size, type);
+    fill(src_host, 0, 255);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+    // 3x3 matrix: rotation by pi/4 with an x offset of src.cols / 2 (integer division, as before); last row is (0, 0, 1).
+    const double alpha = CV_PI / 4;
+    double mat[3][3] = { {std::cos(alpha), -std::sin(alpha), static_cast<double>(src.cols / 2)}, // explicit cast: int -> double inside braces is a narrowing conversion
+                         {std::sin(alpha),  std::cos(alpha), 0.0},
+                         {0.0,              0.0,             1.0}};
+    cv::Mat M(3, 3, CV_64F, (void*) mat); // M refers to the local array above, so mat must stay alive while M is used
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::warpPerspective(src, dst, M, size, interpolation, borderMode);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::warpPerspective(src, dst, M, size, interpolation, borderMode);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine( // one WarpPerspective instance per combination of the generators below
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), // 8-bit unsigned, 1/3/4 channels
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), // 16-bit unsigned, 1/3/4 channels
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), // 32-bit float, 1/3/4 channels
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP)))); // five border modes
+
+//////////////////////////////////////////////////////////////////////
+// CopyMakeBorder
+
+GPU_PERF_TEST(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, BorderMode) // perf case for cv::gpu::copyMakeBorder
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size srcSize = GET_PARAM(1);
+    const int srcType = GET_PARAM(2);
+    const int border = GET_PARAM(3);
+    const int pad = 5; // the same margin is passed for all four sides
+    cv::Mat h_src(srcSize, srcType);
+    fill(h_src, 0, 255);
+
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::copyMakeBorder(d_src, d_dst, pad, pad, pad, pad, border);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::copyMakeBorder(d_src, d_dst, pad, pad, pad, pad, border);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine( // one CopyMakeBorder instance per combination of the generators below
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), // 8-bit unsigned, 1/3/4 channels
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), // 16-bit unsigned, 1/3/4 channels
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), // 32-bit float, 1/3/4 channels
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP)))); // five border modes
+
+//////////////////////////////////////////////////////////////////////
+// Threshold
+
+CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
+
+GPU_PERF_TEST(Threshold, cv::gpu::DeviceInfo, cv::Size, MatDepth, ThreshOp) // perf case for cv::gpu::threshold
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size srcSize = GET_PARAM(1);
+    const int srcDepth = GET_PARAM(2);
+    const int op = GET_PARAM(3);
+    const double thresh = 100.0;
+    const double maxVal = 255.0;
+    cv::Mat h_src(srcSize, srcDepth);
+    fill(h_src, 0, 255);
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::threshold(d_src, d_dst, thresh, maxVal, op);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::threshold(d_src, d_dst, thresh, maxVal, op);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine( // one Threshold instance per combination of the generators below
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)), // passed to cv::Mat as the type, i.e. single-channel
+    ALL_THRESH_OPS)); // the five ThreshOp values listed in the ALL_THRESH_OPS macro
+
+//////////////////////////////////////////////////////////////////////
+// Integral
+
+GPU_PERF_TEST(Integral, cv::gpu::DeviceInfo, cv::Size) // perf case for cv::gpu::integralBuffered
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size srcSize = GET_PARAM(1);
+
+    cv::Mat h_src(srcSize, CV_8UC1);
+    fill(h_src, 0, 255);
+
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_sum;
+    cv::gpu::GpuMat d_scratch;
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::integralBuffered(d_src, d_sum, d_scratch);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::integralBuffered(d_src, d_sum, d_scratch);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine( // one Integral instance per (device, size) combination
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Integral_Sqr
+
+GPU_PERF_TEST(Integral_Sqr, cv::gpu::DeviceInfo, cv::Size) // perf case for cv::gpu::sqrIntegral
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size srcSize = GET_PARAM(1);
+
+    cv::Mat h_src(srcSize, CV_8UC1);
+    fill(h_src, 0, 255);
+
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_sqsum;
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::sqrIntegral(d_src, d_sqsum);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::sqrIntegral(d_src, d_sqsum);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Integral_Sqr, testing::Combine( // one Integral_Sqr instance per (device, size) combination
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// HistEven_OneChannel
+
+GPU_PERF_TEST(HistEven_OneChannel, cv::gpu::DeviceInfo, cv::Size, MatDepth) // perf case for the single-matrix cv::gpu::histEven overload
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size srcSize = GET_PARAM(1);
+    const int srcDepth = GET_PARAM(2);
+
+    cv::Mat h_src(srcSize, srcDepth);
+    fill(h_src, 0, 255);
+
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_hist;
+    cv::gpu::GpuMat d_scratch;
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::histEven(d_src, d_hist, d_scratch, 30, 0, 180);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::histEven(d_src, d_hist, d_scratch, 30, 0, 180);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, HistEven_OneChannel, testing::Combine( // one instance per combination of the generators below
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S)))); // depth is passed to cv::Mat as the type, i.e. single-channel
+
+//////////////////////////////////////////////////////////////////////
+// HistEven_FourChannel
+
+GPU_PERF_TEST(HistEven_FourChannel, cv::gpu::DeviceInfo, cv::Size, MatDepth) // perf case for the array-based cv::gpu::histEven overload
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size srcSize = GET_PARAM(1);
+    const int srcDepth = GET_PARAM(2);
+    // Four-channel host input of the requested depth.
+    cv::Mat h_src(srcSize, CV_MAKE_TYPE(srcDepth, 4));
+    fill(h_src, 0, 255);
+
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_hists[4];
+    cv::gpu::GpuMat d_scratch;
+    int bins[] = {30, 30, 30, 30};
+    int lower[] = {0, 0, 0, 0};
+    int upper[] = {180, 180, 180, 180};
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::histEven(d_src, d_hists, d_scratch, bins, lower, upper);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::histEven(d_src, d_hists, d_scratch, bins, lower, upper);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, HistEven_FourChannel, testing::Combine( // one instance per combination of the generators below
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S)))); // the test body widens each depth to four channels
+
+//////////////////////////////////////////////////////////////////////
+// CalcHist
+
+GPU_PERF_TEST(CalcHist, cv::gpu::DeviceInfo, cv::Size) // perf case for cv::gpu::calcHist
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size srcSize = GET_PARAM(1);
+
+    cv::Mat h_src(srcSize, CV_8UC1);
+    fill(h_src, 0, 255);
+
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_hist;
+    cv::gpu::GpuMat d_scratch;
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::calcHist(d_src, d_hist, d_scratch);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::calcHist(d_src, d_hist, d_scratch);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CalcHist, testing::Combine( // one CalcHist instance per (device, size) combination
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// EqualizeHist
+
+GPU_PERF_TEST(EqualizeHist, cv::gpu::DeviceInfo, cv::Size) // perf case for cv::gpu::equalizeHist
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size srcSize = GET_PARAM(1);
+
+    cv::Mat h_src(srcSize, CV_8UC1);
+    fill(h_src, 0, 255);
+
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    cv::gpu::GpuMat d_hist;
+    cv::gpu::GpuMat d_scratch;
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::equalizeHist(d_src, d_dst, d_hist, d_scratch);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::equalizeHist(d_src, d_dst, d_hist, d_scratch);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, testing::Combine( // one EqualizeHist instance per (device, size) combination
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// ColumnSum
+
+GPU_PERF_TEST(ColumnSum, cv::gpu::DeviceInfo, cv::Size) // perf case for cv::gpu::columnSum
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size srcSize = GET_PARAM(1);
+
+    cv::Mat h_src(srcSize, CV_32FC1);
+    fill(h_src, 0, 255);
+
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_sums;
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::columnSum(d_src, d_sums);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::columnSum(d_src, d_sums);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, ColumnSum, testing::Combine( // one ColumnSum instance per (device, size) combination
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Canny
+
+IMPLEMENT_PARAM_CLASS(AppertureSize, int)
+IMPLEMENT_PARAM_CLASS(L2gradient, bool)
+
+GPU_PERF_TEST(Canny, cv::gpu::DeviceInfo, AppertureSize, L2gradient) // perf case for cv::gpu::Canny on a fixed test image
+{
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const int aperture = GET_PARAM(1);
+    const bool withL2 = GET_PARAM(2);
+    const double lowThresh = 50.0;
+    const double highThresh = 100.0;
+
+    cv::Mat h_image = readImage("perf/1280x1024.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(h_image.empty());
+    cv::gpu::GpuMat d_image(h_image);
+    cv::gpu::GpuMat d_dst;
+    cv::gpu::CannyBuf cannyBuf;
+    // Called once before TEST_CYCLE (presumably a warm-up so first-call costs stay out of the timed loop).
+    cv::gpu::Canny(d_image, cannyBuf, d_dst, lowThresh, highThresh, aperture, withL2);
+    TEST_CYCLE()
+    {
+        cv::gpu::Canny(d_image, cannyBuf, d_dst, lowThresh, highThresh, aperture, withL2);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine( // one Canny instance per combination of the generators below
+    ALL_DEVICES,
+    testing::Values(AppertureSize(3), AppertureSize(5)), // forwarded as the sixth argument of cv::gpu::Canny
+    testing::Values(L2gradient(false), L2gradient(true)))); // forwarded as the seventh argument of cv::gpu::Canny
 
 //////////////////////////////////////////////////////////////////////
 // MeanShiftFiltering
@@ -47,7 +491,6 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Remap, testing::Combine(
 GPU_PERF_TEST_1(MeanShiftFiltering, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
     cv::Mat img = readImage("gpu/meanshift/cones.png");
@@ -59,6 +502,8 @@ GPU_PERF_TEST_1(MeanShiftFiltering, cv::gpu::DeviceInfo)
     cv::gpu::GpuMat src(rgba);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::meanShiftFiltering(src, dst, 50, 50);
+
     declare.time(5.0);
 
     TEST_CYCLE()
@@ -75,7 +520,6 @@ INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftFiltering, ALL_DEVICES);
 GPU_PERF_TEST_1(MeanShiftProc, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
     cv::Mat img = readImage("gpu/meanshift/cones.png");
@@ -88,6 +532,8 @@ GPU_PERF_TEST_1(MeanShiftProc, cv::gpu::DeviceInfo)
     cv::gpu::GpuMat dstr;
     cv::gpu::GpuMat dstsp;
 
+    cv::gpu::meanShiftProc(src, dstr, dstsp, 50, 50);
+
     declare.time(5.0);
 
     TEST_CYCLE()
@@ -104,7 +550,6 @@ INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftProc, ALL_DEVICES);
 GPU_PERF_TEST_1(MeanShiftSegmentation, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
     cv::Mat img = readImage("gpu/meanshift/cones.png");
@@ -116,6 +561,8 @@ GPU_PERF_TEST_1(MeanShiftSegmentation, cv::gpu::DeviceInfo)
     cv::gpu::GpuMat src(rgba);
     cv::Mat dst;
 
+    meanShiftSegmentation(src, dst, 10, 10, 20);
+
     declare.time(5.0);
 
     TEST_CYCLE()
@@ -127,622 +574,68 @@ GPU_PERF_TEST_1(MeanShiftSegmentation, cv::gpu::DeviceInfo)
 INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftSegmentation, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// DrawColorDisp
+// BlendLinear
 
-GPU_PERF_TEST(DrawColorDisp, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(BlendLinear, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
+    cv::Mat img1_host(size, type);
+    fill(img1_host, 0, 255);
 
-    cv::Mat src_host(size, type);
+    cv::Mat img2_host(size, type);
+    fill(img2_host, 0, 255);
 
-    fill(src_host, 0, 255);
-
-    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat img1(img1_host);
+    cv::gpu::GpuMat img2(img2_host);
+    cv::gpu::GpuMat weights1(size, CV_32FC1, cv::Scalar::all(0.5));
+    cv::gpu::GpuMat weights2(size, CV_32FC1, cv::Scalar::all(0.5));
     cv::gpu::GpuMat dst;
 
-    TEST_CYCLE()
-    {
-        cv::gpu::drawColorDisp(src, dst, 255);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, DrawColorDisp, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// ReprojectImageTo3D
-
-GPU_PERF_TEST(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
+    cv::gpu::blendLinear(img1, img2, weights1, weights2, dst);
 
     TEST_CYCLE()
     {
-        cv::gpu::reprojectImageTo3D(src, dst, cv::Mat::ones(4, 4, CV_32FC1));
+        cv::gpu::blendLinear(img1, img2, weights1, weights2, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, ReprojectImageTo3D, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// CvtColor
-
-GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, perf::MatType, CvtColorInfo)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    CvtColorInfo info = GET_PARAM(3);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, CV_MAKETYPE(type, info.scn));
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::cvtColor(src, dst, info.code, info.dcn);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1),
-                        testing::Values(
-                            CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA), CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY), CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
-                            CvtColorInfo(4, 4, cv::COLOR_BGR2XYZ), CvtColorInfo(4, 4, cv::COLOR_BGR2YCrCb), CvtColorInfo(4, 4, cv::COLOR_YCrCb2BGR),
-                            CvtColorInfo(4, 4, cv::COLOR_BGR2HSV), CvtColorInfo(4, 4, cv::COLOR_HSV2BGR))));
-
-//////////////////////////////////////////////////////////////////////
-// SwapChannels
-
-GPU_PERF_TEST(SwapChannels, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, CV_8UC4);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-
-    const int dstOrder[] = {2, 1, 0, 3};
-
-    TEST_CYCLE()
-    {
-        cv::gpu::swapChannels(src, dstOrder);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, SwapChannels, testing::Combine(ALL_DEVICES, GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// Threshold
-
-GPU_PERF_TEST(Threshold, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::threshold(src, dst, 100.0, 255.0, cv::THRESH_BINARY);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Resize
-
-GPU_PERF_TEST(Resize, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation, double)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-    double f = GET_PARAM(4);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    declare.time(1.0);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::resize(src, dst, cv::Size(), f, f, interpolation);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(perf::szSXGA, perf::sz1080p),
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC),
-                        testing::Values(0.5, 2.0)));
-
-//////////////////////////////////////////////////////////////////////
-// WarpAffine
-
-GPU_PERF_TEST(WarpAffine, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    const double aplha = CV_PI / 4;
-    double mat[2][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
-                         {std::sin(aplha),  std::cos(aplha), 0}};
-    cv::Mat M(2, 3, CV_64F, (void*) mat);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::warpAffine(src, dst, M, size, interpolation, cv::BORDER_CONSTANT, cv::Scalar());
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
-
-//////////////////////////////////////////////////////////////////////
-// WarpPerspective
-
-GPU_PERF_TEST(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    const double aplha = CV_PI / 4;
-    double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
-                         {std::sin(aplha),  std::cos(aplha), 0},
-                         {0.0,              0.0,             1.0}};
-    cv::Mat M(3, 3, CV_64F, (void*) mat);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::warpPerspective(src, dst, M, size, interpolation, cv::BORDER_CONSTANT, cv::Scalar());
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
-
-//////////////////////////////////////////////////////////////////////
-// BuildWarpPlaneMaps
-
-GPU_PERF_TEST(BuildWarpPlaneMaps, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::gpu::GpuMat map_x;
-    cv::gpu::GpuMat map_y;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), cv::Mat::eye(3, 3, CV_32FC1),
-                                    cv::Mat::ones(3, 3, CV_32FC1), cv::Mat::zeros(1, 3, CV_32F), 1.0, map_x, map_y);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpPlaneMaps, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// BuildWarpCylindricalMaps
-
-GPU_PERF_TEST(BuildWarpCylindricalMaps, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::gpu::GpuMat map_x;
-    cv::gpu::GpuMat map_y;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), cv::Mat::eye(3, 3, CV_32FC1),
-                                          cv::Mat::ones(3, 3, CV_32FC1), 1.0, map_x, map_y);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpCylindricalMaps, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// BuildWarpSphericalMaps
-
-GPU_PERF_TEST(BuildWarpSphericalMaps, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::gpu::GpuMat map_x;
-    cv::gpu::GpuMat map_y;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), cv::Mat::eye(3, 3, CV_32FC1),
-                                        cv::Mat::ones(3, 3, CV_32FC1), 1.0, map_x, map_y);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpSphericalMaps, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// Rotate
-
-GPU_PERF_TEST(Rotate, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::rotate(src, dst, size, 30.0, 0, 0, interpolation);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, Rotate, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
-
-//////////////////////////////////////////////////////////////////////
-// CopyMakeBorder
-
-GPU_PERF_TEST(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, perf::MatType, BorderMode)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int borderType = GET_PARAM(3);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),
-                        testing::Values((int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT, (int) cv::BORDER_WRAP, (int) cv::BORDER_CONSTANT)));
-
-//////////////////////////////////////////////////////////////////////
-// Integral
-
-GPU_PERF_TEST(Integral, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, CV_8UC1);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-    cv::gpu::GpuMat buf;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::integralBuffered(src, dst, buf);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// IntegralSqr
-
-GPU_PERF_TEST(IntegralSqr, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, CV_8UC1);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::sqrIntegral(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, IntegralSqr, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// ColumnSum
-
-GPU_PERF_TEST(ColumnSum, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, CV_32FC1);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::columnSum(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, ColumnSum, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// CornerHarris
-
-GPU_PERF_TEST(CornerHarris, cv::gpu::DeviceInfo, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    int type = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
-
-    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
-
-    cv::gpu::GpuMat src(img);
-    cv::gpu::GpuMat dst;
-    cv::gpu::GpuMat Dx;
-    cv::gpu::GpuMat Dy;
-
-    int blockSize = 3;
-    int ksize = 7;
-    double k = 0.5;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::cornerHarris(src, dst, Dx, Dy, blockSize, ksize, k);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(CV_8UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// CornerMinEigenVal
-
-GPU_PERF_TEST(CornerMinEigenVal, cv::gpu::DeviceInfo, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    int type = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
-
-    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
-
-    cv::gpu::GpuMat src(img);
-    cv::gpu::GpuMat dst;
-    cv::gpu::GpuMat Dx;
-    cv::gpu::GpuMat Dy;
-
-    int blockSize = 3;
-    int ksize = 7;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::cornerMinEigenVal(src, dst, Dx, Dy, blockSize, ksize);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigenVal, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(CV_8UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// MulSpectrums
-
-GPU_PERF_TEST(MulSpectrums, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat a_host(size, CV_32FC2);
-    cv::Mat b_host(size, CV_32FC2);
-
-    declare.in(a_host, b_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat a(a_host);
-    cv::gpu::GpuMat b(b_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::mulSpectrums(a, b, dst, 0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// Dft
-
-GPU_PERF_TEST(Dft, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, CV_32FC2);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    declare.time(2.0);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::dft(src, dst, size);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, BlendLinear, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
 //////////////////////////////////////////////////////////////////////
 // Convolve
 
-GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, int, bool)
+IMPLEMENT_PARAM_CLASS(KSize, int)
+IMPLEMENT_PARAM_CLASS(Ccorr, bool)
+
+GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int templ_size = GET_PARAM(2);
     bool ccorr = GET_PARAM(3);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::gpu::GpuMat image = cv::gpu::createContinuous(size, CV_32FC1);
-    cv::gpu::GpuMat templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
-
     image.setTo(cv::Scalar(1.0));
+
+    cv::gpu::GpuMat templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
     templ.setTo(cv::Scalar(1.0));
 
     cv::gpu::GpuMat dst;
     cv::gpu::ConvolveBuf buf;
 
+    cv::gpu::convolve(image, templ, dst, ccorr, buf);
+
     declare.time(2.0);
 
     TEST_CYCLE()
@@ -752,29 +645,412 @@ GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, int, bool)
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(3, 9, 27, 32, 64),
-                        testing::Bool()));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(KSize(3), KSize(9), KSize(17), KSize(27), KSize(32), KSize(64)),
+    testing::Values(Ccorr(false), Ccorr(true))));
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate_8U
+
+CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
+#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_CCOEFF_NORMED))
+
+IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size)
+
+GPU_PERF_TEST(MatchTemplate_8U, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod) // benchmark of cv::gpu::matchTemplate on CV_8U data
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    cv::Size size = GET_PARAM(1);
+    cv::Size templ_size = GET_PARAM(2);
+    int cn = GET_PARAM(3); // channel count shared by the image and the template
+    int method = GET_PARAM(4);
+
+    cv::Mat image_host(size, CV_MAKE_TYPE(CV_8U, cn));
+    fill(image_host, 0, 255); // fill() is declared in perf_utility.hpp; presumably random values in [0, 255] - confirm
+
+    cv::Mat templ_host(templ_size, CV_MAKE_TYPE(CV_8U, cn));
+    fill(templ_host, 0, 255);
+
+    cv::gpu::GpuMat image(image_host); // device-side matrices constructed from the host data
+    cv::gpu::GpuMat templ(templ_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::matchTemplate(image, templ, dst, method); // one call outside TEST_CYCLE (presumably warm-up so dst is allocated - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::matchTemplate(image, templ, dst, method); // the call executed inside the TEST_CYCLE loop
+    }
+} // closed with a plain '}' (no trailing ';') like the other GPU_PERF_TEST bodies in this file
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_8U, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
+    testing::Values(Channels(1), Channels(3), Channels(4)),
+    ALL_TEMPLATE_METHODS));
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate_32F
+
+GPU_PERF_TEST(MatchTemplate_32F, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod) // benchmark of cv::gpu::matchTemplate on CV_32F data
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    cv::Size size = GET_PARAM(1);
+    cv::Size templ_size = GET_PARAM(2);
+    int cn = GET_PARAM(3); // channel count shared by the image and the template
+    int method = GET_PARAM(4);
+
+    cv::Mat image_host(size, CV_MAKE_TYPE(CV_32F, cn));
+    fill(image_host, 0, 255); // fill() is declared in perf_utility.hpp; presumably random values in [0, 255] - confirm
+
+    cv::Mat templ_host(templ_size, CV_MAKE_TYPE(CV_32F, cn));
+    fill(templ_host, 0, 255);
+
+    cv::gpu::GpuMat image(image_host); // device-side matrices constructed from the host data
+    cv::gpu::GpuMat templ(templ_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::matchTemplate(image, templ, dst, method); // one call outside TEST_CYCLE (presumably warm-up so dst is allocated - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::matchTemplate(image, templ, dst, method); // the call executed inside the TEST_CYCLE loop
+    }
+} // closed with a plain '}' (no trailing ';') like the other GPU_PERF_TEST bodies in this file
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_32F, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
+    testing::Values(Channels(1), Channels(3), Channels(4)),
+    testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
+
+//////////////////////////////////////////////////////////////////////
+// MulSpectrums
+
+CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+
+GPU_PERF_TEST(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags) // benchmark of cv::gpu::mulSpectrums per device, size and flag
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    cv::Size size = GET_PARAM(1);
+    int flag = GET_PARAM(2); // DftFlags parameter forwarded unchanged to mulSpectrums
+
+    cv::Mat a_host(size, CV_32FC2); // two-channel float input
+    fill(a_host, 0, 100); // fill() is declared in perf_utility.hpp; presumably random values in [0, 100] - confirm
+
+    cv::Mat b_host(size, CV_32FC2);
+    fill(b_host, 0, 100);
+
+    cv::gpu::GpuMat a(a_host); // device-side matrices constructed from the host data
+    cv::gpu::GpuMat b(b_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::mulSpectrums(a, b, dst, flag); // one call outside TEST_CYCLE (presumably warm-up so dst is allocated - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::mulSpectrums(a, b, dst, flag); // the call executed inside the TEST_CYCLE loop
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
+
+//////////////////////////////////////////////////////////////////////
+// MulAndScaleSpectrums
+
+GPU_PERF_TEST(MulAndScaleSpectrums, cv::gpu::DeviceInfo, cv::Size) // benchmark of cv::gpu::mulAndScaleSpectrums per device and size
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    cv::Size size = GET_PARAM(1);
+
+    float scale = 1.f / size.area(); // reciprocal of the element count of the input size
+
+    cv::Mat src1_host(size, CV_32FC2); // two-channel float input
+    fill(src1_host, 0, 100); // fill() is declared in perf_utility.hpp; presumably random values in [0, 100] - confirm
+
+    cv::Mat src2_host(size, CV_32FC2);
+    fill(src2_host, 0, 100);
+
+    cv::gpu::GpuMat src1(src1_host); // device-side matrices constructed from the host data
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::mulAndScaleSpectrums(src1, src2, dst, cv::DFT_ROWS, scale, false); // one call outside TEST_CYCLE (presumably warm-up - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::mulAndScaleSpectrums(src1, src2, dst, cv::DFT_ROWS, scale, false); // flags fixed to DFT_ROWS, last argument fixed to false
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MulAndScaleSpectrums, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Dft
+
+GPU_PERF_TEST(Dft, cv::gpu::DeviceInfo, cv::Size, DftFlags) // benchmark of cv::gpu::dft per device, size and flag
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    cv::Size size = GET_PARAM(1);
+    int flag = GET_PARAM(2); // DftFlags parameter forwarded unchanged to dft
+
+    cv::Mat src_host(size, CV_32FC2); // two-channel float input
+    fill(src_host, 0, 100); // fill() is declared in perf_utility.hpp; presumably random values in [0, 100] - confirm
+
+    cv::gpu::GpuMat src(src_host); // device-side matrix constructed from the host data
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::dft(src, dst, size, flag); // one call outside TEST_CYCLE (presumably warm-up so dst is allocated - confirm)
+
+    declare.time(2.0); // perf-framework time setting for this test (presumably seconds - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::dft(src, dst, size, flag); // the call executed inside the TEST_CYCLE loop
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))));
+
+//////////////////////////////////////////////////////////////////////
+// CornerHarris
+
+IMPLEMENT_PARAM_CLASS(BlockSize, int)
+IMPLEMENT_PARAM_CLASS(ApertureSize, int)
+
+GPU_PERF_TEST(CornerHarris, cv::gpu::DeviceInfo, MatType, BorderMode, BlockSize, ApertureSize) // benchmark of cv::gpu::cornerHarris on a test image
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    int type = GET_PARAM(1);
+    int borderType = GET_PARAM(2);
+    int blockSize = GET_PARAM(3);
+    int apertureSize = GET_PARAM(4);
+
+    cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE); // readImage() is declared in perf_utility.hpp
+    ASSERT_FALSE(img.empty()); // stop here if the image could not be loaded
+
+    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0); // float input is scaled by 1/255, any other type keeps scale 1
+
+    cv::gpu::GpuMat src(img); // device-side matrix constructed from the host image
+    cv::gpu::GpuMat dst;
+    cv::gpu::GpuMat Dx; // Dx, Dy and buf are empty GpuMats handed to cornerHarris
+    cv::gpu::GpuMat Dy;
+    cv::gpu::GpuMat buf;
+
+    double k = 0.5; // fixed value for cornerHarris' k argument
+
+    cv::gpu::cornerHarris(src, dst, Dx, Dy, buf, blockSize, apertureSize, k, borderType); // one call outside TEST_CYCLE (presumably warm-up - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::cornerHarris(src, dst, Dx, Dy, buf, blockSize, apertureSize, k, borderType); // the call executed inside the TEST_CYCLE loop
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
+    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
+    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
+
+//////////////////////////////////////////////////////////////////////
+// CornerMinEigenVal
+
+GPU_PERF_TEST(CornerMinEigenVal, cv::gpu::DeviceInfo, MatType, BorderMode, BlockSize, ApertureSize) // benchmark of cv::gpu::cornerMinEigenVal on a test image
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    int type = GET_PARAM(1);
+    int borderType = GET_PARAM(2);
+    int blockSize = GET_PARAM(3);
+    int apertureSize = GET_PARAM(4);
+
+    cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE); // readImage() is declared in perf_utility.hpp
+    ASSERT_FALSE(img.empty()); // stop here if the image could not be loaded
+
+    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0); // float input is scaled by 1/255, any other type keeps scale 1
+
+    cv::gpu::GpuMat src(img); // device-side matrix constructed from the host image
+    cv::gpu::GpuMat dst;
+    cv::gpu::GpuMat Dx; // Dx, Dy and buf are empty GpuMats handed to cornerMinEigenVal
+    cv::gpu::GpuMat Dy;
+    cv::gpu::GpuMat buf;
+
+    cv::gpu::cornerMinEigenVal(src, dst, Dx, Dy, buf, blockSize, apertureSize, borderType); // one call outside TEST_CYCLE (presumably warm-up - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::cornerMinEigenVal(src, dst, Dx, Dy, buf, blockSize, apertureSize, borderType); // the call executed inside the TEST_CYCLE loop
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigenVal, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
+    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
+    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
+
+//////////////////////////////////////////////////////////////////////
+// BuildWarpPlaneMaps
+
+GPU_PERF_TEST(BuildWarpPlaneMaps, cv::gpu::DeviceInfo, cv::Size) // benchmark of cv::gpu::buildWarpPlaneMaps per device and size
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    cv::Size size = GET_PARAM(1);
+
+    cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1); // 3x3 float identity
+    cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1); // 3x3 float all-ones; NOTE(review): not an orthonormal rotation, presumably fine for timing - confirm
+    cv::Mat T = cv::Mat::zeros(1, 3, CV_32F); // 1x3 float zeros
+    cv::gpu::GpuMat map_x; // output maps, left empty here
+    cv::gpu::GpuMat map_y;
+
+    cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y); // one call outside TEST_CYCLE (presumably warm-up - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y); // rectangle spans the full size
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpPlaneMaps, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// BuildWarpCylindricalMaps
+
+GPU_PERF_TEST(BuildWarpCylindricalMaps, cv::gpu::DeviceInfo, cv::Size) // benchmark of cv::gpu::buildWarpCylindricalMaps per device and size
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    cv::Size size = GET_PARAM(1);
+
+    cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1); // 3x3 float identity
+    cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1); // 3x3 float all-ones; NOTE(review): not an orthonormal rotation, presumably fine for timing - confirm
+    cv::gpu::GpuMat map_x; // output maps, left empty here
+    cv::gpu::GpuMat map_y;
+
+    cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y); // one call outside TEST_CYCLE (presumably warm-up - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y); // rectangle spans the full size
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpCylindricalMaps, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// BuildWarpSphericalMaps
+
+GPU_PERF_TEST(BuildWarpSphericalMaps, cv::gpu::DeviceInfo, cv::Size) // benchmark of cv::gpu::buildWarpSphericalMaps per device and size
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    cv::Size size = GET_PARAM(1);
+
+    cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1); // 3x3 float identity
+    cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1); // 3x3 float all-ones; NOTE(review): not an orthonormal rotation, presumably fine for timing - confirm
+    cv::gpu::GpuMat map_x; // output maps, left empty here
+    cv::gpu::GpuMat map_y;
+
+    cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y); // one call outside TEST_CYCLE (presumably warm-up - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y); // rectangle spans the full size
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpSphericalMaps, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Rotate
+
+GPU_PERF_TEST(Rotate, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation) // benchmark of cv::gpu::rotate per device, size, type and interpolation
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int interpolation = GET_PARAM(3);
+
+    cv::Mat src_host(size, type);
+    fill(src_host, 0, 255); // fill() is declared in perf_utility.hpp; presumably random values in [0, 255] - confirm
+
+    cv::gpu::GpuMat src(src_host); // device-side matrix constructed from the host data
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::rotate(src, dst, size, 30.0, 0, 0, interpolation); // one call outside TEST_CYCLE (presumably warm-up so dst is allocated - confirm)
+
+    TEST_CYCLE()
+    {
+        cv::gpu::rotate(src, dst, size, 30.0, 0, 0, interpolation); // output size equals input size; 30.0 and the two zeros presumably angle and x/y shift - confirm
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Rotate, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC))));
 
 //////////////////////////////////////////////////////////////////////
 // PyrDown
 
-GPU_PERF_TEST(PyrDown, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(PyrDown, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::pyrDown(src, dst);
+
     TEST_CYCLE()
     {
         cv::gpu::pyrDown(src, dst);
@@ -782,28 +1058,31 @@ GPU_PERF_TEST(PyrDown, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16SC3, CV_32FC1)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
 //////////////////////////////////////////////////////////////////////
 // PyrUp
 
-GPU_PERF_TEST(PyrUp, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(PyrUp, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::pyrUp(src, dst);
+
     TEST_CYCLE()
     {
         cv::gpu::pyrUp(src, dst);
@@ -811,64 +1090,109 @@ GPU_PERF_TEST(PyrUp, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16SC3, CV_32FC1)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
 //////////////////////////////////////////////////////////////////////
-// BlendLinear
+// CvtColor
 
-GPU_PERF_TEST(BlendLinear, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, MatDepth, CvtColorInfo)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img1_host(size, type);
-    cv::Mat img2_host(size, type);
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    CvtColorInfo info = GET_PARAM(3);
 
-    declare.in(img1_host, img2_host, WARMUP_RNG);
+    cv::Mat src_host(size, CV_MAKETYPE(depth, info.scn));
+    fill(src_host, 0, 255);
 
-    cv::gpu::GpuMat img1(img1_host);
-    cv::gpu::GpuMat img2(img2_host);
-    cv::gpu::GpuMat weights1(size, CV_32FC1, cv::Scalar::all(0.5));
-    cv::gpu::GpuMat weights2(size, CV_32FC1, cv::Scalar::all(0.5));
+    cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::cvtColor(src, dst, info.code, info.dcn);
+
     TEST_CYCLE()
     {
-        cv::gpu::blendLinear(img1, img2, weights1, weights2, dst);
+        cv::gpu::cvtColor(src, dst, info.code, info.dcn);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, BlendLinear, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),
+    testing::Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA),
+                    CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY),
+                    CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ),
+                    CvtColorInfo(3, 3, cv::COLOR_XYZ2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb),
+                    CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2YUV),
+                    CvtColorInfo(3, 3, cv::COLOR_YUV2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2HSV),
+                    CvtColorInfo(3, 3, cv::COLOR_HSV2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2HLS),
+                    CvtColorInfo(3, 3, cv::COLOR_HLS2BGR))));
+
+//////////////////////////////////////////////////////////////////////
+// SwapChannels
+
+GPU_PERF_TEST(SwapChannels, cv::gpu::DeviceInfo, cv::Size) // benchmark of cv::gpu::swapChannels per device and size
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test before any GpuMat is created
+
+    cv::Size size = GET_PARAM(1);
+
+    cv::Mat src_host(size, CV_8UC4); // input type is fixed to four-channel 8-bit
+    fill(src_host, 0, 255); // fill() is declared in perf_utility.hpp; presumably random values in [0, 255] - confirm
+
+    cv::gpu::GpuMat src(src_host); // device-side matrix constructed from the host data
+
+    const int dstOrder[] = {2, 1, 0, 3}; // channel order: indices 0 and 2 exchanged, 1 and 3 kept in place
+
+    cv::gpu::swapChannels(src, dstOrder); // takes only src (no separate destination); one call outside TEST_CYCLE
+
+    TEST_CYCLE()
+    {
+        cv::gpu::swapChannels(src, dstOrder); // the call executed inside the TEST_CYCLE loop
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, SwapChannels, testing::Combine(ALL_DEVICES, GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
 // AlphaComp
 
-GPU_PERF_TEST(AlphaComp, cv::gpu::DeviceInfo, cv::Size, perf::MatType, AlphaOp)
+CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
+
+GPU_PERF_TEST(AlphaComp, cv::gpu::DeviceInfo, cv::Size, MatType, AlphaOp)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
     int alpha_op = GET_PARAM(3);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat img1_host(size, type);
-    cv::Mat img2_host(size, type);
+    fill(img1_host, 0, 255);
 
-    declare.in(img1_host, img2_host, WARMUP_RNG);
+    cv::Mat img2_host(size, type);
+    fill(img2_host, 0, 255);
 
     cv::gpu::GpuMat img1(img1_host);
     cv::gpu::GpuMat img2(img2_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::alphaComp(img1, img2, dst, alpha_op);
+
     TEST_CYCLE()
     {
         cv::gpu::alphaComp(img1, img2, dst, alpha_op);
@@ -876,111 +1200,43 @@ GPU_PERF_TEST(AlphaComp, cv::gpu::DeviceInfo, cv::Size, perf::MatType, AlphaOp)
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, AlphaComp, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4),
-                        testing::Values((int)cv::gpu::ALPHA_OVER, (int)cv::gpu::ALPHA_IN, (int)cv::gpu::ALPHA_OUT, (int)cv::gpu::ALPHA_ATOP, (int)cv::gpu::ALPHA_XOR, (int)cv::gpu::ALPHA_PLUS, (int)cv::gpu::ALPHA_OVER_PREMUL, (int)cv::gpu::ALPHA_IN_PREMUL, (int)cv::gpu::ALPHA_OUT_PREMUL, (int)cv::gpu::ALPHA_ATOP_PREMUL, (int)cv::gpu::ALPHA_XOR_PREMUL, (int)cv::gpu::ALPHA_PLUS_PREMUL, (int)cv::gpu::ALPHA_PREMUL)));
-
-//////////////////////////////////////////////////////////////////////
-// Canny
-
-GPU_PERF_TEST_1(Canny, cv::gpu::DeviceInfo)
-{
-    cv::gpu::DeviceInfo devInfo = GetParam();
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat image_host = readImage("perf/1280x1024.jpg", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(image_host.empty());
-
-    cv::gpu::GpuMat image(image_host);
-    cv::gpu::GpuMat dst;
-    cv::gpu::CannyBuf buf;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::Canny(image, buf, dst, 50.0, 100.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, Canny, ALL_DEVICES);
-
-//////////////////////////////////////////////////////////////////////
-// CalcHist
-
-GPU_PERF_TEST(CalcHist, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, CV_8UC1);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat hist;
-    cv::gpu::GpuMat buf;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::calcHist(src, hist, buf);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, CalcHist, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// EqualizeHist
-
-GPU_PERF_TEST(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, CV_8UC1);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-    cv::gpu::GpuMat hist;
-    cv::gpu::GpuMat buf;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::equalizeHist(src, dst, hist, buf);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC4), MatType(CV_16UC4), MatType(CV_32SC4), MatType(CV_32FC4)),
+    testing::Values(AlphaOp(cv::gpu::ALPHA_OVER),
+                    AlphaOp(cv::gpu::ALPHA_IN),
+                    AlphaOp(cv::gpu::ALPHA_OUT),
+                    AlphaOp(cv::gpu::ALPHA_ATOP),
+                    AlphaOp(cv::gpu::ALPHA_XOR),
+                    AlphaOp(cv::gpu::ALPHA_PLUS),
+                    AlphaOp(cv::gpu::ALPHA_OVER_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_IN_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_OUT_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_ATOP_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_XOR_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_PLUS_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_PREMUL))));
 
 //////////////////////////////////////////////////////////////////////
 // ImagePyramid
 
-GPU_PERF_TEST(ImagePyramid_build, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(ImagePyramid_build, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
 
     cv::gpu::ImagePyramid pyr;
 
+    pyr.build(src, 5);
+
     TEST_CYCLE()
     {
         pyr.build(src, 5);
@@ -988,27 +1244,30 @@ GPU_PERF_TEST(ImagePyramid_build, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, ImagePyramid_build, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
-GPU_PERF_TEST(ImagePyramid_getLayer, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(ImagePyramid_getLayer, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
     cv::gpu::ImagePyramid pyr(src, 3);
 
+    pyr.getLayer(dst, cv::Size(size.width / 2 + 10, size.height / 2 + 10));
+
     TEST_CYCLE()
     {
         pyr.getLayer(dst, cv::Size(size.width / 2 + 10, size.height / 2 + 10));
@@ -1016,8 +1275,10 @@ GPU_PERF_TEST(ImagePyramid_getLayer, cv::gpu::DeviceInfo, cv::Size, perf::MatTyp
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, ImagePyramid_getLayer, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
 #endif
diff --git a/modules/gpu/perf/perf_utility.hpp b/modules/gpu/perf/perf_utility.hpp
index f15610b98..41be78774 100644
--- a/modules/gpu/perf/perf_utility.hpp
+++ b/modules/gpu/perf/perf_utility.hpp
@@ -3,15 +3,16 @@
 
 void fill(cv::Mat& m, double a, double b);
 
+using perf::MatType;
+using perf::MatDepth;
+
 enum {HORIZONTAL_AXIS = 0, VERTICAL_AXIS = 1, BOTH_AXIS = -1};
 
 CV_ENUM(MorphOp, cv::MORPH_ERODE, cv::MORPH_DILATE)
 CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
 CV_ENUM(FlipCode, HORIZONTAL_AXIS, VERTICAL_AXIS, BOTH_AXIS)
-CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
-CV_ENUM(MatchMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
-CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2)
-CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
+CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
+CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)
 
 struct CvtColorInfo
 {
@@ -24,6 +25,22 @@ struct CvtColorInfo
 
 void PrintTo(const CvtColorInfo& info, std::ostream* os);
 
+#define IMPLEMENT_PARAM_CLASS(name, type) /* defines class 'name' holding one 'type' value, plus a PrintTo overload for it */ \
+    class name \
+    { \
+    public: \
+        name ( type arg = type ()) : val_(arg) {} /* non-explicit: implicit conversion from 'type'; default is a value-initialized 'type' */ \
+        operator type () const {return val_;} /* implicit conversion back to 'type' */ \
+    private: \
+        type val_; \
+    }; \
+    inline void PrintTo( name param, std::ostream* os) \
+    { \
+        *os << #name <<  " = " << testing::PrintToString(static_cast< type >(param)); /* writes "<name> = <value>" */ \
+    }
+
+IMPLEMENT_PARAM_CLASS(Channels, int)
+
 namespace cv { namespace gpu
 {
     void PrintTo(const cv::gpu::DeviceInfo& info, std::ostream* os);
@@ -55,8 +72,6 @@ namespace cv { namespace gpu
 
 cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
 
-bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
-
 const std::vector<cv::gpu::DeviceInfo>& devices();
 
 std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature);
diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu
index 732b0f775..a51c04306 100644
--- a/modules/gpu/src/cuda/resize.cu
+++ b/modules/gpu/src/cuda/resize.cu
@@ -46,6 +46,7 @@
 #include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/filters.hpp"
+# include <cfloat>
 
 namespace cv { namespace gpu { namespace device
 {
@@ -65,6 +66,17 @@ namespace cv { namespace gpu { namespace device
             }
         }
 
+        template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst) // one thread per dst pixel; fx/fy are not read in this kernel body
+        {
+            const int x = blockDim.x * blockIdx.x + threadIdx.x; // destination column handled by this thread
+            const int y = blockDim.y * blockIdx.y + threadIdx.y; // destination row handled by this thread
+
+            if (x < dst.cols && y < dst.rows) // skip threads that fall outside the destination
+            {
+                dst(y, x) = saturate_cast<T>(src(y, x)); // src is indexed with destination coordinates; the result is saturated to T
+            }
+        }
+
         template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
         {
             static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
@@ -74,13 +86,47 @@ namespace cv { namespace gpu { namespace device
 
                 BrdReplicate<T> brd(src.rows, src.cols);
                 BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
-                Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
+                Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);
 
                 resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
                 cudaSafeCall( cudaGetLastError() );
             }
         };
 
+        template <typename T> struct ResizeDispatcherStream<AreaFilter, T> // specialization: launches resize_area over an AreaFilter source
+        {
+            static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
+            {
+                dim3 block(32, 8); // 32x8 threads per block
+                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); // enough blocks to cover every destination pixel
+
+                BrdConstant<T> brd(src.rows, src.cols); // constant border; no border value is passed, so BrdConstant's default applies
+                BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd); // source reads go through the border handler
+                AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy); // the filter receives the scale factors
+                resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
+                cudaSafeCall( cudaGetLastError() ); // report launch errors
+                if (stream == 0) // default (null) stream: wait for the device before returning
+                    cudaSafeCall( cudaDeviceSynchronize() );
+            }
+        };
+
+        template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T> // specialization: launches resize_area over an IntegerAreaFilter source
+        {
+            static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
+            {
+                dim3 block(32, 8); // 32x8 threads per block
+                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); // enough blocks to cover every destination pixel
+
+                BrdConstant<T> brd(src.rows, src.cols); // constant border; no border value is passed, so BrdConstant's default applies
+                BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd); // source reads go through the border handler
+                IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy); // the filter receives the scale factors
+                resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
+                cudaSafeCall( cudaGetLastError() ); // report launch errors
+                if (stream == 0) // default (null) stream: wait for the device before returning
+                    cudaSafeCall( cudaDeviceSynchronize() );
+            }
+        };
+
         template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
         {
             static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)
@@ -169,15 +215,35 @@ namespace cv { namespace gpu { namespace device
             }
         };
 
+        template <typename T> struct ResizeDispatcher<AreaFilter, T> // chooses between the integer-scale and the general area filter
+        {
+            static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream) // srcWhole, xoff and yoff are not used by this specialization
+            {
+                int iscale_x = round(fx); // nearest integer to the horizontal scale factor
+                int iscale_y = round(fy); // nearest integer to the vertical scale factor
+
+                if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN) // FLT_MIN is the smallest positive normalized float, so this is effectively an exact-integer test
+                    ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream); // both factors are integers
+                else
+                    ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream); // at least one factor has a fractional part
+            }
+        };
+
         template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, 
             DevMem2Db dst, int interpolation, cudaStream_t stream)
         {
             typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);
 
-            static const caller_t callers[3] = 
+            static const caller_t callers[4] =
             {
-                ResizeDispatcher<PointFilter, T>::call, ResizeDispatcher<LinearFilter, T>::call, ResizeDispatcher<CubicFilter, T>::call
+                ResizeDispatcher<PointFilter, T>::call,
+                ResizeDispatcher<LinearFilter, T>::call,
+                ResizeDispatcher<CubicFilter, T>::call,
+                ResizeDispatcher<AreaFilter, T>::call
             };
+            // change to linear if area interpolation is requested for upscaling
+            if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
+                interpolation = 1;
 
             callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy, 
                 static_cast< DevMem2D_<T> >(dst), stream);
diff --git a/modules/gpu/src/opencv2/gpu/device/filters.hpp b/modules/gpu/src/opencv2/gpu/device/filters.hpp
index 537d6aff2..9294b5556 100644
--- a/modules/gpu/src/opencv2/gpu/device/filters.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/filters.hpp
@@ -55,7 +55,7 @@ namespace cv { namespace gpu { namespace device
         typedef typename Ptr2D::elem_type elem_type;
         typedef float index_type;
 
-        explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_) : src(src_) {}
+        explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
 
         __device__ __forceinline__ elem_type operator ()(float y, float x) const
         {
@@ -70,7 +70,7 @@ namespace cv { namespace gpu { namespace device
         typedef typename Ptr2D::elem_type elem_type;
         typedef float index_type;
 
-        explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_) : src(src_) {}
+        explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
 
         __device__ __forceinline__ elem_type operator ()(float y, float x) const
         {
@@ -107,7 +107,7 @@ namespace cv { namespace gpu { namespace device
         typedef float index_type;
         typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
 
-        explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_) : src(src_) {}
+        explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
 
         static __device__ __forceinline__ float bicubicCoeff(float x_)
         {
@@ -154,6 +154,111 @@ namespace cv { namespace gpu { namespace device
 
         const Ptr2D src;
     };
+    // Area filter for integer scaling: each output value is the sum of the covered source pixels, each weighted by 1 / (scale_x * scale_y).
+    template <typename Ptr2D> struct IntegerAreaFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
+            : src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {} // scale: weight applied to every accumulated source pixel
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const // (y, x) are destination coordinates
+        {
+            float fsx1 = x * scale_x; // left edge of the source footprint
+            float fsx2 = fsx1 + scale_x; // right edge of the source footprint
+
+            int sx1 = __float2int_ru(fsx1); // left edge rounded up
+            int sx2 = __float2int_rd(fsx2); // right edge rounded down
+
+            float fsy1 = y * scale_y; // top edge of the source footprint
+            float fsy2 = fsy1 + scale_y; // bottom edge of the source footprint
+
+            int sy1 = __float2int_ru(fsy1); // top edge rounded up
+            int sy2 = __float2int_rd(fsy2); // bottom edge rounded down
+
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type; // float vector with the same channel count as elem_type
+            work_type out = VecTraits<work_type>::all(0.f); // accumulator, starts at zero
+
+            for(int dy = sy1; dy < sy2; ++dy)
+                for(int dx = sx1; dx < sx2; ++dx)
+                {
+                    out = out + src(dy, dx) * scale; // weighted sum over the half-open ranges [sy1, sy2) x [sx1, sx2)
+                }
+
+            return saturate_cast<elem_type>(out); // convert the float accumulator back to the element type
+        }
+
+        const Ptr2D src;
+        float scale_x, scale_y ,scale;
+    };
+
+    template <typename Ptr2D> struct AreaFilter // area filter that also adds partially covered edge and corner pixels with fractional weights
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
+            : src(src_), scale_x(scale_x_), scale_y(scale_y_){}
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const // (y, x) are destination coordinates
+        {
+            float fsx1 = x * scale_x; // left edge of the source footprint
+            float fsx2 = fsx1 + scale_x; // right edge of the source footprint
+
+            int sx1 = __float2int_ru(fsx1); // first fully covered column (left edge rounded up)
+            int sx2 = __float2int_rd(fsx2); // one past the last fully covered column (right edge rounded down)
+
+            float fsy1 = y * scale_y; // top edge of the source footprint
+            float fsy2 = fsy1 + scale_y; // bottom edge of the source footprint
+
+            int sy1 = __float2int_ru(fsy1); // first fully covered row (top edge rounded up)
+            int sy2 = __float2int_rd(fsy2); // one past the last fully covered row (bottom edge rounded down)
+
+            float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1)); // weight of a fully covered pixel; extents are capped by what remains up to src.width / src.height (NOTE(review): these must be members of Ptr2D - confirm)
+
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type; // float vector with the same channel count as elem_type
+            work_type out = VecTraits<work_type>::all(0.f); // accumulator, starts at zero
+
+            for (int dy = sy1; dy < sy2; ++dy) // rows that are fully covered vertically
+            {
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src(dy, dx) * scale; // fully covered pixels: full weight
+
+                if (sx1 > fsx1) // left edge cuts through column sx1 - 1
+                    out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
+
+                if (sx2 < fsx2) // right edge cuts through column sx2
+                    out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
+            }
+
+            if (sy1 > fsy1) // top edge cuts through row sy1 - 1
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
+
+            if (sy2 < fsy2) // bottom edge cuts through row sy2
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
+
+            if ((sy1 > fsy1) &&  (sx1 > fsx1)) // top-left corner pixel
+                out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
+
+            if ((sy1 > fsy1) &&  (sx2 < fsx2)) // top-right corner pixel
+                out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
+
+            if ((sy2 < fsy2) &&  (sx2 < fsx2)) // bottom-right corner pixel
+                out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
+
+            if ((sy2 < fsy2) &&  (sx1 > fsx1)) // bottom-left corner pixel
+                out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
+
+            return saturate_cast<elem_type>(out); // convert the float accumulator back to the element type
+        }
+
+        const Ptr2D src;
+        float scale_x, scale_y;
+        int width, haight; // NOTE(review): never initialized or read in this struct; "haight" looks like a typo for "height"
+    };
 }}} // namespace cv { namespace gpu { namespace device
 
 #endif // __OPENCV_GPU_FILTERS_HPP__
diff --git a/modules/gpu/src/opencv2/gpu/device/vec_traits.hpp b/modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
index 7ead7cb50..955fe0d4b 100644
--- a/modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
@@ -221,7 +221,7 @@ namespace cv { namespace gpu { namespace device
 
     template<> struct VecTraits<char> 
     { 
-        typedef char elem_type; 
+        typedef char elem_type;
         enum {cn=1}; 
         static __device__ __host__ __forceinline__ char all(char v) {return v;}
         static __device__ __host__ __forceinline__ char make(char x) {return x;}
@@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace device
     };
     template<> struct VecTraits<schar> 
     { 
-        typedef schar elem_type; 
+        typedef schar elem_type;
         enum {cn=1}; 
         static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
         static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
diff --git a/modules/gpu/src/resize.cpp b/modules/gpu/src/resize.cpp
index ef28892f4..672dbc288 100644
--- a/modules/gpu/src/resize.cpp
+++ b/modules/gpu/src/resize.cpp
@@ -61,7 +61,8 @@ namespace cv { namespace gpu { namespace device
 void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
 {
     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
+    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR
+            || interpolation == INTER_CUBIC || interpolation == INTER_AREA);
     CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));
 
     if (dsize == Size())
@@ -90,7 +91,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
     src.locateROI(wholeSize, ofs);
 
     bool useNpp = (src.type() == CV_8UC1 || src.type() == CV_8UC4);
-    useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || src.type() == CV_8UC4);
+    useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || (src.type() == CV_8UC4 && interpolation != INTER_AREA));
 
     if (useNpp)
     {
diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp
index b4c9ddc46..22d7ba3fd 100644
--- a/modules/gpu/test/test_resize.cpp
+++ b/modules/gpu/test/test_resize.cpp
@@ -48,7 +48,8 @@
 
 namespace
 {
-    template <typename T, template <typename> class Interpolator> void resizeImpl(const cv::Mat& src, cv::Mat& dst, double fx, double fy)
+    template <typename T, template <typename> class Interpolator>
+    void resizeImpl(const cv::Mat& src, cv::Mat& dst, double fx, double fy)
     {
         const int cn = src.channels();
 
@@ -156,6 +157,51 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine(
     testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
     WHOLE_SUBMAT));
 
+
+/////////////////
+PARAM_TEST_CASE(ResizeArea, cv::gpu::DeviceInfo, cv::Size, MatType, double, Interpolation, UseRoi) // fixture parameterized by device, size, matrix type, scale coefficient, interpolation and ROI flag
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    double coeff; // taken from parameter 3
+    int interpolation;
+    int type;
+    bool useRoi;
+
+    virtual void SetUp() // unpacks the test parameters into the members above
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+        coeff = GET_PARAM(3);
+        interpolation = GET_PARAM(4);
+        useRoi = GET_PARAM(5);
+
+        cv::gpu::setDevice(devInfo.deviceID()); // make the parameterized device the current one
+    }
+};
+
+TEST_P(ResizeArea, Accuracy) // compares cv::gpu::resize against cv::resize on a random matrix
+{
+    cv::Mat src = randomMat(size, type);
+
+    cv::gpu::GpuMat dst = createMat(cv::Size(cv::saturate_cast<int>(src.cols * coeff), cv::saturate_cast<int>(src.rows * coeff)), type, useRoi); // destination created up front at the scaled size
+    cv::gpu::resize(loadMat(src, useRoi), dst, cv::Size(), coeff, coeff, interpolation); // the same coefficient is used for both axes
+
+    cv::Mat dst_cpu;
+    cv::resize(src, dst_cpu, cv::Size(), coeff, coeff, interpolation); // CPU reference result
+
+    EXPECT_MAT_NEAR(dst_cpu, dst, src.depth() == CV_32F ? 1e-2 : 1.0); // tolerance: 1e-2 for CV_32F, 1.0 for every other depth
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeArea, testing::Combine( // every combination of the value sets below
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(0.3, 0.5), // scale coefficients; both are below 1
+    testing::Values(Interpolation(cv::INTER_AREA)), // only area interpolation is exercised here
+    WHOLE_SUBMAT));
+
 ///////////////////////////////////////////////////////////////////
 // Test NPP
 
diff --git a/modules/gpu/test/utility.hpp b/modules/gpu/test/utility.hpp
index f1a83fb1b..3ad02decb 100644
--- a/modules/gpu/test/utility.hpp
+++ b/modules/gpu/test/utility.hpp
@@ -277,7 +277,7 @@ IMPLEMENT_PARAM_CLASS(Channels, int)
 
 CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
 
-CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
+CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
 
 CV_ENUM(BorderType, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
 #define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP))
diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index 88d1196a2..be21ae911 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -878,8 +878,8 @@ struct VResizeLinear
         VecOp vecOp;
 
         int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
-	    #if CV_ENABLE_UNROLLED
-		for( ; x <= width - 4; x += 4 )
+        #if CV_ENABLE_UNROLLED
+        for( ; x <= width - 4; x += 4 )
         {
             WT t0, t1;
             t0 = S0[x]*b0 + S1[x]*b1;
@@ -1035,7 +1035,7 @@ struct VResizeLanczos4
         CastOp castOp;
         VecOp vecOp;
         int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
-		#if CV_ENABLE_UNROLLED
+        #if CV_ENABLE_UNROLLED
         for( ; x <= width - 4; x += 4 )
         {
             WT b = beta[0];
@@ -1130,7 +1130,7 @@ static void resizeGeneric_( const Mat& src, Mat& dst,
         if( k0 < ksize )
             hresize( srows + k0, rows + k0, ksize - k0, xofs, alpha,
                      ssize.width, dsize.width, cn, xmin, xmax );
-		vresize( (const WT**)rows, (T*)(dst.data + dst.step*dy), beta, dsize.width );
+        vresize( (const WT**)rows, (T*)(dst.data + dst.step*dy), beta, dsize.width );
     }
 }
 
@@ -1163,8 +1163,8 @@ static void resizeAreaFast_( const Mat& src, Mat& dst, const int* ofs, const int
         {
             const T* S = (const T*)(src.data + src.step*sy0) + xofs[dx];
             WT sum = 0;
-			k=0;
-			#if CV_ENABLE_UNROLLED
+            k=0;
+            #if CV_ENABLE_UNROLLED
             for( ; k <= area - 4; k += 4 )
                 sum += S[ofs[k]] + S[ofs[k+1]] + S[ofs[k+2]] + S[ofs[k+3]];
             #endif
@@ -1272,15 +1272,18 @@ static void resizeArea_( const Mat& src, Mat& dst, const DecimateAlpha* xofs, in
             WT beta1 = 1 - beta;
             T* D = (T*)(dst.data + dst.step*cur_dy);
             if( fabs(beta) < 1e-3 )
+            {
+                if(cur_dy >= dsize.height) return;
                 for( dx = 0; dx < dsize.width; dx++ )
                 {
-                    D[dx] = saturate_cast<T>(sum[dx] + buf[dx]);
+                    D[dx] = saturate_cast<T>((sum[dx] + buf[dx]) / min(scale_y, src.rows - cur_dy * scale_y));
                     sum[dx] = buf[dx] = 0;
                 }
+            }
             else
                 for( dx = 0; dx < dsize.width; dx++ )
                 {
-                    D[dx] = saturate_cast<T>(sum[dx] + buf[dx]*beta1);
+                    D[dx] = saturate_cast<T>((sum[dx] + buf[dx]* beta1)/ min(scale_y, src.rows - cur_dy*scale_y));
                     sum[dx] = buf[dx]*beta;
                     buf[dx] = 0;
                 }
@@ -1329,11 +1332,11 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
         resizeGeneric_<
             HResizeLinear<uchar, int, short,
                 INTER_RESIZE_COEF_SCALE,
-				HResizeLinearVec_8u32s>,
+                HResizeLinearVec_8u32s>,
             VResizeLinear<uchar, int, short,
                 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
-				VResizeLinearVec_32s8u> >,
-		0,
+                VResizeLinearVec_32s8u> >,
+        0,
         resizeGeneric_<
             HResizeLinear<ushort, float, float, 1,
                 HResizeLinearVec_16u32f>,
@@ -1344,7 +1347,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
                 HResizeLinearVec_16s32f>,
             VResizeLinear<short, float, float, Cast<float, short>,
                 VResizeLinearVec_32f16s> >,
-		0,
+        0,
         resizeGeneric_<
             HResizeLinear<float, float, float, 1,
                 HResizeLinearVec_32f>,
@@ -1374,7 +1377,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
             HResizeCubic<short, float, float>,
             VResizeCubic<short, float, float, Cast<float, short>,
             VResizeCubicVec_32f16s> >,
-		0,
+        0,
         resizeGeneric_<
             HResizeCubic<float, float, float>,
             VResizeCubic<float, float, float, Cast<float, float>,
@@ -1396,10 +1399,10 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
         resizeGeneric_<HResizeLanczos4<ushort, float, float>,
             VResizeLanczos4<ushort, float, float, Cast<float, ushort>,
             VResizeNoVec> >,
-       	resizeGeneric_<HResizeLanczos4<short, float, float>,
+        resizeGeneric_<HResizeLanczos4<short, float, float>,
             VResizeLanczos4<short, float, float, Cast<float, short>,
             VResizeNoVec> >,
-		0,
+        0,
         resizeGeneric_<HResizeLanczos4<float, float, float>,
             VResizeLanczos4<float, float, float, Cast<float, float>,
             VResizeNoVec> >,
@@ -1412,8 +1415,8 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
     static ResizeAreaFastFunc areafast_tab[] =
     {
         resizeAreaFast_<uchar, int>, 0,
-		resizeAreaFast_<ushort, float>,
-		resizeAreaFast_<short, float>,
+        resizeAreaFast_<ushort, float>,
+        resizeAreaFast_<short, float>,
         0,
         resizeAreaFast_<float, float>,
         resizeAreaFast_<double, double>,
@@ -1498,7 +1501,6 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
 
         AutoBuffer<DecimateAlpha> _xofs(ssize.width*2);
         DecimateAlpha* xofs = _xofs;
-        double scale = 1.f/(scale_x*scale_y);
 
         for( dx = 0, k = 0; dx < dsize.width; dx++ )
         {
@@ -1512,7 +1514,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
                 assert( k < ssize.width*2 );
                 xofs[k].di = dx*cn;
                 xofs[k].si = (sx1-1)*cn;
-                xofs[k++].alpha = (float)((sx1 - fsx1)*scale);
+                xofs[k++].alpha = (float)((sx1 - fsx1) / min(scale_x, src.cols - fsx1));
             }
 
             for( sx = sx1; sx < sx2; sx++ )
@@ -1520,7 +1522,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
                 assert( k < ssize.width*2 );
                 xofs[k].di = dx*cn;
                 xofs[k].si = sx*cn;
-                xofs[k++].alpha = (float)scale;
+                xofs[k++].alpha = 1.f / min(scale_x, src.cols - fsx1);
             }
 
             if( fsx2 - sx2 > 1e-3 )
@@ -1528,10 +1530,9 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
                 assert( k < ssize.width*2 );
                 xofs[k].di = dx*cn;
                 xofs[k].si = sx2*cn;
-                xofs[k++].alpha = (float)((fsx2 - sx2)*scale);
+                xofs[k++].alpha = (float)(min(fsx2 - sx2, 1.) / min(scale_x, src.cols - fsx1));
             }
         }
-
         func( src, dst, xofs, k ,scale_y);
         return;
     }
@@ -3480,7 +3481,7 @@ void cvLinearPolar( const CvArr* srcarr, CvArr* dstarr,
     if( !CV_ARE_TYPES_EQ( src, dst ))
         CV_Error( CV_StsUnmatchedFormats, "" );
 
-	ssize.width = src->cols;
+    ssize.width = src->cols;
     ssize.height = src->rows;
     dsize.width = dst->cols;
     dsize.height = dst->rows;
diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp
index 7eab00329..afc0bc5cc 100644
--- a/modules/imgproc/test/test_imgwarp.cpp
+++ b/modules/imgproc/test/test_imgwarp.cpp
@@ -1462,6 +1462,49 @@ TEST(Imgproc_fitLine_Mat_3dC1, regression)
     ASSERT_EQ(line2.size(), (size_t)6);
 }
 
+TEST(Imgproc_resize_area, regression) // INTER_AREA resize of a fixed 16x16 ushort image by 0.3 must reproduce the stored 5x5 result exactly
+{
+    static ushort input_data[16 * 16] = {
+         90,  94,  80,   3, 231,   2, 186, 245, 188, 165,  10,  19, 201, 169,   8, 228,
+         86,   5, 203, 120, 136, 185,  24,  94,  81, 150, 163, 137,  88, 105, 132, 132,
+        236,  48, 250, 218,  19,  52,  54, 221, 159, 112,  45,  11, 152, 153, 112, 134,
+         78, 133, 136,  83,  65,  76,  82, 250,   9, 235, 148,  26, 236, 179, 200,  50,
+         99,  51, 103, 142, 201,  65, 176,  33,  49, 226, 177, 109,  46,  21,  67, 130,
+         54, 125, 107, 154, 145,  51, 199, 189, 161, 142, 231, 240, 139, 162, 240,  22,
+        231,  86,  79, 106,  92,  47, 146, 156,  36, 207,  71,  33,   2, 244, 221,  71,
+         44, 127,  71, 177,  75, 126,  68, 119, 200, 129, 191, 251,   6, 236, 247,  6,
+        133, 175,  56, 239, 147, 221, 243, 154, 242,  82, 106,  99,  77, 158,  60, 229,
+          2,  42,  24, 174,  27, 198,  14, 204, 246, 251, 141,  31, 114, 163,  29, 147,
+        121,  53,  74,  31, 147, 189,  42,  98, 202,  17, 228, 123, 209,  40,  77,  49,
+        112, 203,  30,  12, 205,  25,  19, 106, 145, 185, 163, 201, 237, 223, 247,  38,
+         33, 105, 243, 117,  92, 179, 204, 248, 160,  90,  73, 126,   2,  41, 213, 204,
+          6, 124, 195, 201, 230, 187, 210, 167,  48,  79, 123, 159, 145, 218, 105, 209,
+        240, 152, 136, 235, 235, 164, 157,  9,  152,  38,  27, 209, 120,  77, 238, 196,
+        240, 233,  10, 241,  90,  67,  12, 79,    0,  43,  58,  27,  83, 199, 190, 182};
+
+    static ushort expected_data[5 * 5] = {
+        120, 100, 151, 101, 130,
+        106, 115, 141, 130, 127,
+         91, 136, 170, 114, 140,
+        104, 122, 131, 147, 133,
+        161, 163,  70, 107, 182
+    };
+
+    cv::Mat src(16, 16, CV_16UC1, input_data); // wraps the static array
+    cv::Mat actual;
+    cv::Mat expected(5,5,CV_16UC1, expected_data); // wraps the static array
+
+    cv::resize(src, actual, cv::Size(), 0.3, 0.3, INTER_AREA); // output size is derived from the 0.3 factors
+
+    ASSERT_EQ(actual.type(), expected.type());
+    ASSERT_EQ(actual.size(), expected.size());
+    Mat diff;
+    absdiff(actual, expected, diff);
+    Mat one_channel_diff = diff.reshape(1); // reshape to a single channel before taking the norm
+    ASSERT_EQ(norm(one_channel_diff, cv::NORM_INF),0); // the largest absolute difference must be exactly zero
+}
+
+
 //////////////////////////////////////////////////////////////////////////
 
 TEST(Imgproc_Resize, accuracy) { CV_ResizeTest test; test.safe_run(); }