diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp
index 85336c936..0a42424cb 100644
--- a/modules/photo/include/opencv2/photo.hpp
+++ b/modules/photo/include/opencv2/photo.hpp
@@ -153,6 +153,7 @@ denoising time. Recommended value 21 pixels
 parameter applied to all channels or one per channel in dst. Big h value
 perfectly removes noise but also removes image details, smaller h
 value preserves details but also preserves some noise
+@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
 
 This function expected to be applied to grayscale images. For colored images look at
 fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored
@@ -162,7 +163,8 @@ parameter.
  */
 CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst,
                                         const std::vector<float>& h,
-                                        int templateWindowSize = 7, int searchWindowSize = 21);
+                                        int templateWindowSize = 7, int searchWindowSize = 21,
+                                        int normType = NORM_L2);
 
 /** @brief Modification of fastNlMeansDenoising function for colored images
 
diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp
index cd8a751f6..4e7922e40 100644
--- a/modules/photo/src/denoising.cpp
+++ b/modules/photo/src/denoising.cpp
@@ -45,6 +45,54 @@
 #include "fast_nlmeans_multi_denoising_invoker.hpp"
 #include "fast_nlmeans_denoising_opencl.hpp"
 
+template<typename ST, typename IT, typename UIT, typename D> // ST: per-channel sample type; IT, UIT, D: forwarded as-is to the invoker
+static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector<float>& h,
+                                   int templateWindowSize, int searchWindowSize)
+{ // Picks the FastNlMeansDenoisingInvoker specialization from src's channel count and h.size(), then runs it over all rows.
+    int hn = (int)h.size(); // hn == 1 selects the scalar 'int' last template argument below, otherwise VecNi
+
+    switch (CV_MAT_CN(src.type())) {
+        case 1: // single channel: pixel type is ST itself and there is no VecNi variant
+            parallel_for_(cv::Range(0, src.rows),
+                          FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>(
+                              src, dst, templateWindowSize, searchWindowSize, &h[0]));
+            break;
+        case 2: // 2..4 channels: pixel type is Vec<ST, N>
+            if (hn == 1)
+                parallel_for_(cv::Range(0, src.rows),
+                              FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
+                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
+            else
+                parallel_for_(cv::Range(0, src.rows),
+                              FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
+                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
+            break;
+        case 3:
+            if (hn == 1)
+                parallel_for_(cv::Range(0, src.rows),
+                              FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
+                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
+            else
+                parallel_for_(cv::Range(0, src.rows),
+                              FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
+                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
+            break;
+        case 4:
+            if (hn == 1)
+                parallel_for_(cv::Range(0, src.rows),
+                              FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
+                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
+            else
+                parallel_for_(cv::Range(0, src.rows),
+                              FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
+                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
+            break;
+        default: // any other channel count is rejected with StsBadArg
+            CV_Error(Error::StsBadArg,
+                     "Unsupported number of channels! Only 1, 2, 3, and 4 are supported");
+    }
+}
+
 void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
                                int templateWindowSize, int searchWindowSize)
 {
@@ -53,66 +101,59 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
 }
 
 void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, const std::vector<float>& h,
-                               int templateWindowSize, int searchWindowSize)
+                               int templateWindowSize, int searchWindowSize, int normType) // normType: NORM_L2 or NORM_L1
 {
-    int hn = (int)h.size();
-    CV_Assert(hn == 1 || hn == CV_MAT_CN(_src.type()));
+    int hn = (int)h.size(), type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    CV_Assert(hn == 1 || hn == cn); // h must hold one value, or one value per channel
 
     Size src_size = _src.size();
     CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) &&
                src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes
                ocl_fastNlMeansDenoising(_src, _dst, &h[0], hn,
-                                        templateWindowSize, searchWindowSize, false))
+                                        templateWindowSize, searchWindowSize, normType))
 
     Mat src = _src.getMat();
     _dst.create(src_size, src.type());
     Mat dst = _dst.getMat();
 
+    switch (normType) { // CPU path: the norm selects the distance functor, the depth selects the sample types
+        case NORM_L2: // DistSquared, CV_8U only
 #ifdef HAVE_TEGRA_OPTIMIZATION
-    if(hn == 1 && tegra::useTegra() &&
-       tegra::fastNlMeansDenoising(src, dst, h[0], templateWindowSize, searchWindowSize))
-        return;
+            if(hn == 1 && tegra::useTegra() &&
+               tegra::fastNlMeansDenoising(src, dst, h[0], templateWindowSize, searchWindowSize))
+                return; // the Tegra call returned true, so skip the generic path
 #endif
-
-    switch (src.type()) {
-        case CV_8U:
-            parallel_for_(cv::Range(0, src.rows),
-                          FastNlMeansDenoisingInvoker<uchar, int, unsigned, DistSquared, int>(
-                              src, dst, templateWindowSize, searchWindowSize, &h[0]));
+            switch (depth) {
+                case CV_8U:
+                    fastNlMeansDenoising_<uchar, int, unsigned, DistSquared>(src, dst, h,
+                                                                             templateWindowSize,
+                                                                             searchWindowSize);
+                    break;
+                default:
+                    CV_Error(Error::StsBadArg,
+                             "Unsupported depth! Only CV_8U is supported for NORM_L2");
+            }
             break;
-        case CV_8UC2:
-            if (hn == 1)
-                parallel_for_(cv::Range(0, src.rows),
-                              FastNlMeansDenoisingInvoker<Vec2b, int, unsigned, DistSquared, int>(
-                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
-            else
-                parallel_for_(cv::Range(0, src.rows),
-                              FastNlMeansDenoisingInvoker<Vec2b, int, unsigned, DistSquared, Vec2i>(
-                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
-            break;
-        case CV_8UC3:
-            if (hn == 1)
-                parallel_for_(cv::Range(0, src.rows),
-                              FastNlMeansDenoisingInvoker<Vec3b, int, unsigned, DistSquared, int>(
-                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
-            else
-                parallel_for_(cv::Range(0, src.rows),
-                              FastNlMeansDenoisingInvoker<Vec3b, int, unsigned, DistSquared, Vec3i>(
-                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
-            break;
-        case CV_8UC4:
-            if (hn == 1)
-                parallel_for_(cv::Range(0, src.rows),
-                              FastNlMeansDenoisingInvoker<Vec4b, int, unsigned, DistSquared, int>(
-                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
-            else
-                parallel_for_(cv::Range(0, src.rows),
-                              FastNlMeansDenoisingInvoker<Vec4b, int, unsigned, DistSquared, Vec4i>(
-                                  src, dst, templateWindowSize, searchWindowSize, &h[0]));
+        case NORM_L1: // DistAbs, CV_8U or CV_16U
+            switch (depth) {
+                case CV_8U:
+                    fastNlMeansDenoising_<uchar, int, unsigned, DistAbs>(src, dst, h,
+                                                                         templateWindowSize,
+                                                                         searchWindowSize);
+                    break;
+                case CV_16U: // ushort samples, with int64/uint64 as the IT/UIT template arguments
+                    fastNlMeansDenoising_<ushort, int64, uint64, DistAbs>(src, dst, h,
+                                                                          templateWindowSize,
+                                                                          searchWindowSize);
+                    break;
+                default:
+                    CV_Error(Error::StsBadArg,
+                             "Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1");
+            }
             break;
         default:
             CV_Error(Error::StsBadArg,
-                "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported");
+                     "Unsupported norm type! Only NORM_L2 and NORM_L1 are supported");
     }
 }
 
diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp
index b7fdc7cf9..1c511f37b 100644
--- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp
+++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp
@@ -29,8 +29,10 @@ static int divUp(int a, int b)
 }
 
 template <typename FT, typename ST, typename WT>
-static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, const FT *h, int hn, int cn,
-                                      int & almostTemplateWindowSizeSqBinShift, bool abs)
+static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight,
+                                      int searchWindowSize, int templateWindowSize,
+                                      const FT *h, int hn, int cn, int normType,
+                                      int & almostTemplateWindowSizeSqBinShift)
 {
     const WT maxEstimateSumValue = searchWindowSize * searchWindowSize *
         std::numeric_limits<ST>::max();
@@ -50,7 +52,7 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
     FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
 
     const FT WEIGHT_THRESHOLD = 1e-3f;
-    int maxDist = abs ? std::numeric_limits<ST>::max() * cn :
+    int maxDist = normType == NORM_L1 ? std::numeric_limits<ST>::max() * cn :
         std::numeric_limits<ST>::max() * std::numeric_limits<ST>::max() * cn;
     int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
     FT den[4];
@@ -66,7 +68,8 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
                          " -D wlut_t=%s -D convert_wlut_t=%s%s%s",
                          ocl::typeToStr(depth), ocl::typeToStr(CV_MAKE_TYPE(depth, hn)),
                          ocl::typeToStr(CV_32SC(hn)), ocl::convertTypeStr(depth, CV_32S, hn, buf),
-                         doubleSupport ? " -D DOUBLE_SUPPORT" : "", abs ? " -D ABS" : ""));
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : "",
+                         normType == NORM_L1 ? " -D ABS" : ""));
     if (k.empty())
         return false;
 
@@ -79,13 +82,14 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
 }
 
 static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn,
-                                     int templateWindowSize, int searchWindowSize, bool abs)
+                                     int templateWindowSize, int searchWindowSize, int normType)
 {
     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
     int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT;
     Size size = _src.size();
 
-    if (cn != 1 && cn != 2 && cn != 3 && cn != 4 && depth != CV_8U && (!abs || depth != CV_16U))
+    if (cn < 1 || cn > 4 || ((normType != NORM_L2 || depth != CV_8U) &&
+                             (normType != NORM_L1 || (depth != CV_8U && depth != CV_16U))))
         return false;
 
     int templateWindowHalfWize = templateWindowSize / 2;
@@ -117,7 +121,8 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const fl
                          ctaSize, templateWindowHalfWize, searchWindowHalfSize,
                          ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn,
                          (depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn),
-                         ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), abs ? " -D ABS" : "");
+                         ocl::convertTypeStr(CV_32S, depth, cn, buf[3]),
+                         normType == NORM_L1 ? " -D ABS" : "");
 
     ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
     if (k.empty())
@@ -127,15 +132,13 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const fl
     if ((depth == CV_8U &&
          !ocl_calcAlmostDist2Weight<float, uchar, int>(almostDist2Weight,
                                                        searchWindowSize, templateWindowSize,
-                                                       h, hn, cn,
-                                                       almostTemplateWindowSizeSqBinShift,
-                                                       abs)) ||
+                                                       h, hn, cn, normType,
+                                                       almostTemplateWindowSizeSqBinShift)) ||
         (depth == CV_16U &&
          !ocl_calcAlmostDist2Weight<float, ushort, int64>(almostDist2Weight,
                                                           searchWindowSize, templateWindowSize,
-                                                          h, hn, cn,
-                                                          almostTemplateWindowSizeSqBinShift,
-                                                          abs)))
+                                                          h, hn, cn, normType,
+                                                          almostTemplateWindowSizeSqBinShift)))
         return false;
     CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);