diff --git a/doc/tutorials/core/adding_images/adding_images.rst b/doc/tutorials/core/adding_images/adding_images.rst
index 40e2a0d44..e3135693d 100644
--- a/doc/tutorials/core/adding_images/adding_images.rst
+++ b/doc/tutorials/core/adding_images/adding_images.rst
@@ -53,8 +53,8 @@ As usual, after the not-so-lengthy explanation, let's go to the code:
     std::cout<<"* Enter alpha [0-1]: ";
     std::cin>>input;
 
-    /// We use the alpha provided by the user iff it is between 0 and 1
-    if( alpha >= 0 && alpha <= 1 )
+    /// We use the alpha provided by the user if it is between 0 and 1
+    if( input >= 0.0 && input <= 1.0 )
       { alpha = input; }
 
     /// Read image ( same size, same type )
diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp
index c4eb0041d..a7e4fd90b 100644
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@ -57,180 +57,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#if !defined (HAVE_OPENCL)
-
-/* arithmetics */
-void cv::ocl::add(const oclMat &, const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::add(const oclMat &, const oclMat &, oclMat &, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::add(const oclMat &, const Scalar &, oclMat &, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::subtract(const oclMat &, const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::subtract(const oclMat &, const oclMat &, oclMat &, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::subtract(const oclMat &, const Scalar &, oclMat &, const oclMat & )
-{
-    throw_nogpu();
-}
-void cv::ocl::subtract(const Scalar &, const oclMat &,  oclMat &, const oclMat & )
-{
-    throw_nogpu();
-}
-void cv::ocl::multiply(const oclMat &, const oclMat &, oclMat &, double)
-{
-    throw_nogpu();
-}
-void cv::ocl::divide(const oclMat &, const oclMat &, oclMat &, double)
-{
-    throw_nogpu();
-}
-void cv::ocl::divide(double, const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::absdiff(const oclMat &, const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::absdiff(const oclMat &, const Scalar &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::compare(const oclMat &, const oclMat &, oclMat & , int)
-{
-    throw_nogpu();
-}
-void cv::ocl::meanStdDev(const oclMat &, Scalar &, Scalar &)
-{
-    throw_nogpu();
-}
-double cv::ocl::norm(const oclMat &, int)
-{
-    throw_nogpu();
-    return 0.0;
-}
-double cv::ocl::norm(const oclMat &, const oclMat &, int)
-{
-    throw_nogpu();
-    return 0.0;
-}
-void cv::ocl::flip(const oclMat &, oclMat &, int)
-{
-    throw_nogpu();
-}
-Scalar cv::ocl::sum(const oclMat &)
-{
-    throw_nogpu();
-    return Scalar();
-}
-void cv::ocl::minMax(const oclMat &, double *, double *, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::minMaxLoc(const oclMat &, double *, double *, Point *, Point *, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::LUT(const oclMat &, const Mat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::exp(const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::log(const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::magnitude(const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::phase(const oclMat &, const oclMat &, oclMat &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::cartToPolar(const oclMat &, const oclMat &, oclMat &, oclMat &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::polarToCart(const oclMat &, const oclMat &, oclMat &, oclMat &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::transpose(const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::pow(const oclMat &, double, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst)
-{
-    throw_nogpu();
-}
-void cv::ocl::magnitudeSqr(const oclMat &src1, const oclMat &src2, oclMat &dst)
-{
-    throw_nogpu();
-}
-
-/* bit wise operations */
-void cv::ocl::bitwise_not(const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::bitwise_or(const oclMat &, const oclMat &, oclMat &, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::bitwise_and(const oclMat &, const oclMat &, oclMat &, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::bitwise_and(const oclMat &, const Scalar &, oclMat &, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::bitwise_xor(const oclMat &, const oclMat &, oclMat &, const oclMat &)
-{
-    throw_nogpu();
-}
-cv::ocl::oclMat cv::ocl::operator ~ (const oclMat &)
-{
-    throw_nogpu();
-    return oclMat();
-}
-cv::ocl::oclMat cv::ocl::operator | (const oclMat &, const oclMat &)
-{
-    throw_nogpu();
-    return oclMat();
-}
-cv::ocl::oclMat cv::ocl::operator & (const oclMat &, const oclMat &)
-{
-    throw_nogpu();
-    return oclMat();
-}
-cv::ocl::oclMat cv::ocl::operator ^ (const oclMat &, const oclMat &)
-{
-    throw_nogpu();
-    return oclMat();
-}
-
-#else /* !defined (HAVE_OPENCL) */
 namespace cv
 {
     namespace ocl
@@ -2477,5 +2303,3 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
 
     arithmetic_pow_run(x, p, y, kernelName, &arithm_pow);
 }
-
-#endif /* !defined (HAVE_OPENCL) */
diff --git a/modules/ocl/src/binarycaching.hpp b/modules/ocl/src/binarycaching.hpp
index 479487843..0ec565f88 100644
--- a/modules/ocl/src/binarycaching.hpp
+++ b/modules/ocl/src/binarycaching.hpp
@@ -50,17 +50,6 @@ using namespace std;
 using std::cout;
 using std::endl;
 
-#if !defined (HAVE_OPENCL)
-namespace cv
-{
-    namespace ocl
-    {
-        //nothing
-    }//namespace ocl
-}//namespace cv
-
-#else /* !defined (HAVE_OPENCL) */
-
 namespace cv
 {
     namespace ocl
@@ -99,4 +88,3 @@ namespace cv
     }//namespace ocl
 
 }//namespace cv
-#endif
diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp
index 75463b807..5eead4766 100644
--- a/modules/ocl/src/blend.cpp
+++ b/modules/ocl/src/blend.cpp
@@ -50,13 +50,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#if !defined (HAVE_OPENCL)
-void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
-                          oclMat &result)
-{
-    throw_nogpu();
-}
-#else
 namespace cv
 {
     namespace ocl
@@ -97,5 +90,4 @@ void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &
 
         openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
     }
-}
-#endif
\ No newline at end of file
+}
diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp
index 9fe14d214..c81e342d3 100644
--- a/modules/ocl/src/brute_force_matcher.cpp
+++ b/modules/ocl/src/brute_force_matcher.cpp
@@ -51,136 +51,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#if !defined (HAVE_OPENCL)
-cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::add(const vector<oclMat> &)
-{
-    throw_nogpu();
-}
-const vector<oclMat> &cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const
-{
-    throw_nogpu();
-    return trainDescCollection;
-}
-void cv::ocl::BruteForceMatcher_OCL_base::clear()
-{
-    throw_nogpu();
-}
-bool cv::ocl::BruteForceMatcher_OCL_base::empty() const
-{
-    throw_nogpu();
-    return true;
-}
-bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const
-{
-    throw_nogpu();
-    return true;
-}
-void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &, const oclMat &, vector<DMatch> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &, const Mat &, vector<DMatch> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &, const oclMat &, vector<DMatch> &, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &, oclMat &, const vector<oclMat> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &, const oclMat &, const oclMat &, vector<DMatch> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &, const Mat &, const Mat &, vector<DMatch> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &, vector<DMatch> &, const vector<oclMat> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, int, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &, const oclMat &, vector< vector<DMatch> > &, int, const oclMat &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat &, const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat &, const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &, vector< vector<DMatch> > &, int, const vector<oclMat> &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, float, const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &, const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &, const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &, const oclMat &, vector< vector<DMatch> > &, float, const oclMat &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &, oclMat &, oclMat &, oclMat &, oclMat &, float, const vector<oclMat> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &, const oclMat &, const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &, const Mat &, const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &, vector< vector<DMatch> > &, float, const vector<oclMat> &, bool)
-{
-    throw_nogpu();
-}
-#else /* !defined (HAVE_OPENCL) */
-
 using namespace std;
 namespace cv
 {
@@ -1826,8 +1696,4 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, vecto
     oclMat trainIdx, imgIdx, distance, nMatches;
     radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks);
     radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult);
-}
-
-#endif
-
-
+}
diff --git a/modules/ocl/src/build_warps.cpp b/modules/ocl/src/build_warps.cpp
index b41077ecd..c4a092993 100644
--- a/modules/ocl/src/build_warps.cpp
+++ b/modules/ocl/src/build_warps.cpp
@@ -49,21 +49,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#if !defined (HAVE_OPENCL)
-void cv::ocl::buildWarpPlaneMaps(Size, Rect, const Mat &, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &)
-{
-    throw_nogpu();
-}
-void cv::ocl::buildWarpCylindricalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &)
-{
-    throw_nogpu();
-}
-void cv::ocl::buildWarpSphericalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &)
-{
-    throw_nogpu();
-}
-#else
-
 namespace cv
 {
     namespace ocl
@@ -275,6 +260,3 @@ void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, o
     size_t localThreads[3]  = {32, 8, 1};
     openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
 }
-
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp
index 250108908..4b872a1bc 100644
--- a/modules/ocl/src/canny.cpp
+++ b/modules/ocl/src/canny.cpp
@@ -51,25 +51,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#if !defined (HAVE_OPENCL)
-void cv::ocl::Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
-{
-    throw_nogpu();
-}
-void cv::ocl::Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
-{
-    throw_nogpu();
-}
-void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false)
-{
-    throw_nogpu();
-}
-void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false)
-{
-    throw_nogpu();
-}
-#else
-
 namespace cv
 {
     namespace ocl
@@ -426,5 +407,3 @@ void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
 
     openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
 }
-
-#endif // HAVE_OPENCL
diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index ab613e051..e14bd2756 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -49,18 +49,6 @@
 using namespace cv;
 using namespace cv::ocl;
 
-#if !defined (HAVE_OPENCL)
-
-void cv::ocl::cvtColor(const oclMat &, oclMat &, int, int)
-{
-    throw_nogpu();
-}
-void cv::ocl::cvtColor(const oclMat &, oclMat &, int, int, const Stream &)
-{
-    throw_nogpu();
-}
-
-#else /* !defined (HAVE_OPENCL) */
 #ifndef CV_DESCALE
 #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
 #endif
@@ -289,5 +277,3 @@ void cv::ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn)
 {
     cvtColor_caller(src, dst, code, dcn);
 }
-
-#endif /* !defined (HAVE_OPENCL) */
diff --git a/modules/ocl/src/columnsum.cpp b/modules/ocl/src/columnsum.cpp
index 06c871d50..037ff234e 100644
--- a/modules/ocl/src/columnsum.cpp
+++ b/modules/ocl/src/columnsum.cpp
@@ -50,16 +50,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-
-#if !defined(HAVE_OPENCL)
-
-void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
-{
-    throw_nogpu();
-}
-
-#else /*!HAVE_OPENCL */
-
 namespace cv
 {
     namespace ocl
@@ -92,5 +82,4 @@ void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
 
     openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
 
-}
-#endif
\ No newline at end of file
+}
diff --git a/modules/ocl/src/error.cpp b/modules/ocl/src/error.cpp
index 007c013cf..e854e70cd 100644
--- a/modules/ocl/src/error.cpp
+++ b/modules/ocl/src/error.cpp
@@ -46,12 +46,6 @@
 using namespace cv;
 using namespace cv::ocl;
 
-#if !defined (HAVE_OPENCL)
-
-// do nothing
-
-#else /* !defined (HAVE_OPENCL) */
-
 namespace cv
 {
     namespace ocl
@@ -194,5 +188,3 @@ namespace cv
         }
     }
 }
-
-#endif
diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp
index cbecaec79..300ae60a7 100644
--- a/modules/ocl/src/fft.cpp
+++ b/modules/ocl/src/fft.cpp
@@ -49,12 +49,7 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#if !defined HAVE_OPENCL
-void cv::ocl::dft(const oclMat &, oclMat &, Size , int )
-{
-    throw_nogpu();
-}
-#elif !defined HAVE_CLAMDFFT
+#if !defined HAVE_CLAMDFFT
 void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
 {
     CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented");
diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp
index 5420021b1..5173dba37 100644
--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@@ -54,121 +54,6 @@ using namespace std;
 using namespace cv;
 using namespace cv::ocl;
 
-#if !defined (HAVE_OPENCL)
-
-Ptr<BaseFilter_GPU> cv::ocl::getBoxFilter_GPU(int, int, const Size &, Point, int)
-{
-    throw_nogpu();
-    return Ptr<BaseFilter_GPU>(0);
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createBoxFilter_GPU(int, int, const Size &, const Point &, int)
-{
-    throw_nogpu();
-    return Ptr<FilterEngine_GPU>(0);
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createFilter2D_GPU(const Ptr<BaseFilter_GPU>)
-{
-    throw_nogpu();
-    return Ptr<FilterEngine_GPU>(0);
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int, int, const Mat &, const Mat &, const Point &)
-{
-    throw_nogpu();
-    return Ptr<FilterEngine_GPU>(0);
-}
-
-Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int, int, const Mat &, int)
-{
-    throw_nogpu();
-    return Ptr<BaseRowFilter_GPU>(0);
-}
-
-Ptr<BaseColumnFilter_GPU> cv::ocl::getLinearColumnFilter_GPU(int, int, const Mat &, int)
-{
-    throw_nogpu();
-    return Ptr<BaseColumnFilter_GPU>(0);
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int, Size, double, double)
-{
-    throw_nogpu();
-    return Ptr<FilterEngine_GPU>(0);
-}
-
-
-Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int, int, const Mat &, const Size &, Point)
-{
-    throw_nogpu();
-    return Ptr<BaseFilter_GPU>(0);
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createLinearFilter_GPU(int, int, const Mat &, const Point &)
-{
-    throw_nogpu();
-    return Ptr<FilterEngine_GPU>(0);
-}
-
-Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType)
-{
-    throw_nogpu();
-    return Ptr<FilterEngine_GPU>(0);
-}
-
-void cv::ocl::boxFilter(const oclMat &, oclMat &, int, Size, Point, int)
-{
-    throw_nogpu();
-}
-
-void cv::ocl::sepFilter2D(const oclMat &, oclMat &, int, const Mat &, const Mat &, Point)
-{
-    throw_nogpu();
-}
-
-void cv::ocl::Sobel(const oclMat &, oclMat &, int, int, int, int, double)
-{
-    throw_nogpu();
-}
-
-void cv::ocl::Scharr(const oclMat &, oclMat &, int, int, int, double)
-{
-    throw_nogpu();
-}
-
-void cv::ocl::GaussianBlur(const oclMat &, oclMat &, Size, double, double)
-{
-    throw_nogpu();
-}
-
-void cv::ocl::filter2D(const oclMat &, oclMat &, int, const Mat &, Point)
-{
-    throw_nogpu();
-}
-
-void cv::ocl::Laplacian(const oclMat &, oclMat &, int, int, double)
-{
-    throw_nogpu();
-}
-
-void cv::ocl::erode(const oclMat &, oclMat &, const Mat &, Point, int)
-{
-    throw_nogpu();
-}
-
-void cv::ocl::dilate(const oclMat &, oclMat &, const Mat &, Point, int)
-{
-    throw_nogpu();
-}
-
-void cv::ocl::morphologyEx(const oclMat &, oclMat &, int, const Mat &, Point, int)
-{
-    throw_nogpu();
-}
-
-#else /* !defined (HAVE_OPENCL) */
-
 //helper routines
 namespace cv
 {
@@ -1705,5 +1590,3 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
     Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
     f->apply(src, dst);
 }
-
-#endif /* !defined (HAVE_OPENCL) */
diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp
index 2f11e2b36..be7e79cce 100644
--- a/modules/ocl/src/gemm.cpp
+++ b/modules/ocl/src/gemm.cpp
@@ -46,24 +46,14 @@
 #include <iomanip>
 #include "precomp.hpp"
 
-#ifdef HAVE_CLAMDBLAS
-
-#include "clAmdBlas.h"
-
-#if !defined HAVE_OPENCL
-void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
-                   const oclMat &src3, double beta, oclMat &dst, int flags)
-{
-    throw_nogpu();
-}
-#elif !defined HAVE_CLAMDBLAS
-void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
-                   const oclMat &src3, double beta, oclMat &dst, int flags)
+#if !defined HAVE_CLAMDBLAS
+void cv::ocl::gemm(const oclMat&, const oclMat&, double,
+                   const oclMat&, double, oclMat&, int)
 {
     CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
 }
 #else
-
+#include "clAmdBlas.h"
 using namespace cv;
 
 void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
@@ -168,4 +158,3 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
     clAmdBlasTeardown();
 }
 #endif
-#endif
diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp
index 5c9b75bf5..506dc6b0c 100644
--- a/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@ -47,15 +47,10 @@
 //
 //M*/
 
-/* Haar features calculation */
-//#define EMU
-
 #include "precomp.hpp"
 #include <stdio.h>
 #include <string>
-#ifdef EMU
-#include "runCL.h"
-#endif
+
 using namespace cv;
 using namespace cv::ocl;
 using namespace std;
@@ -926,7 +921,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
     if( gimg.cols < minSize.width || gimg.rows < minSize.height )
         CV_Error(CV_StsError, "Image too small");
 
-    if( (flags & CV_HAAR_SCALE_IMAGE) && gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") == string::npos )
+    if( (flags & CV_HAAR_SCALE_IMAGE) )
     {
         CvSize winSize0 = cascade->orig_window_size;
         //float scalefactor = 1.1f;
@@ -1114,30 +1109,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
         args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
         args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
-        /*
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq));
-         openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/
-        //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&n));
-        //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&grpnumperline));
-        //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&totalgrp));
 
-        //    openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
-
-        //    openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
         openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
         //t = (double)cvGetTickCount() - t;
         //printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
@@ -1258,13 +1230,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
             scaleinfo[i].factor = factor;
             int startnodenum = nodenum * i;
             float factor2 = (float)factor;
-            /*
-             openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer));
-             openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer));
-             openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2));
-             openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i]));
-             openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum));
-             */
 
             vector<pair<size_t, const void *> > args1;
             args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
@@ -1298,22 +1263,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
         openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
         //int argcount = 0;
-        /*openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&newnodebuffer));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&step));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&pbuffer));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&correctionbuffer));
-        openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));*/
 
         vector<pair<size_t, const void *> > args;
         args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
@@ -1335,8 +1284,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
 
 
         openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
-        //openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
-        //openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
 
         //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL));
         candidate = (int *)clEnqueueMapBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);
@@ -1407,204 +1354,10 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
 }
 
 
-// static CvHaarClassifierCascade * gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
-// {
-//     int i;
-//     CvHaarClassifierCascade *cascade = gpuCreateHaarClassifierCascade(n);
-//     cascade->orig_window_size = orig_window_size;
-
-//     for( i = 0; i < n; i++ )
-//     {
-//         int j, count, l;
-//         float threshold = 0;
-//         const char *stage = input_cascade[i];
-//         int dl = 0;
-
-//         /* tree links */
-//         int parent = -1;
-//         int next = -1;
-
-//         sscanf( stage, "%d%n", &count, &dl );
-//         stage += dl;
-
-//         assert( count > 0 );
-//         cascade->stage_classifier[i].count = count;
-//         cascade->stage_classifier[i].classifier =
-//             (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0]));
-
-//         for( j = 0; j < count; j++ )
-//         {
-//             CvHaarClassifier *classifier = cascade->stage_classifier[i].classifier + j;
-//             int k, rects = 0;
-//             char str[100];
-
-//             sscanf( stage, "%d%n", &classifier->count, &dl );
-//             stage += dl;
-
-//             classifier->haar_feature = (CvHaarFeature *) cvAlloc(
-//                                            classifier->count * ( sizeof( *classifier->haar_feature ) +
-//                                                    sizeof( *classifier->threshold ) +
-//                                                    sizeof( *classifier->left ) +
-//                                                    sizeof( *classifier->right ) ) +
-//                                            (classifier->count + 1) * sizeof( *classifier->alpha ) );
-//             classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
-//             classifier->left = (int *) (classifier->threshold + classifier->count);
-//             classifier->right = (int *) (classifier->left + classifier->count);
-//             classifier->alpha = (float *) (classifier->right + classifier->count);
-
-//             for( l = 0; l < classifier->count; l++ )
-//             {
-//                 sscanf( stage, "%d%n", &rects, &dl );
-//                 stage += dl;
-
-//                 assert( rects >= 2 && rects <= CV_HAAR_FEATURE_MAX );
-
-//                 for( k = 0; k < rects; k++ )
-//                 {
-//                     CvRect r;
-//                     int band = 0;
-//                     sscanf( stage, "%d%d%d%d%d%f%n",
-//                             &r.x, &r.y, &r.width, &r.height, &band,
-//                             &(classifier->haar_feature[l].rect[k].weight), &dl );
-//                     stage += dl;
-//                     classifier->haar_feature[l].rect[k].r = r;
-//                 }
-//                 sscanf( stage, "%s%n", str, &dl );
-//                 stage += dl;
-
-//                 classifier->haar_feature[l].tilted = strncmp( str, "tilted", 6 ) == 0;
-
-//                 for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ )
-//                 {
-//                     memset( classifier->haar_feature[l].rect + k, 0,
-//                             sizeof(classifier->haar_feature[l].rect[k]) );
-//                 }
-
-//                 sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]),
-//                         &(classifier->left[l]),
-//                         &(classifier->right[l]), &dl );
-//                 stage += dl;
-//             }
-//             for( l = 0; l <= classifier->count; l++ )
-//             {
-//                 sscanf( stage, "%f%n", &(classifier->alpha[l]), &dl );
-//                 stage += dl;
-//             }
-//         }
-
-//         sscanf( stage, "%f%n", &threshold, &dl );
-//         stage += dl;
-
-//         cascade->stage_classifier[i].threshold = threshold;
-
-//         /* load tree links */
-//         if( sscanf( stage, "%d%d%n", &parent, &next, &dl ) != 2 )
-//         {
-//             parent = i - 1;
-//             next = -1;
-//         }
-//         stage += dl;
-
-//         cascade->stage_classifier[i].parent = parent;
-//         cascade->stage_classifier[i].next = next;
-//         cascade->stage_classifier[i].child = -1;
-
-//         if( parent != -1 && cascade->stage_classifier[parent].child == -1 )
-//         {
-//             cascade->stage_classifier[parent].child = i;
-//         }
-//     }
-
-//     return cascade;
-// }
-
 #ifndef _MAX_PATH
 #define _MAX_PATH 1024
 #endif
 
-// static CvHaarClassifierCascade * gpuLoadHaarClassifierCascade( const char *directory, CvSize orig_window_size )
-// {
-//     const char **input_cascade = 0;
-//     CvHaarClassifierCascade *cascade = 0;
-
-//     int i, n;
-//     const char *slash;
-//     char name[_MAX_PATH];
-//     int size = 0;
-//     char *ptr = 0;
-
-//     if( !directory )
-//         CV_Error( CV_StsNullPtr, "Null path is passed" );
-
-//     n = (int)strlen(directory) - 1;
-//     slash = directory[n] == '\\' || directory[n] == '/' ? "" : "/";
-
-//     /* try to read the classifier from directory */
-//     for( n = 0; ; n++ )
-//     {
-//         sprintf( name, "%s%s%d/AdaBoostCARTHaarClassifier.txt", directory, slash, n );
-//         FILE *f = fopen( name, "rb" );
-//         if( !f )
-//             break;
-//         fseek( f, 0, SEEK_END );
-//         size += ftell( f ) + 1;
-//         fclose(f);
-//     }
-
-//     if( n == 0 && slash[0] )
-//         return (CvHaarClassifierCascade *)cvLoad( directory );
-
-//     if( n == 0 )
-//         CV_Error( CV_StsBadArg, "Invalid path" );
-
-//     size += (n + 1) * sizeof(char *);
-//     input_cascade = (const char **)cvAlloc( size );
-//     ptr = (char *)(input_cascade + n + 1);
-
-//     for( i = 0; i < n; i++ )
-//     {
-//         sprintf( name, "%s/%d/AdaBoostCARTHaarClassifier.txt", directory, i );
-//         FILE *f = fopen( name, "rb" );
-//         if( !f )
-//             CV_Error( CV_StsError, "" );
-//         fseek( f, 0, SEEK_END );
-//         size = ftell( f );
-//         fseek( f, 0, SEEK_SET );
-//         CV_Assert((size_t)size == fread( ptr, 1, size, f ));
-//         fclose(f);
-//         input_cascade[i] = ptr;
-//         ptr += size;
-//         *ptr++ = '\0';
-//     }
-
-//     input_cascade[n] = 0;
-//     cascade = gpuLoadCascadeCART( input_cascade, n, orig_window_size );
-
-//     if( input_cascade )
-//         cvFree( &input_cascade );
-
-//     return cascade;
-// }
-
-
-// static void gpuReleaseHaarClassifierCascade( CvHaarClassifierCascade **_cascade )
-// {
-//     if( _cascade && *_cascade )
-//     {
-//         int i, j;
-//         CvHaarClassifierCascade *cascade = *_cascade;
-
-//         for( i = 0; i < cascade->count; i++ )
-//         {
-//             for( j = 0; j < cascade->stage_classifier[i].count; j++ )
-//                 cvFree( &cascade->stage_classifier[i].classifier[j].haar_feature );
-//             cvFree( &cascade->stage_classifier[i].classifier );
-//         }
-//         gpuReleaseHidHaarClassifierCascade( (GpuHidHaarClassifierCascade **)&cascade->hid_cascade );
-//         cvFree( _cascade );
-//     }
-// }
-
 
 /****************************************************************************************\
 *                                  Persistence functions                                 *
@@ -1627,937 +1380,11 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
 #define ICV_HAAR_PARENT_NAME            "parent"
 #define ICV_HAAR_NEXT_NAME              "next"
 
-// static int gpuIsHaarClassifier( const void *struct_ptr )
-// {
-//     return CV_IS_HAAR_CLASSIFIER( struct_ptr );
-// }
-
-// static void * gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
-// {
-//     CvHaarClassifierCascade *cascade = NULL;
-
-//     char buf[256];
-//     CvFileNode *seq_fn = NULL; /* sequence */
-//     CvFileNode *fn = NULL;
-//     CvFileNode *stages_fn = NULL;
-//     CvSeqReader stages_reader;
-//     int n;
-//     int i, j, k, l;
-//     int parent, next;
-
-//     stages_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_STAGES_NAME );
-//     if( !stages_fn || !CV_NODE_IS_SEQ( stages_fn->tag) )
-//         CV_Error( CV_StsError, "Invalid stages node" );
-
-//     n = stages_fn->data.seq->total;
-//     cascade = gpuCreateHaarClassifierCascade(n);
-
-//     /* read size */
-//     seq_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_SIZE_NAME );
-//     if( !seq_fn || !CV_NODE_IS_SEQ( seq_fn->tag ) || seq_fn->data.seq->total != 2 )
-//         CV_Error( CV_StsError, "size node is not a valid sequence." );
-//     fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 0 );
-//     if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 )
-//         CV_Error( CV_StsError, "Invalid size node: width must be positive integer" );
-//     cascade->orig_window_size.width = fn->data.i;
-//     fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 1 );
-//     if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 )
-//         CV_Error( CV_StsError, "Invalid size node: height must be positive integer" );
-//     cascade->orig_window_size.height = fn->data.i;
-
-//     cvStartReadSeq( stages_fn->data.seq, &stages_reader );
-//     for( i = 0; i < n; ++i )
-//     {
-//         CvFileNode *stage_fn;
-//         CvFileNode *trees_fn;
-//         CvSeqReader trees_reader;
-
-//         stage_fn = (CvFileNode *) stages_reader.ptr;
-//         if( !CV_NODE_IS_MAP( stage_fn->tag ) )
-//         {
-//             sprintf( buf, "Invalid stage %d", i );
-//             CV_Error( CV_StsError, buf );
-//         }
-
-//         trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME );
-//         if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag )
-//                 || trees_fn->data.seq->total <= 0 )
-//         {
-//             sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i );
-//             CV_Error( CV_StsError, buf );
-//         }
-
-//         cascade->stage_classifier[i].classifier =
-//             (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total
-//                                           * sizeof( cascade->stage_classifier[i].classifier[0] ) );
-//         for( j = 0; j < trees_fn->data.seq->total; ++j )
-//         {
-//             cascade->stage_classifier[i].classifier[j].haar_feature = NULL;
-//         }
-//         cascade->stage_classifier[i].count = trees_fn->data.seq->total;
-
-//         cvStartReadSeq( trees_fn->data.seq, &trees_reader );
-//         for( j = 0; j < trees_fn->data.seq->total; ++j )
-//         {
-//             CvFileNode *tree_fn;
-//             CvSeqReader tree_reader;
-//             CvHaarClassifier *classifier;
-//             int last_idx;
-
-//             classifier = &cascade->stage_classifier[i].classifier[j];
-//             tree_fn = (CvFileNode *) trees_reader.ptr;
-//             if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 )
-//             {
-//                 sprintf( buf, "Tree node is not a valid sequence."
-//                          " (stage %d, tree %d)", i, j );
-//                 CV_Error( CV_StsError, buf );
-//             }
-
-//             classifier->count = tree_fn->data.seq->total;
-//             classifier->haar_feature = (CvHaarFeature *) cvAlloc(
-//                                            classifier->count * ( sizeof( *classifier->haar_feature ) +
-//                                                    sizeof( *classifier->threshold ) +
-//                                                    sizeof( *classifier->left ) +
-//                                                    sizeof( *classifier->right ) ) +
-//                                            (classifier->count + 1) * sizeof( *classifier->alpha ) );
-//             classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
-//             classifier->left = (int *) (classifier->threshold + classifier->count);
-//             classifier->right = (int *) (classifier->left + classifier->count);
-//             classifier->alpha = (float *) (classifier->right + classifier->count);
-
-//             cvStartReadSeq( tree_fn->data.seq, &tree_reader );
-//             for( k = 0, last_idx = 0; k < tree_fn->data.seq->total; ++k )
-//             {
-//                 CvFileNode *node_fn;
-//                 CvFileNode *feature_fn;
-//                 CvFileNode *rects_fn;
-//                 CvSeqReader rects_reader;
-
-//                 node_fn = (CvFileNode *) tree_reader.ptr;
-//                 if( !CV_NODE_IS_MAP( node_fn->tag ) )
-//                 {
-//                     sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)",
-//                              k, i, j );
-//                     CV_Error( CV_StsError, buf );
-//                 }
-//                 feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME );
-//                 if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) )
-//                 {
-//                     sprintf( buf, "Feature node is not a valid map. "
-//                              "(stage %d, tree %d, node %d)", i, j, k );
-//                     CV_Error( CV_StsError, buf );
-//                 }
-//                 rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME );
-//                 if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag )
-//                         || rects_fn->data.seq->total < 1
-//                         || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX )
-//                 {
-//                     sprintf( buf, "Rects node is not a valid sequence. "
-//                              "(stage %d, tree %d, node %d)", i, j, k );
-//                     CV_Error( CV_StsError, buf );
-//                 }
-//                 cvStartReadSeq( rects_fn->data.seq, &rects_reader );
-//                 for( l = 0; l < rects_fn->data.seq->total; ++l )
-//                 {
-//                     CvFileNode *rect_fn;
-//                     CvRect r;
-
-//                     rect_fn = (CvFileNode *) rects_reader.ptr;
-//                     if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 )
-//                     {
-//                         sprintf( buf, "Rect %d is not a valid sequence. "
-//                                  "(stage %d, tree %d, node %d)", l, i, j, k );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-
-//                     fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 0 );
-//                     if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
-//                     {
-//                         sprintf( buf, "x coordinate must be non-negative integer. "
-//                                  "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     r.x = fn->data.i;
-//                     fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 1 );
-//                     if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
-//                     {
-//                         sprintf( buf, "y coordinate must be non-negative integer. "
-//                                  "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     r.y = fn->data.i;
-//                     fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 );
-//                     if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
-//                             || r.x + fn->data.i > cascade->orig_window_size.width )
-//                     {
-//                         sprintf( buf, "width must be positive integer and "
-//                                  "(x + width) must not exceed window width. "
-//                                  "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     r.width = fn->data.i;
-//                     fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 );
-//                     if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
-//                             || r.y + fn->data.i > cascade->orig_window_size.height )
-//                     {
-//                         sprintf( buf, "height must be positive integer and "
-//                                  "(y + height) must not exceed window height. "
-//                                  "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     r.height = fn->data.i;
-//                     fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 4 );
-//                     if( !CV_NODE_IS_REAL( fn->tag ) )
-//                     {
-//                         sprintf( buf, "weight must be real number. "
-//                                  "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-
-//                     classifier->haar_feature[k].rect[l].weight = (float) fn->data.f;
-//                     classifier->haar_feature[k].rect[l].r = r;
-
-//                     CV_NEXT_SEQ_ELEM( sizeof( *rect_fn ), rects_reader );
-//                 } /* for each rect */
-//                 for( l = rects_fn->data.seq->total; l < CV_HAAR_FEATURE_MAX; ++l )
-//                 {
-//                     classifier->haar_feature[k].rect[l].weight = 0;
-//                     classifier->haar_feature[k].rect[l].r = cvRect( 0, 0, 0, 0 );
-//                 }
-
-//                 fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_TILTED_NAME);
-//                 if( !fn || !CV_NODE_IS_INT( fn->tag ) )
-//                 {
-//                     sprintf( buf, "tilted must be 0 or 1. "
-//                              "(stage %d, tree %d, node %d)", i, j, k );
-//                     CV_Error( CV_StsError, buf );
-//                 }
-//                 classifier->haar_feature[k].tilted = ( fn->data.i != 0 );
-//                 fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_THRESHOLD_NAME);
-//                 if( !fn || !CV_NODE_IS_REAL( fn->tag ) )
-//                 {
-//                     sprintf( buf, "threshold must be real number. "
-//                              "(stage %d, tree %d, node %d)", i, j, k );
-//                     CV_Error( CV_StsError, buf );
-//                 }
-//                 classifier->threshold[k] = (float) fn->data.f;
-//                 fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_NODE_NAME);
-//                 if( fn )
-//                 {
-//                     if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
-//                             || fn->data.i >= tree_fn->data.seq->total )
-//                     {
-//                         sprintf( buf, "left node must be valid node number. "
-//                                  "(stage %d, tree %d, node %d)", i, j, k );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     /* left node */
-//                     classifier->left[k] = fn->data.i;
-//                 }
-//                 else
-//                 {
-//                     fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_VAL_NAME );
-//                     if( !fn )
-//                     {
-//                         sprintf( buf, "left node or left value must be specified. "
-//                                  "(stage %d, tree %d, node %d)", i, j, k );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     if( !CV_NODE_IS_REAL( fn->tag ) )
-//                     {
-//                         sprintf( buf, "left value must be real number. "
-//                                  "(stage %d, tree %d, node %d)", i, j, k );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     /* left value */
-//                     if( last_idx >= classifier->count + 1 )
-//                     {
-//                         sprintf( buf, "Tree structure is broken: too many values. "
-//                                  "(stage %d, tree %d, node %d)", i, j, k );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     classifier->left[k] = -last_idx;
-//                     classifier->alpha[last_idx++] = (float) fn->data.f;
-//                 }
-//                 fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_NODE_NAME);
-//                 if( fn )
-//                 {
-//                     if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
-//                             || fn->data.i >= tree_fn->data.seq->total )
-//                     {
-//                         sprintf( buf, "right node must be valid node number. "
-//                                  "(stage %d, tree %d, node %d)", i, j, k );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     /* right node */
-//                     classifier->right[k] = fn->data.i;
-//                 }
-//                 else
-//                 {
-//                     fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_VAL_NAME );
-//                     if( !fn )
-//                     {
-//                         sprintf( buf, "right node or right value must be specified. "
-//                                  "(stage %d, tree %d, node %d)", i, j, k );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     if( !CV_NODE_IS_REAL( fn->tag ) )
-//                     {
-//                         sprintf( buf, "right value must be real number. "
-//                                  "(stage %d, tree %d, node %d)", i, j, k );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     /* right value */
-//                     if( last_idx >= classifier->count + 1 )
-//                     {
-//                         sprintf( buf, "Tree structure is broken: too many values. "
-//                                  "(stage %d, tree %d, node %d)", i, j, k );
-//                         CV_Error( CV_StsError, buf );
-//                     }
-//                     classifier->right[k] = -last_idx;
-//                     classifier->alpha[last_idx++] = (float) fn->data.f;
-//                 }
-
-//                 CV_NEXT_SEQ_ELEM( sizeof( *node_fn ), tree_reader );
-//             } /* for each node */
-//             if( last_idx != classifier->count + 1 )
-//             {
-//                 sprintf( buf, "Tree structure is broken: too few values. "
-//                          "(stage %d, tree %d)", i, j );
-//                 CV_Error( CV_StsError, buf );
-//             }
-
-//             CV_NEXT_SEQ_ELEM( sizeof( *tree_fn ), trees_reader );
-//         } /* for each tree */
-
-//         fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_STAGE_THRESHOLD_NAME);
-//         if( !fn || !CV_NODE_IS_REAL( fn->tag ) )
-//         {
-//             sprintf( buf, "stage threshold must be real number. (stage %d)", i );
-//             CV_Error( CV_StsError, buf );
-//         }
-//         cascade->stage_classifier[i].threshold = (float) fn->data.f;
-
-//         parent = i - 1;
-//         next = -1;
-
-//         fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME );
-//         if( !fn || !CV_NODE_IS_INT( fn->tag )
-//                 || fn->data.i < -1 || fn->data.i >= cascade->count )
-//         {
-//             sprintf( buf, "parent must be integer number. (stage %d)", i );
-//             CV_Error( CV_StsError, buf );
-//         }
-//         parent = fn->data.i;
-//         fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME );
-//         if( !fn || !CV_NODE_IS_INT( fn->tag )
-//                 || fn->data.i < -1 || fn->data.i >= cascade->count )
-//         {
-//             sprintf( buf, "next must be integer number. (stage %d)", i );
-//             CV_Error( CV_StsError, buf );
-//         }
-//         next = fn->data.i;
-
-//         cascade->stage_classifier[i].parent = parent;
-//         cascade->stage_classifier[i].next = next;
-//         cascade->stage_classifier[i].child = -1;
-
-//         if( parent != -1 && cascade->stage_classifier[parent].child == -1 )
-//         {
-//             cascade->stage_classifier[parent].child = i;
-//         }
-
-//         CV_NEXT_SEQ_ELEM( sizeof( *stage_fn ), stages_reader );
-//     } /* for each stage */
-
-//     return cascade;
-// }
-
-// static void gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr,
-//                         CvAttrList attributes )
-// {
-//     int i, j, k, l;
-//     char buf[256];
-//     const CvHaarClassifierCascade *cascade = (const CvHaarClassifierCascade *) struct_ptr;
-
-//     /* TODO: parameters check */
-
-//     cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_HAAR, attributes );
-
-//     cvStartWriteStruct( fs, ICV_HAAR_SIZE_NAME, CV_NODE_SEQ | CV_NODE_FLOW );
-//     cvWriteInt( fs, NULL, cascade->orig_window_size.width );
-//     cvWriteInt( fs, NULL, cascade->orig_window_size.height );
-//     cvEndWriteStruct( fs ); /* size */
-
-//     cvStartWriteStruct( fs, ICV_HAAR_STAGES_NAME, CV_NODE_SEQ );
-//     for( i = 0; i < cascade->count; ++i )
-//     {
-//         cvStartWriteStruct( fs, NULL, CV_NODE_MAP );
-//         sprintf( buf, "stage %d", i );
-//         cvWriteComment( fs, buf, 1 );
-
-//         cvStartWriteStruct( fs, ICV_HAAR_TREES_NAME, CV_NODE_SEQ );
-
-//         for( j = 0; j < cascade->stage_classifier[i].count; ++j )
-//         {
-//             CvHaarClassifier *tree = &cascade->stage_classifier[i].classifier[j];
-
-//             cvStartWriteStruct( fs, NULL, CV_NODE_SEQ );
-//             sprintf( buf, "tree %d", j );
-//             cvWriteComment( fs, buf, 1 );
-
-//             for( k = 0; k < tree->count; ++k )
-//             {
-//                 CvHaarFeature *feature = &tree->haar_feature[k];
-
-//                 cvStartWriteStruct( fs, NULL, CV_NODE_MAP );
-//                 if( k )
-//                 {
-//                     sprintf( buf, "node %d", k );
-//                 }
-//                 else
-//                 {
-//                     sprintf( buf, "root node" );
-//                 }
-//                 cvWriteComment( fs, buf, 1 );
-
-//                 cvStartWriteStruct( fs, ICV_HAAR_FEATURE_NAME, CV_NODE_MAP );
-
-//                 cvStartWriteStruct( fs, ICV_HAAR_RECTS_NAME, CV_NODE_SEQ );
-//                 for( l = 0; l < CV_HAAR_FEATURE_MAX && feature->rect[l].r.width != 0; ++l )
-//                 {
-//                     cvStartWriteStruct( fs, NULL, CV_NODE_SEQ | CV_NODE_FLOW );
-//                     cvWriteInt(  fs, NULL, feature->rect[l].r.x );
-//                     cvWriteInt(  fs, NULL, feature->rect[l].r.y );
-//                     cvWriteInt(  fs, NULL, feature->rect[l].r.width );
-//                     cvWriteInt(  fs, NULL, feature->rect[l].r.height );
-//                     cvWriteReal( fs, NULL, feature->rect[l].weight );
-//                     cvEndWriteStruct( fs ); /* rect */
-//                 }
-//                 cvEndWriteStruct( fs ); /* rects */
-//                 cvWriteInt( fs, ICV_HAAR_TILTED_NAME, feature->tilted );
-//                 cvEndWriteStruct( fs ); /* feature */
-
-//                 cvWriteReal( fs, ICV_HAAR_THRESHOLD_NAME, tree->threshold[k]);
-
-//                 if( tree->left[k] > 0 )
-//                 {
-//                     cvWriteInt( fs, ICV_HAAR_LEFT_NODE_NAME, tree->left[k] );
-//                 }
-//                 else
-//                 {
-//                     cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME,
-//                                  tree->alpha[-tree->left[k]] );
-//                 }
-
-//                 if( tree->right[k] > 0 )
-//                 {
-//                     cvWriteInt( fs, ICV_HAAR_RIGHT_NODE_NAME, tree->right[k] );
-//                 }
-//                 else
-//                 {
-//                     cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME,
-//                                  tree->alpha[-tree->right[k]] );
-//                 }
-
-//                 cvEndWriteStruct( fs ); /* split */
-//             }
-
-//             cvEndWriteStruct( fs ); /* tree */
-//         }
-
-//         cvEndWriteStruct( fs ); /* trees */
-
-//         cvWriteReal( fs, ICV_HAAR_STAGE_THRESHOLD_NAME, cascade->stage_classifier[i].threshold);
-//         cvWriteInt( fs, ICV_HAAR_PARENT_NAME, cascade->stage_classifier[i].parent );
-//         cvWriteInt( fs, ICV_HAAR_NEXT_NAME, cascade->stage_classifier[i].next );
-
-//         cvEndWriteStruct( fs ); /* stage */
-//     } /* for each stage */
-
-//     cvEndWriteStruct( fs ); /* stages */
-//     cvEndWriteStruct( fs ); /* root */
-// }
-
-// static void * gpuCloneHaarClassifier( const void *struct_ptr )
-// {
-//     CvHaarClassifierCascade *cascade = NULL;
-
-//     int i, j, k, n;
-//     const CvHaarClassifierCascade *cascade_src =
-//         (const CvHaarClassifierCascade *) struct_ptr;
-
-//     n = cascade_src->count;
-//     cascade = gpuCreateHaarClassifierCascade(n);
-//     cascade->orig_window_size = cascade_src->orig_window_size;
-
-//     for( i = 0; i < n; ++i )
-//     {
-//         cascade->stage_classifier[i].parent = cascade_src->stage_classifier[i].parent;
-//         cascade->stage_classifier[i].next = cascade_src->stage_classifier[i].next;
-//         cascade->stage_classifier[i].child = cascade_src->stage_classifier[i].child;
-//         cascade->stage_classifier[i].threshold = cascade_src->stage_classifier[i].threshold;
-
-//         cascade->stage_classifier[i].count = 0;
-//         cascade->stage_classifier[i].classifier =
-//             (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count
-//                                           * sizeof( cascade->stage_classifier[i].classifier[0] ) );
-
-//         cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count;
-
-//         for( j = 0; j < cascade->stage_classifier[i].count; ++j )
-//             cascade->stage_classifier[i].classifier[j].haar_feature = NULL;
-
-//         for( j = 0; j < cascade->stage_classifier[i].count; ++j )
-//         {
-//             const CvHaarClassifier *classifier_src =
-//                 &cascade_src->stage_classifier[i].classifier[j];
-//             CvHaarClassifier *classifier =
-//                 &cascade->stage_classifier[i].classifier[j];
-
-//             classifier->count = classifier_src->count;
-//             classifier->haar_feature = (CvHaarFeature *) cvAlloc(
-//                                            classifier->count * ( sizeof( *classifier->haar_feature ) +
-//                                                    sizeof( *classifier->threshold ) +
-//                                                    sizeof( *classifier->left ) +
-//                                                    sizeof( *classifier->right ) ) +
-//                                            (classifier->count + 1) * sizeof( *classifier->alpha ) );
-//             classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
-//             classifier->left = (int *) (classifier->threshold + classifier->count);
-//             classifier->right = (int *) (classifier->left + classifier->count);
-//             classifier->alpha = (float *) (classifier->right + classifier->count);
-//             for( k = 0; k < classifier->count; ++k )
-//             {
-//                 classifier->haar_feature[k] = classifier_src->haar_feature[k];
-//                 classifier->threshold[k] = classifier_src->threshold[k];
-//                 classifier->left[k] = classifier_src->left[k];
-//                 classifier->right[k] = classifier_src->right[k];
-//                 classifier->alpha[k] = classifier_src->alpha[k];
-//             }
-//             classifier->alpha[classifier->count] =
-//                 classifier_src->alpha[classifier->count];
-//         }
-//     }
-
-//     return cascade;
-// }
-
-#if 0
-CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier,
-                  (CvReleaseFunc)gpuReleaseHaarClassifierCascade,
-                  gpuReadHaarClassifier, gpuWriteHaarClassifier,
-                  gpuCloneHaarClassifier );
-
-
-namespace cv
+static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */)
 {
-
-HaarClassifierCascade::HaarClassifierCascade() {}
-HaarClassifierCascade::HaarClassifierCascade(const String &filename)
-{
-    load(filename);
-}
-
-bool HaarClassifierCascade::load(const String &filename)
-{
-    cascade = Ptr<CvHaarClassifierCascade>((CvHaarClassifierCascade *)cvLoad(filename.c_str(), 0, 0, 0));
-    return (CvHaarClassifierCascade *)cascade != 0;
-}
-
-void HaarClassifierCascade::detectMultiScale( const Mat &image,
-        Vector<Rect> &objects, double scaleFactor,
-        int minNeighbors, int flags,
-        Size minSize )
-{
-    MemStorage storage(cvCreateMemStorage(0));
-    CvMat _image = image;
-    CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor,
-                                            minNeighbors, flags, minSize );
-    Seq<Rect>(_objects).copyTo(objects);
-}
-
-int HaarClassifierCascade::runAt(Point pt, int startStage, int) const
-{
-    return gpuRunHaarClassifierCascade(cascade, pt, startStage);
-}
-
-void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum,
-                                       const Mat &tilted, double scale )
-{
-    CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted;
-    gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale );
-}
-
-}
-#endif
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////reserved functios//////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-/*#if CV_SSE2
-#   if CV_SSE4 || defined __SSE4__
-#       include <smmintrin.h>
-#   else
-#       define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m))
-#       define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m))
-#   endif
-#if defined CV_ICC
-#   define CV_HAAR_USE_SSE 1
-#endif
-#endif*/
-
-
-/*
-CV_IMPL void
-gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
-const CvArr* _sum,
-const CvArr* _sqsum,
-const CvArr* _tilted_sum,
-double scale )
-{
-CvMat sum_stub, *sum = (CvMat*)_sum;
-CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum;
-CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum;
-GpuHidHaarClassifierCascade* cascade;
-int coi0 = 0, coi1 = 0;
-int i;
-int datasize;
-int totalclassifier;
-CvRect equRect;
-double weight_scale;
-int rows,cols;
-
-if( !CV_IS_HAAR_CLASSIFIER(_cascade) )
-CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier pointer" );
-
-if( scale <= 0 )
-CV_Error( CV_StsOutOfRange, "Scale must be positive" );
-
-sum = cvGetMat( sum, &sum_stub, &coi0 );
-sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 );
-
-if( coi0 || coi1 )
-CV_Error( CV_BadCOI, "COI is not supported" );
-
-if( !CV_ARE_SIZES_EQ( sum, sqsum ))
-CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
-
-if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 ||
-CV_MAT_TYPE(sum->type) != CV_32SC1 )
-CV_Error( CV_StsUnsupportedFormat,
-"Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
-
-if( !_cascade->hid_cascade )
-gpuCreateHidHaarClassifierCascade(_cascade,&datasize,&totalclassifier);
-
-cascade =(GpuHidHaarClassifierCascade *)_cascade->hid_cascade;
-
-if( cascade->has_tilted_features )
-{
-tilted = cvGetMat( tilted, &tilted_stub, &coi1 );
-
-if( CV_MAT_TYPE(tilted->type) != CV_32SC1 )
-CV_Error( CV_StsUnsupportedFormat,
-"Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
-
-if( sum->step != tilted->step )
-CV_Error( CV_StsUnmatchedSizes,
-"Sum and tilted_sum must have the same stride (step, widthStep)" );
-
-if( !CV_ARE_SIZES_EQ( sum, tilted ))
-CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
-//cascade->tilted = *tilted;
-}
-
-_cascade->scale = scale;
-_cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale );
-_cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale );
-
-//cascade->sum = *sum;
-//cascade->sqsum = *sqsum;
-
-equRect.x = equRect.y = cvRound(scale);
-equRect.width = cvRound((_cascade->orig_window_size.width-2)*scale);
-equRect.height = cvRound((_cascade->orig_window_size.height-2)*scale);
-weight_scale = 1./(equRect.width*equRect.height);
-cascade->inv_window_area = weight_scale;
-
-cascade->p0 = sum_elem_ptr(*sum, equRect.y, equRect.x);
-cascade->p1 = sum_elem_ptr(*sum, equRect.y, equRect.x + equRect.width );
-cascade->p2 = sum_elem_ptr(*sum, equRect.y + equRect.height, equRect.x );
-cascade->p3 = sum_elem_ptr(*sum, equRect.y + equRect.height,
-equRect.x + equRect.width );
-*/
-/*    rows=sum->rows;
-cols=sum->cols;
-cascade->p0 = equRect.y*cols + equRect.x;
-cascade->p1 = equRect.y*cols + equRect.x + equRect.width;
-cascade->p2 = (equRect.y + equRect.height) * cols + equRect.x;
-cascade->p3 = (equRect.y + equRect.height) * cols + equRect.x + equRect.width ;
-*/
-/*
-cascade->pq0 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x);
-cascade->pq1 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x + equRect.width );
-cascade->pq2 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height, equRect.x );
-cascade->pq3 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height,
-equRect.x + equRect.width );
-*/
-/* init pointers in haar features according to real window size and
-given image pointers */
-/*    for( i = 0; i < _cascade->count; i++ )
-{
-int j, k, l;
-for( j = 0; j < cascade->stage_classifier[i].count; j++ )
-{
-for( l = 0; l < cascade->stage_classifier[i].classifier[j].count; l++ )
-{
-CvHaarFeature* feature =
-&_cascade->stage_classifier[i].classifier[j].haar_feature[l];
-*/                /* GpuHidHaarClassifier* classifier =
-cascade->stage_classifier[i].classifier + j; */
-//GpuHidHaarFeature* hidfeature =
-//  &cascade->stage_classifier[i].classifier[j].node[l].feature;
-/*                double sum0 = 0, area0 = 0;
-CvRect r[3];
-
-int base_w = -1, base_h = -1;
-int new_base_w = 0, new_base_h = 0;
-int kx, ky;
-int flagx = 0, flagy = 0;
-int x0 = 0, y0 = 0;
-int nr;
-*/
-/* align blocks */
-/*                for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ )
-{
-//if( !hidfeature->rect[k].p0 )
-//    break;
-r[k] = feature->rect[k].r;
-base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
-base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
-base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
-base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
-}
-
-nr = k;
-
-base_w += 1;
-base_h += 1;
-kx = r[0].width / base_w;
-ky = r[0].height / base_h;
-
-if( kx <= 0 )
-{
-flagx = 1;
-new_base_w = cvRound( r[0].width * scale ) / kx;
-x0 = cvRound( r[0].x * scale );
-}
-
-if( ky <= 0 )
-{
-flagy = 1;
-new_base_h = cvRound( r[0].height * scale ) / ky;
-y0 = cvRound( r[0].y * scale );
-}
-
-for( k = 0; k < nr; k++ )
-{
-CvRect tr;
-double correction_ratio;
-
-if( flagx )
-{
-tr.x = (r[k].x - r[0].x) * new_base_w / base_w + x0;
-tr.width = r[k].width * new_base_w / base_w;
-}
-else
-{
-tr.x = cvRound( r[k].x * scale );
-tr.width = cvRound( r[k].width * scale );
-}
-
-if( flagy )
-{
-tr.y = (r[k].y - r[0].y) * new_base_h / base_h + y0;
-tr.height = r[k].height * new_base_h / base_h;
-}
-else
-{
-tr.y = cvRound( r[k].y * scale );
-tr.height = cvRound( r[k].height * scale );
-}
-
-#if CV_ADJUST_WEIGHTS
-{
-// RAINER START
-const float orig_feature_size =  (float)(feature->rect[k].r.width)*feature->rect[k].r.height;
-const float orig_norm_size = (float)(_cascade->orig_window_size.width)*(_cascade->orig_window_size.height);
-const float feature_size = float(tr.width*tr.height);
-//const float normSize    = float(equRect.width*equRect.height);
-float target_ratio = orig_feature_size / orig_norm_size;
-//float isRatio = featureSize / normSize;
-//correctionRatio = targetRatio / isRatio / normSize;
-correction_ratio = target_ratio / feature_size;
-// RAINER END
-}
-#else
-correction_ratio = weight_scale * (!feature->tilted ? 1 : 0.5);
-#endif
-
-if( !feature->tilted )
-{
-hidfeature->rect[k].p0 = tr.y * rows + tr.x;
-hidfeature->rect[k].p1 = tr.y * rows + tr.x + tr.width;
-hidfeature->rect[k].p2 = (tr.y + tr.height) * rows + tr.x;
-hidfeature->rect[k].p3 = (tr.y + tr.height) * rows + tr.x + tr.width;
-
-}
-else
-{
-hidfeature->rect[k].p2 = (tr.y + tr.width) * rows + tr.x + tr.width;
-hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * rows + tr.x + tr.width - tr.height;
-hidfeature->rect[k].p0 = tr.y*rows + tr.x;
-hidfeature->rect[k].p1 = (tr.y + tr.height) * rows + tr.x - tr.height;
-
-}
-
-//hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio);
-
-if( k == 0 )
-area0 = tr.width * tr.height;
-else
-;//  sum0 += hidfeature->rect[k].weight * tr.width * tr.height;
-}
-
-//hidfeature->rect[0].weight = (float)(-sum0/area0);*/
-//            } /* l */
-//        } /* j */
-//    }
-//}
-/*
-CV_INLINE
-double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier,
-double variance_norm_factor,
-size_t p_offset )
-{
-
-    int idx = 0;
-    do
-    {
-    GpuHidHaarTreeNode* node = classifier->node + idx;
-    double t = node->threshold * variance_norm_factor;
-
-    double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
-    sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
-
-    if( node->feature.rect[2].p0 )
-    sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
-
-    idx = sum < t ? node->left : node->right;
-    }
-    while( idx > 0 );
-    return classifier->alpha[-idx];
-
-    return 0.;
-}
-
-
-*/
-static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade,
-CvPoint pt, int start_stage */)
-{
-    /*
-    int result = -1;
-
-    int p_offset, pq_offset;
-    int i, j;
-    double mean, variance_norm_factor;
-    GpuHidHaarClassifierCascade* cascade;
-
-    if( !CV_IS_HAAR_CLASSIFIER(_cascade) )
-    CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid cascade pointer" );
-
-    cascade = (GpuHidHaarClassifierCascade*) _cascade->hid_cascade;
-    if( !cascade )
-    CV_Error( CV_StsNullPtr, "Hidden cascade has not been created.\n"
-    "Use gpuSetImagesForHaarClassifierCascade" );
-
-    if( pt.x < 0 || pt.y < 0 ||
-    pt.x + _cascade->real_window_size.width >= cascade->sum.width-2 ||
-    pt.y + _cascade->real_window_size.height >= cascade->sum.height-2 )
-    return -1;
-
-    p_offset = pt.y * (cascade->sum.step/sizeof(sumtype)) + pt.x;
-    pq_offset = pt.y * (cascade->sqsum.step/sizeof(sqsumtype)) + pt.x;
-    mean = calc_sum(*cascade,p_offset)*cascade->inv_window_area;
-    variance_norm_factor = cascade->pq0[pq_offset] - cascade->pq1[pq_offset] -
-    cascade->pq2[pq_offset] + cascade->pq3[pq_offset];
-    variance_norm_factor = variance_norm_factor*cascade->inv_window_area - mean*mean;
-    if( variance_norm_factor >= 0. )
-    variance_norm_factor = sqrt(variance_norm_factor);
-    else
-    variance_norm_factor = 1.;
-
-
-    if( cascade->is_stump_based )
-    {
-    for( i = start_stage; i < cascade->count; i++ )
-    {
-    double stage_sum = 0;
-
-    if( cascade->stage_classifier[i].two_rects )
-    {
-    for( j = 0; j < cascade->stage_classifier[i].count; j++ )
-    {
-    GpuHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
-    GpuHidHaarTreeNode* node = classifier->node;
-    double t = node->threshold*variance_norm_factor;
-    double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
-    sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
-    stage_sum += classifier->alpha[sum >= t];
-    }
-    }
-    else
-    {
-    for( j = 0; j < cascade->stage_classifier[i].count; j++ )
-    {
-    GpuHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
-    GpuHidHaarTreeNode* node = classifier->node;
-    double t = node->threshold*variance_norm_factor;
-    double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
-    sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
-    if( node->feature.rect[2].p0 )
-    sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
-
-    stage_sum += classifier->alpha[sum >= t];
-    }
-    }
-
-    if( stage_sum < cascade->stage_classifier[i].threshold )
-    return -i;
-    }
-    }
-    */
     return 1;
 }
 
-
 namespace cv
 {
 namespace ocl
@@ -2671,78 +1498,3 @@ struct gpuHaarDetectObjects_ScaleCascade_Invoker
 
 }
 }
-
-/*
-typedef struct _ALIGNED_ON(128) GpuHidHaarFeature
-{
-struct _ALIGNED_ON(32)
-{
-int    p0 _ALIGNED_ON(4);
-int    p1 _ALIGNED_ON(4);
-int    p2 _ALIGNED_ON(4);
-int    p3 _ALIGNED_ON(4);
-float weight  _ALIGNED_ON(4);
-}
-rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32);
-}
-GpuHidHaarFeature;
-
-
-typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode
-{
-int left _ALIGNED_ON(4);
-int right _ALIGNED_ON(4);
-float threshold _ALIGNED_ON(4);
-int p0[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
-int p1[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
-int p2[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
-int p3[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
-float weight[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
-float alpha[2] _ALIGNED_ON(8);
-// GpuHidHaarFeature feature __attribute__((aligned (128)));
-}
-GpuHidHaarTreeNode;
-
-
-typedef struct _ALIGNED_ON(32) GpuHidHaarClassifier
-{
-int count _ALIGNED_ON(4);
-//CvHaarFeature* orig_feature;
-GpuHidHaarTreeNode* node _ALIGNED_ON(8);
-float* alpha _ALIGNED_ON(8);
-}
-GpuHidHaarClassifier;
-
-
-typedef struct _ALIGNED_ON(64) __attribute__((aligned (64))) GpuHidHaarStageClassifier
-{
-int  count _ALIGNED_ON(4);
-float threshold _ALIGNED_ON(4);
-int two_rects _ALIGNED_ON(4);
-GpuHidHaarClassifier* classifier _ALIGNED_ON(8);
-struct GpuHidHaarStageClassifier* next _ALIGNED_ON(8);
-struct GpuHidHaarStageClassifier* child _ALIGNED_ON(8);
-struct GpuHidHaarStageClassifier* parent _ALIGNED_ON(8);
-}
-GpuHidHaarStageClassifier;
-
-
-typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade
-{
-int  count _ALIGNED_ON(4);
-int  is_stump_based _ALIGNED_ON(4);
-int  has_tilted_features _ALIGNED_ON(4);
-int  is_tree _ALIGNED_ON(4);
-int pq0 _ALIGNED_ON(4);
-int pq1 _ALIGNED_ON(4);
-int pq2 _ALIGNED_ON(4);
-int pq3 _ALIGNED_ON(4);
-int p0 _ALIGNED_ON(4);
-int p1 _ALIGNED_ON(4);
-int p2 _ALIGNED_ON(4);
-int p3 _ALIGNED_ON(4);
-float inv_window_area _ALIGNED_ON(4);
-// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
-}GpuHidHaarClassifierCascade;
-*/
-/* End of file. */
diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp
index 7eca4fe0f..59062ae49 100644
--- a/modules/ocl/src/hog.cpp
+++ b/modules/ocl/src/hog.cpp
@@ -49,69 +49,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#if !defined (HAVE_OPENCL)
-
-cv::ocl::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int)
-{
-    throw_nogpu();
-}
-size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
-{
-    throw_nogpu();
-    return 0;
-}
-size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const
-{
-    throw_nogpu();
-    return 0;
-}
-double cv::ocl::HOGDescriptor::getWinSigma() const
-{
-    throw_nogpu();
-    return 0;
-}
-bool cv::ocl::HOGDescriptor::checkDetectorSize() const
-{
-    throw_nogpu();
-    return false;
-}
-void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::HOGDescriptor::detect(const oclMat &, vector<Point> &, double, Size, Size)
-{
-    throw_nogpu();
-}
-void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &, vector<Rect> &, double, Size, Size, double, int)
-{
-    throw_nogpu();
-}
-void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &, Size, oclMat &, int)
-{
-    throw_nogpu();
-}
-std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector()
-{
-    throw_nogpu();
-    return std::vector<float>();
-}
-std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector48x96()
-{
-    throw_nogpu();
-    return std::vector<float>();
-}
-std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector64x128()
-{
-    throw_nogpu();
-    return std::vector<float>();
-}
-
-#else
 
 #define CELL_WIDTH 8
 #define CELL_HEIGHT 8
@@ -1895,5 +1832,3 @@ void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz
 
     openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
 }
-
-#endif
diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp
index f7d0c4394..8fbada1d3 100644
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -59,62 +59,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#if !defined (HAVE_OPENCL)
-
-
-void cv::ocl::meanShiftFiltering(const oclMat &, oclMat &, int, int, TermCriteria)
-{
-    throw_nogpu();
-}
-void cv::ocl::meanShiftProc(const oclMat &, oclMat &, oclMat &, int, int, TermCriteria)
-{
-    throw_nogpu();
-}
-double cv::ocl::threshold(const oclMat &, oclMat &, double, int)
-{
-    throw_nogpu();
-    return 0.0;
-}
-void cv::ocl::resize(const oclMat &, oclMat &, Size, double, double, int)
-{
-    throw_nogpu();
-}
-void cv::ocl::remap(const oclMat &, oclMat &, oclMat &, oclMat &, int, int , const Scalar &)
-{
-    throw_nogpu();
-}
-
-void cv::ocl::copyMakeBorder(const oclMat &, oclMat &, int, int, int, int, const Scalar &)
-{
-    throw_nogpu();
-}
-void cv::ocl::warpAffine(const oclMat &, oclMat &, const Mat &, Size, int)
-{
-    throw_nogpu();
-}
-void cv::ocl::warpPerspective(const oclMat &, oclMat &, const Mat &, Size, int)
-{
-    throw_nogpu();
-}
-void cv::ocl::integral(const oclMat &, oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::calcHist(const oclMat &, oclMat &hist)
-{
-    throw_nogpu();
-}
-void cv::ocl::bilateralFilter(const oclMat &, oclMat &, int, double, double, int)
-{
-    throw_nogpu();
-}
-void cv::ocl::convolve(const oclMat &, const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-
-#else /* !defined (HAVE_OPENCL) */
-
 namespace cv
 {
     namespace ocl
@@ -1696,4 +1640,3 @@ void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y)
 
     convolve_run(x, t, y, kernelName, &imgproc_convolve);
 }
-#endif /* !defined (HAVE_OPENCL) */
diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp
index eba92a7de..5d4adfceb 100644
--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@@ -61,82 +61,6 @@ using std::endl;
 
 //#define AMD_DOUBLE_DIFFER
 
-#if !defined (HAVE_OPENCL)
-
-namespace cv
-{
-    namespace ocl
-    {
-
-        cl_device_id getDevice()
-        {
-            throw_nogpu();
-            return 0;
-        }
-
-        void getComputeCapability(cl_device_id, int &major, int &minor)
-        {
-            throw_nogpu();
-        }
-
-        void openCLMallocPitch(Context * /*clCxt*/, void ** /*dev_ptr*/, size_t * /*pitch*/,
-                               size_t /*widthInBytes*/, size_t /*height*/)
-        {
-            throw_nogpu();
-        }
-
-        void openCLMemcpy2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/,
-                            const void * /*src*/, size_t /*spitch*/,
-                            size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/)
-        {
-            throw_nogpu();
-        }
-
-        void openCLCopyBuffer2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/,
-                                const void * /*src*/, size_t /*spitch*/,
-                                size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/)
-        {
-            throw_nogpu();
-        }
-
-        cl_mem openCLCreateBuffer(Context *, size_t, size_t)
-        {
-            throw_nogpu();
-        }
-
-        void openCLReadBuffer(Context *, cl_mem, void *, size_t)
-        {
-            throw_nogpu();
-        }
-
-        void openCLFree(void * /*devPtr*/)
-        {
-            throw_nogpu();
-        }
-
-        cl_kernel openCLGetKernelFromSource(const Context * /*clCxt*/,
-                                            const char ** /*fileName*/, string /*kernelName*/)
-        {
-            throw_nogpu();
-        }
-
-        void openCLVerifyKernel(const Context * /*clCxt*/, cl_kernel /*kernel*/, size_t * /*blockSize*/,
-                                size_t * /*globalThreads*/, size_t * /*localThreads*/)
-        {
-            throw_nogpu();
-        }
-
-        cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
-                             const size_t size)
-        {
-            throw_nogpu();
-        }
-
-    }//namespace ocl
-}//namespace cv
-
-#else /* !defined (HAVE_OPENCL) */
-
 namespace cv
 {
     namespace ocl
@@ -1013,4 +937,3 @@ namespace cv
     }//namespace ocl
 
 }//namespace cv
-#endif
diff --git a/modules/ocl/src/interpolate_frames.cpp b/modules/ocl/src/interpolate_frames.cpp
index d6b402093..6b9f53b85 100644
--- a/modules/ocl/src/interpolate_frames.cpp
+++ b/modules/ocl/src/interpolate_frames.cpp
@@ -50,17 +50,6 @@ using namespace std;
 using namespace cv;
 using namespace cv::ocl;
 
-
-#if !defined (HAVE_OPENCL)
-void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1,
-                                const oclMat &fu, const oclMat &fv,
-                                const oclMat &bu, const oclMat &bv,
-                                float pos, oclMat &newFrame, oclMat &buf)
-{
-    throw_nogpu();
-}
-#else
-
 namespace cv
 {
     namespace ocl
@@ -311,5 +300,4 @@ void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)
     clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);
     openCLSafeCall(err);
 }
-#endif//(HAVE_OPENCL)
 
diff --git a/modules/ocl/src/kernels/haarobjectdetect.cl b/modules/ocl/src/kernels/haarobjectdetect.cl
index 95cfa63c1..7835b4bcc 100644
--- a/modules/ocl/src/kernels/haarobjectdetect.cl
+++ b/modules/ocl/src/kernels/haarobjectdetect.cl
@@ -9,6 +9,7 @@
 //    Niko Li, newlife20080214@gmail.com
 //    Wang Weiyan, wangweiyanster@gmail.com
 //    Jia Haipeng, jiahaipeng95@gmail.com
+//    Nathan, liujun@multicorewareinc.com
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
@@ -47,14 +48,14 @@ typedef float sqsumtype;
 typedef struct  __attribute__((aligned (128)))  GpuHidHaarFeature
 {
     struct __attribute__((aligned (32)))
-    {
-        int p0 __attribute__((aligned (4)));
-        int p1 __attribute__((aligned (4)));
-        int p2 __attribute__((aligned (4)));
-        int p3 __attribute__((aligned (4)));
-        float weight __attribute__((aligned (4)));
-    }
-    rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32)));
+{
+    int p0 __attribute__((aligned (4)));
+    int p1 __attribute__((aligned (4)));
+    int p2 __attribute__((aligned (4)));
+    int p3 __attribute__((aligned (4)));
+    float weight __attribute__((aligned (4)));
+}
+rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32)));
 }
 GpuHidHaarFeature;
 
@@ -108,31 +109,31 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
     int p2 __attribute__((aligned (4)));
     int p3 __attribute__((aligned (4)));
     float inv_window_area __attribute__((aligned (4)));
-}GpuHidHaarClassifierCascade;
+} GpuHidHaarClassifierCascade;
 
 
 __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(//constant GpuHidHaarClassifierCascade * cascade,
-                                          global GpuHidHaarStageClassifier * stagecascadeptr,
-                                          global int4 * info,
-                                          global GpuHidHaarTreeNode * nodeptr,
-                                          global const int * restrict sum1,
-                                          global const float * restrict sqsum1,
-                                          global int4 * candidate,
-                                          const int pixelstep,
-                                          const int loopcount,
-                                          const int start_stage,
-                                          const int split_stage,
-                                          const int end_stage,
-                                          const int startnode,
-                                          const int splitnode,
-                                          const int4 p,
-                                          const int4 pq,
-                                          const float correction
-                                          //const int width,
-                                          //const int height,
-                                          //const int grpnumperline,
-                                          //const int totalgrp
-                                          )
+    global GpuHidHaarStageClassifier * stagecascadeptr,
+    global int4 * info,
+    global GpuHidHaarTreeNode * nodeptr,
+    global const int * restrict sum1,
+    global const float * restrict sqsum1,
+    global int4 * candidate,
+    const int pixelstep,
+    const int loopcount,
+    const int start_stage,
+    const int split_stage,
+    const int end_stage,
+    const int startnode,
+    const int splitnode,
+    const int4 p,
+    const int4 pq,
+    const float correction
+    //const int width,
+    //const int height,
+    //const int grpnumperline,
+    //const int totalgrp
+)
 {
     int grpszx = get_local_size(0);
     int grpszy = get_local_size(1);
@@ -184,7 +185,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
 
         __global const int * sum = sum1 + imgoff;
         __global const float * sqsum = sqsum1 + imgoff;
-        for(int grploop=grpidx;grploop<totalgrp;grploop+=grpnumx)
+        for(int grploop=grpidx; grploop<totalgrp; grploop+=grpnumx)
         {
             int grpidy = grploop / grpnumperline;
             int grpidx = grploop - mul24(grpidy, grpnumperline);
@@ -195,7 +196,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
             int grpoffx = x-lclidx;
             int grpoffy = y-lclidy;
 
-            for(int i=0;i<read_loop;i++)
+            for(int i=0; i<read_loop; i++)
             {
                 int pos_id = mad24(i,lcl_sz,lcl_id);
                 pos_id = pos_id < total_read ? pos_id : 0;
@@ -234,15 +235,15 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                 cascadeinfo1.x +=lcl_off;
                 cascadeinfo1.z +=lcl_off;
                 mean = (lcldata[mad24(cascadeinfo1.y,readwidth,cascadeinfo1.x)] - lcldata[mad24(cascadeinfo1.y,readwidth,cascadeinfo1.z)] -
-                    lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.x)] + lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.z)])
-                    *correction;
+                        lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.x)] + lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.z)])
+                       *correction;
 
                 int p_offset = mad24(y, pixelstep, x);
 
                 cascadeinfo2.x +=p_offset;
                 cascadeinfo2.z +=p_offset;
                 variance_norm_factor =sqsum[mad24(cascadeinfo2.y, pixelstep, cascadeinfo2.x)] - sqsum[mad24(cascadeinfo2.y, pixelstep, cascadeinfo2.z)] -
-                    sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.x)] + sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.z)];
+                                      sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.x)] + sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.z)];
 
                 variance_norm_factor = variance_norm_factor * correction - mean * mean;
                 variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1.f;
@@ -270,19 +271,19 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                         info2.z +=lcl_off;
 
                         float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
-                            lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
+                                          lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
 
 
                         classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
-                            lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
+                                     lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
 
 
                         //if((info3.z - info3.x) && (!stageinfo.z))
                         //{
-                            info3.x +=lcl_off;
-                            info3.z +=lcl_off;
-                            classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
-                                lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
+                        info3.x +=lcl_off;
+                        info3.z +=lcl_off;
+                        classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
+                                     lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
                         //}
                         stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
                         nodecounter++;
@@ -299,12 +300,13 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                 }
                 barrier(CLK_LOCAL_MEM_FENCE);
                 int queuecount  = lclcount[0];
+                barrier(CLK_LOCAL_MEM_FENCE);
                 nodecounter = splitnode;
-                for(int stageloop = split_stage; stageloop< end_stage && queuecount>0;stageloop++)
+                for(int stageloop = split_stage; stageloop< end_stage && queuecount>0; stageloop++)
                 {
-                  //barrier(CLK_LOCAL_MEM_FENCE);
+                    //barrier(CLK_LOCAL_MEM_FENCE);
                     //if(lcl_id == 0)
-            lclcount[0]=0;
+                    lclcount[0]=0;
                     barrier(CLK_LOCAL_MEM_FENCE);
 
                     int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
@@ -316,70 +318,73 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                     int lcl_compute_win_id = (lcl_id >>(6-perfscale));
                     int lcl_loops = (stageinfo.x + lcl_compute_win -1) >> (6-perfscale);
                     int lcl_compute_id = lcl_id - (lcl_compute_win_id << (6-perfscale));
-                    for(int queueloop=0;queueloop<queuecount_loop/* && lcl_compute_win_id < queuecount*/;queueloop++)
+                    for(int queueloop=0; queueloop<queuecount_loop/* && lcl_compute_win_id < queuecount*/; queueloop++)
                     {
                         float stage_sum = 0.f;
                         int temp_coord = lcloutindex[lcl_compute_win_id<<1];
                         float variance_norm_factor = as_float(lcloutindex[(lcl_compute_win_id<<1)+1]);
                         int queue_pixel = mad24(((temp_coord  & (int)0xffff0000)>>16),readwidth,temp_coord & 0xffff);
 
-                      //barrier(CLK_LOCAL_MEM_FENCE);
-            if(lcl_compute_win_id < queuecount) {
-
-            int tempnodecounter = lcl_compute_id;
-                        float part_sum = 0.f;
-                        for(int lcl_loop=0;lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;lcl_loop++)
+                        //barrier(CLK_LOCAL_MEM_FENCE);
+                        if(lcl_compute_win_id < queuecount)
                         {
-                            __global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter + tempnodecounter);
 
-                            int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
-                            int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
-                            int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
-                            float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
-                            float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
-                            float nodethreshold  = w.w * variance_norm_factor;
+                            int tempnodecounter = lcl_compute_id;
+                            float part_sum = 0.f;
+                            for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
+                            {
+                                __global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter + tempnodecounter);
 
-                            info1.x +=queue_pixel;
-                            info1.z +=queue_pixel;
-                            info2.x +=queue_pixel;
-                            info2.z +=queue_pixel;
+                                int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
+                                int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
+                                int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
+                                float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
+                                float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
+                                float nodethreshold  = w.w * variance_norm_factor;
 
-                            float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
-                                lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
+                                info1.x +=queue_pixel;
+                                info1.z +=queue_pixel;
+                                info2.x +=queue_pixel;
+                                info2.z +=queue_pixel;
+
+                                float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
+                                                  lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
 
 
-                            classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
-                                lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
-                        //if((info3.z - info3.x) && (!stageinfo.z))
-                        //{
+                                classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
+                                             lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
+                                //if((info3.z - info3.x) && (!stageinfo.z))
+                                //{
                                 info3.x +=queue_pixel;
                                 info3.z +=queue_pixel;
                                 classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
-                                    lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
-                        //}
-                            part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
-                            tempnodecounter +=lcl_compute_win;
-                        }//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
-                        partialsum[lcl_id]=part_sum;
-            }
+                                             lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
+                                //}
+                                part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
+                                tempnodecounter +=lcl_compute_win;
+                            }//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
+                            partialsum[lcl_id]=part_sum;
+                        }
                         barrier(CLK_LOCAL_MEM_FENCE);
-            if(lcl_compute_win_id < queuecount) {
-                        for(int i=0;i<lcl_compute_win && (lcl_compute_id==0);i++)
+                        if(lcl_compute_win_id < queuecount)
                         {
-                            stage_sum += partialsum[lcl_id+i];
+                            for(int i=0; i<lcl_compute_win && (lcl_compute_id==0); i++)
+                            {
+                                stage_sum += partialsum[lcl_id+i];
+                            }
+                            if(stage_sum >= stagethreshold && (lcl_compute_id==0))
+                            {
+                                int queueindex = atomic_inc(lclcount);
+                                lcloutindex[queueindex<<1] = temp_coord;
+                                lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor);
+                            }
+                            lcl_compute_win_id +=(1<<perfscale);
                         }
-                        if(stage_sum >= stagethreshold && (lcl_compute_id==0))
-                        {
-                            int queueindex = atomic_inc(lclcount);
-                            lcloutindex[queueindex<<1] = temp_coord;
-                            lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor);
-                        }
-                        lcl_compute_win_id +=(1<<perfscale);
-            }
                         barrier(CLK_LOCAL_MEM_FENCE);
                     }//end for(int queueloop=0;queueloop<queuecount_loop;queueloop++)
-                  barrier(CLK_LOCAL_MEM_FENCE);
+                    //barrier(CLK_LOCAL_MEM_FENCE);
                     queuecount = lclcount[0];
+                    barrier(CLK_LOCAL_MEM_FENCE);
                     nodecounter += stageinfo.x;
                 }//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++)
                 //barrier(CLK_LOCAL_MEM_FENCE);
@@ -420,139 +425,139 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
 
 
 
-                /*
-                if(stagecascade->two_rects)
-                {
-                    #pragma unroll
-                    for( n = 0; n < stagecascade->count; n++ )
-                    {
-                        t1 = *(node + counter);
-                        t = t1.threshold * variance_norm_factor;
-                        classsum = calc_sum1(t1,p_offset,0) * t1.weight[0];
-
-                        classsum  += calc_sum1(t1, p_offset,1) * t1.weight[1];
-                        stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0];
-
-                        counter++;
-                    }
-                }
-                else
-                {
-                    #pragma unroll
-                    for( n = 0; n < stagecascade->count; n++ )
-                    {
-                        t = node[counter].threshold*variance_norm_factor;
-                        classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0];
-                        classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1];
-
-                        if( node[counter].p0[2] )
-                            classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2];
-
-                        stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify
-
-                        counter++;
-                    }
-                }
-                */
-                /*
-__kernel void gpuRunHaarClassifierCascade_ScaleWindow(
-                                          constant GpuHidHaarClassifierCascade * _cascade,
-                                          global GpuHidHaarStageClassifier * stagecascadeptr,
-                                          //global GpuHidHaarClassifier * classifierptr,
-                                          global GpuHidHaarTreeNode * nodeptr,
-                                          global int * sum,
-                                          global float * sqsum,
-                                          global int * _candidate,
-                                          int pixel_step,
-                                          int cols,
-                                          int rows,
-                                          int start_stage,
-                                          int end_stage,
-                                          //int counts,
-                                          int nodenum,
-                                          int ystep,
-                                          int detect_width,
-                                          //int detect_height,
-                                          int loopcount,
-                                          int outputstep)
-                                          //float scalefactor)
+/*
+if(stagecascade->two_rects)
 {
-    unsigned int x1 = get_global_id(0);
-    unsigned int y1 = get_global_id(1);
-    int p_offset;
-    int m, n;
-    int result;
-    int counter;
-    float mean, variance_norm_factor;
-    for(int i=0;i<loopcount;i++)
+    #pragma unroll
+    for( n = 0; n < stagecascade->count; n++ )
     {
-        constant GpuHidHaarClassifierCascade * cascade = _cascade + i;
-        global int * candidate = _candidate + i*outputstep;
-        int window_width = cascade->p1 - cascade->p0;
-        int window_height = window_width;
-        result = 1;
-        counter = 0;
-            unsigned int x = mul24(x1,ystep);
-            unsigned int y = mul24(y1,ystep);
-        if((x < cols - window_width - 1) && (y < rows - window_height -1))
-        {
-            global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage;
-            //global GpuHidHaarClassifier      *classifier   = classifierptr;
-            global GpuHidHaarTreeNode        *node         = nodeptr + nodenum*i;
+        t1 = *(node + counter);
+        t = t1.threshold * variance_norm_factor;
+        classsum = calc_sum1(t1,p_offset,0) * t1.weight[0];
 
-            p_offset = mad24(y, pixel_step, x);// modify
+        classsum  += calc_sum1(t1, p_offset,1) * t1.weight[1];
+        stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0];
 
-            mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) -
-                    *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3))
-                    *cascade->inv_window_area;
-
-            variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) -
-                                    *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset);
-            variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean;
-            variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify
-
-            // if( cascade->is_stump_based )
-            //{
-            for( m = start_stage; m < end_stage; m++ )
-            {
-                float stage_sum = 0.f;
-                float t,  classsum;
-                GpuHidHaarTreeNode t1;
-
-                //#pragma unroll
-                for( n = 0; n < stagecascade->count; n++ )
-                {
-                     t1 = *(node + counter);
-                     t  = t1.threshold * variance_norm_factor;
-                     classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1];
-
-                     if((t1.p0[2]) && (!stagecascade->two_rects))
-                         classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2];
-
-                     stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify
-                     counter++;
-                }
-
-                if (stage_sum < stagecascade->threshold)
-                {
-                    result = 0;
-                    break;
-                }
-
-                stagecascade++;
-
-            }
-                if(result)
-                {
-                    candidate[4 * (y1 * detect_width + x1)]     = x;
-                    candidate[4 * (y1 * detect_width + x1) + 1] = y;
-                    candidate[4 * (y1 * detect_width + x1)+2]     = window_width;
-                    candidate[4 * (y1 * detect_width + x1) + 3] = window_height;
-                }
-            //}
-        }
+        counter++;
     }
 }
+else
+{
+    #pragma unroll
+    for( n = 0; n < stagecascade->count; n++ )
+    {
+        t = node[counter].threshold*variance_norm_factor;
+        classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0];
+        classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1];
+
+        if( node[counter].p0[2] )
+            classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2];
+
+        stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify
+
+        counter++;
+    }
+}
+*/
+/*
+__kernel void gpuRunHaarClassifierCascade_ScaleWindow(
+                          constant GpuHidHaarClassifierCascade * _cascade,
+                          global GpuHidHaarStageClassifier * stagecascadeptr,
+                          //global GpuHidHaarClassifier * classifierptr,
+                          global GpuHidHaarTreeNode * nodeptr,
+                          global int * sum,
+                          global float * sqsum,
+                          global int * _candidate,
+                          int pixel_step,
+                          int cols,
+                          int rows,
+                          int start_stage,
+                          int end_stage,
+                          //int counts,
+                          int nodenum,
+                          int ystep,
+                          int detect_width,
+                          //int detect_height,
+                          int loopcount,
+                          int outputstep)
+                          //float scalefactor)
+{
+unsigned int x1 = get_global_id(0);
+unsigned int y1 = get_global_id(1);
+int p_offset;
+int m, n;
+int result;
+int counter;
+float mean, variance_norm_factor;
+for(int i=0;i<loopcount;i++)
+{
+constant GpuHidHaarClassifierCascade * cascade = _cascade + i;
+global int * candidate = _candidate + i*outputstep;
+int window_width = cascade->p1 - cascade->p0;
+int window_height = window_width;
+result = 1;
+counter = 0;
+unsigned int x = mul24(x1,ystep);
+unsigned int y = mul24(y1,ystep);
+if((x < cols - window_width - 1) && (y < rows - window_height -1))
+{
+global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage;
+//global GpuHidHaarClassifier      *classifier   = classifierptr;
+global GpuHidHaarTreeNode        *node         = nodeptr + nodenum*i;
+
+p_offset = mad24(y, pixel_step, x);// modify
+
+mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) -
+    *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3))
+    *cascade->inv_window_area;
+
+variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) -
+                    *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset);
+variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean;
+variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify
+
+// if( cascade->is_stump_based )
+//{
+for( m = start_stage; m < end_stage; m++ )
+{
+float stage_sum = 0.f;
+float t,  classsum;
+GpuHidHaarTreeNode t1;
+
+//#pragma unroll
+for( n = 0; n < stagecascade->count; n++ )
+{
+     t1 = *(node + counter);
+     t  = t1.threshold * variance_norm_factor;
+     classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1];
+
+     if((t1.p0[2]) && (!stagecascade->two_rects))
+         classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2];
+
+     stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify
+     counter++;
+}
+
+if (stage_sum < stagecascade->threshold)
+{
+    result = 0;
+    break;
+}
+
+stagecascade++;
+
+}
+if(result)
+{
+    candidate[4 * (y1 * detect_width + x1)]     = x;
+    candidate[4 * (y1 * detect_width + x1) + 1] = y;
+    candidate[4 * (y1 * detect_width + x1)+2]     = window_width;
+    candidate[4 * (y1 * detect_width + x1) + 3] = window_height;
+}
+//}
+}
+}
+}
 */
 
 
diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp
index 8a2705646..ab867d4d3 100644
--- a/modules/ocl/src/match_template.cpp
+++ b/modules/ocl/src/match_template.cpp
@@ -51,12 +51,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#if !defined (HAVE_OPENCL)
-void cv::ocl::matchTemplate(const oclMat &, const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-#else
 //helper routines
 namespace cv
 {
@@ -498,4 +492,3 @@ void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &re
     CV_Assert(caller);
     caller(image, templ, result, buf);
 }
-#endif //
diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp
index 7b90218d9..f0e65f9cc 100644
--- a/modules/ocl/src/matrix_operations.cpp
+++ b/modules/ocl/src/matrix_operations.cpp
@@ -58,60 +58,6 @@ using namespace std;
 //////////////////////////////// oclMat ////////////////////////////////
 ////////////////////////////////////////////////////////////////////////
 
-#if !defined (HAVE_OPENCL)
-
-namespace cv
-{
-    namespace ocl
-    {
-        void oclMat::upload(const Mat & /*m*/)
-        {
-            throw_nogpu();
-        }
-        void oclMat::download(cv::Mat & /*m*/) const
-        {
-            throw_nogpu();
-        }
-        void oclMat::copyTo( oclMat & /*m*/ ) const
-        {
-            throw_nogpu();
-        }
-        void oclMat::copyTo( oclMat & /*m*/, const oclMat &/* mask */) const
-        {
-            throw_nogpu();
-        }
-        void oclMat::convertTo( oclMat & /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const
-        {
-            throw_nogpu();
-        }
-        oclMat &oclMat::operator = (const Scalar & /*s*/)
-        {
-            throw_nogpu();
-            return *this;
-        }
-        oclMat &oclMat::setTo(const Scalar & /*s*/, const oclMat & /*mask*/)
-        {
-            throw_nogpu();
-            return *this;
-        }
-        oclMat oclMat::reshape(int /*new_cn*/, int /*new_rows*/) const
-        {
-            throw_nogpu();
-            return oclMat();
-        }
-        void oclMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/)
-        {
-            throw_nogpu();
-        }
-        void oclMat::release()
-        {
-            throw_nogpu();
-        }
-    }
-}
-
-#else /* !defined (HAVE_OPENCL) */
-
 //helper routines
 namespace cv
 {
@@ -1045,4 +991,3 @@ oclMat& cv::ocl::oclMat::operator/=( const oclMat& m )
     divide(*this, m, *this);
     return *this;
 }
-#endif /* !defined (HAVE_OPENCL) */
diff --git a/modules/ocl/src/mcwutil.hpp b/modules/ocl/src/mcwutil.hpp
index 8db61f163..d1986b93b 100644
--- a/modules/ocl/src/mcwutil.hpp
+++ b/modules/ocl/src/mcwutil.hpp
@@ -47,9 +47,6 @@
 #define _OPENCV_MCWUTIL_
 
 #include "precomp.hpp"
-
-#if defined (HAVE_OPENCL)
-
 using namespace std;
 
 namespace cv
@@ -76,5 +73,5 @@ namespace cv
     }//namespace ocl
 
 }//namespace cv
-#endif // HAVE_OPENCL
+
 #endif //_OPENCV_MCWUTIL_
diff --git a/modules/ocl/src/mssegmentation.cpp b/modules/ocl/src/mssegmentation.cpp
index 48bc96f41..300265bc2 100644
--- a/modules/ocl/src/mssegmentation.cpp
+++ b/modules/ocl/src/mssegmentation.cpp
@@ -44,23 +44,6 @@
 
 #include "precomp.hpp"
 
-#if !defined(HAVE_OPENCL)
-
-namespace cv
-{
-    namespace ocl
-    {
-
-        void meanShiftSegmentation(const oclMat &, Mat &, int, int, int, TermCriteria)
-        {
-            throw_nogpu();
-        }
-
-    }
-}
-
-#else
-
 using namespace std;
 
 // Auxiliray stuff
@@ -411,4 +394,3 @@ namespace cv
 
     }
 }
-#endif // #if !defined (HAVE_OPENCL)
diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp
index 87c536ec9..d4dbfd506 100644
--- a/modules/ocl/src/pyrlk.cpp
+++ b/modules/ocl/src/pyrlk.cpp
@@ -52,13 +52,6 @@ using namespace std;
 using namespace cv;
 using namespace cv::ocl;
 
-#if !defined (HAVE_OPENCL)
-
-void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &, const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &) {  }
-void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat *) {  }
-
-#else /* !defined (HAVE_OPENCL) */
-
 namespace cv
 {
 namespace ocl
@@ -862,5 +855,3 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextI
 
     clFinish(prevImg.clCxt->impl->clCmdQueue);
 }
-
-#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp
index ebd3535b6..fa7f73d4c 100644
--- a/modules/ocl/src/pyrup.cpp
+++ b/modules/ocl/src/pyrup.cpp
@@ -54,13 +54,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#ifndef HAVE_OPENCL
-void cv::ocl::pyrUp(const oclMat &, GpuMat &, oclMat &)
-{
-    throw_nogpu();
-}
-#else
-
 namespace cv
 {
     namespace ocl
@@ -93,5 +86,4 @@ namespace cv
             openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
         }
     }
-};
-#endif // HAVE_OPENCL
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp
index b071ff870..e7aad4382 100644
--- a/modules/ocl/src/split_merge.cpp
+++ b/modules/ocl/src/split_merge.cpp
@@ -58,34 +58,6 @@ using std::endl;
 ///////////////// oclMat merge and split ///////////////////////////////
 ////////////////////////////////////////////////////////////////////////
 
-#if !defined (HAVE_OPENCL)
-
-namespace cv
-{
-    namespace ocl
-    {
-        void cv::ocl::merge(const oclMat *src_mat, size_t count, oclMat &dst_mat)
-        {
-            throw_nogpu();
-        }
-        void cv::ocl::merge(const vector<oclMat> &src_mat, oclMat &dst_mat)
-        {
-            throw_nogpu();
-        }
-
-        void cv::ocl::split(const oclMat &src, oclMat *dst)
-        {
-            throw_nogpu();
-        }
-        void cv::ocl::split(const oclMat &src, vector<oclMat> &dst)
-        {
-            throw_nogpu();
-        }
-    }
-}
-
-#else /* !defined (HAVE_OPENCL) */
-
 namespace cv
 {
     namespace ocl
@@ -417,4 +389,3 @@ void cv::ocl::split(const oclMat &src, vector<oclMat> &dst)
     if(src.oclchannels() > 0)
         split_merge::split(src, &dst[0]);
 }
-#endif /* !defined (HAVE_OPENCL) */
diff --git a/modules/ocl/src/surf.cpp b/modules/ocl/src/surf.cpp
index 71a7aacd3..65dc86d16 100644
--- a/modules/ocl/src/surf.cpp
+++ b/modules/ocl/src/surf.cpp
@@ -50,59 +50,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-#if !defined (HAVE_OPENCL)
-
-cv::ocl::SURF_OCL::SURF_OCL()
-{
-    throw_nogpu();
-}
-cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool)
-{
-    throw_nogpu();
-}
-int cv::ocl::SURF_OCL::descriptorSize() const
-{
-    throw_nogpu();
-    return 0;
-}
-void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint> &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &, vector<KeyPoint> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat &, vector<float> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &)
-{
-    throw_nogpu();
-}
-void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &, oclMat &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &)
-{
-    throw_nogpu();
-}
-void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &, oclMat &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &, vector<float> &, bool)
-{
-    throw_nogpu();
-}
-void cv::ocl::SURF_OCL::releaseMemory()
-{
-    throw_nogpu();
-}
-
-#else /* !defined (HAVE_OPENCL) */
 namespace cv
 {
     namespace ocl
@@ -755,5 +702,3 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
     }
 }
 
-#endif // /* !defined (HAVE_OPENCL) */
-
diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
index 5bf08c80e..9cc6c1c89 100644
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -791,8 +791,8 @@ TEST_P(WarpAffine, Mat)
 {
     static const double coeffs[2][3] =
     {
-        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
-        {sin(3.14 / 6), cos(3.14 / 6), -100.0}
+        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0}
     };
     Mat M(2, 3, CV_64F, (void *)coeffs);
 
diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp
index 3892513b4..c948e1d53 100644
--- a/modules/ocl/test/test_match_template.cpp
+++ b/modules/ocl/test/test_match_template.cpp
@@ -44,7 +44,7 @@
 
 
 #include "precomp.hpp"
-//#define PERF_TEST 0
+
 #ifdef HAVE_OPENCL
 ////////////////////////////////////////////////////////////////////////////////
 // MatchTemplate
@@ -97,15 +97,7 @@ TEST_P(MatchTemplate8U, Accuracy)
     cv::Mat mat_dst;
     dst.download(mat_dst);
 
-
     EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
-
-#ifdef PERF_TEST
-    {
-        P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
-        P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
-    }
-#endif // PERF_TEST
 }
 
 PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMethod)
@@ -144,17 +136,6 @@ TEST_P(MatchTemplate32F, Accuracy)
     dst.download(mat_dst);
 
     EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
-
-#ifdef PERF_TEST
-    {
-        std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
-        std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
-        std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
-        std::cout << "Channels: " << cn << std::endl;
-        P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
-        P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
-    }
-#endif // PERF_TEST
 }
 
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,