From f1c549fabf2d916df306a889137de49f3ef338d5 Mon Sep 17 00:00:00 2001
From: yao <bitwangyaoyao@gmail.com>
Date: Wed, 19 Jun 2013 16:36:23 +0800
Subject: [PATCH] revise ocl samples, add tvl1 sample

---
 samples/ocl/facedetect.cpp         | 159 ++++++++------
 samples/ocl/hog.cpp                | 335 +++++++++++------------------
 samples/ocl/pyrlk_optical_flow.cpp |  59 +++--
 samples/ocl/squares.cpp            | 240 +++++++++++++++++----
 samples/ocl/stereo_match.cpp       | 306 ++++++++++++--------------
 samples/ocl/surf_matcher.cpp       | 205 +++++++-----------
 samples/ocl/tvl1_optical_flow.cpp  | 265 +++++++++++++++++++++++
 7 files changed, 924 insertions(+), 645 deletions(-)
 create mode 100644 samples/ocl/tvl1_optical_flow.cpp

diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp
index 684c2d923..a49610aeb 100644
--- a/samples/ocl/facedetect.cpp
+++ b/samples/ocl/facedetect.cpp
@@ -7,55 +7,67 @@
 
 using namespace std;
 using namespace cv;
-#define LOOP_NUM 10 
+#define LOOP_NUM 10
 
 const static Scalar colors[] =  { CV_RGB(0,0,255),
-        CV_RGB(0,128,255),
-        CV_RGB(0,255,255),
-        CV_RGB(0,255,0),
-        CV_RGB(255,128,0),
-        CV_RGB(255,255,0),
-        CV_RGB(255,0,0),
-        CV_RGB(255,0,255)} ;
+                                  CV_RGB(0,128,255),
+                                  CV_RGB(0,255,255),
+                                  CV_RGB(0,255,0),
+                                  CV_RGB(255,128,0),
+                                  CV_RGB(255,255,0),
+                                  CV_RGB(255,0,0),
+                                  CV_RGB(255,0,255)
+                                } ;
+
 
 int64 work_begin = 0;
 int64 work_end = 0;
+string outputName;
 
-static void workBegin() 
-{ 
+static void workBegin()
+{
     work_begin = getTickCount();
 }
 static void workEnd()
 {
     work_end += (getTickCount() - work_begin);
 }
-static double getTime(){
+static double getTime()
+{
     return work_end /((double)cvGetTickFrequency() * 1000.);
 }
 
-void detect( Mat& img, vector<Rect>& faces, 
-    cv::ocl::OclCascadeClassifierBuf& cascade, 
-    double scale, bool calTime);
 
-void detectCPU( Mat& img, vector<Rect>& faces, 
-    CascadeClassifier& cascade, 
-    double scale, bool calTime);
+void detect( Mat& img, vector<Rect>& faces,
+             ocl::OclCascadeClassifierBuf& cascade,
+             double scale, bool calTime);
+
+
+void detectCPU( Mat& img, vector<Rect>& faces,
+                CascadeClassifier& cascade,
+                double scale, bool calTime);
+
 
 void Draw(Mat& img, vector<Rect>& faces, double scale);
 
+
 // This function test if gpu_rst matches cpu_rst.
 // If the two vectors are not equal, it will return the difference in vector size
 // Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
-double checkRectSimilarity(Size sz, std::vector<Rect>& cpu_rst, std::vector<Rect>& gpu_rst);
+double checkRectSimilarity(Size sz, vector<Rect>& cpu_rst, vector<Rect>& gpu_rst);
+
 
 int main( int argc, const char** argv )
 {
     const char* keys =
         "{ h | help       | false       | print help message }"
         "{ i | input      |             | specify input image }"
-        "{ t | template   | ../../../data/haarcascades/haarcascade_frontalface_alt.xml  | specify template file }"
+        "{ t | template   | haarcascade_frontalface_alt.xml |"
+        " specify template file path }"
         "{ c | scale      |   1.0       | scale image }"
-        "{ s | use_cpu    | false       | use cpu or gpu to process the image }";
+        "{ s | use_cpu    | false       | use cpu or gpu to process the image }"
+        "{ o | output     | facedetect_output.jpg  |"
+        " specify output image save path(only works when input is images) }";
 
     CommandLineParser cmd(argc, argv, keys);
     if (cmd.get<bool>("help"))
@@ -69,9 +81,10 @@ int main( int argc, const char** argv )
 
     bool useCPU = cmd.get<bool>("s");
     string inputName = cmd.get<string>("i");
+    outputName = cmd.get<string>("o");
     string cascadeName = cmd.get<string>("t");
     double scale = cmd.get<double>("c");
-    cv::ocl::OclCascadeClassifierBuf cascade;
+    ocl::OclCascadeClassifierBuf cascade;
     CascadeClassifier  cpu_cascade;
 
     if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
@@ -83,7 +96,7 @@ int main( int argc, const char** argv )
     if( inputName.empty() )
     {
         capture = cvCaptureFromCAM(0);
-        if(!capture) 
+        if(!capture)
             cout << "Capture from CAM 0 didn't work" << endl;
     }
     else if( inputName.size() )
@@ -92,7 +105,7 @@ int main( int argc, const char** argv )
         if( image.empty() )
         {
             capture = cvCaptureFromAVI( inputName.c_str() );
-            if(!capture) 
+            if(!capture)
                 cout << "Capture from AVI didn't work" << endl;
             return -1;
         }
@@ -100,14 +113,15 @@ int main( int argc, const char** argv )
     else
     {
         image = imread( "lena.jpg", 1 );
-        if(image.empty()) 
+        if(image.empty())
             cout << "Couldn't read lena.jpg" << endl;
         return -1;
     }
 
+
     cvNamedWindow( "result", 1 );
-    std::vector<cv::ocl::Info> oclinfo;
-    int devnums = cv::ocl::getDevice(oclinfo);
+    vector<ocl::Info> oclinfo;
+    int devnums = ocl::getDevice(oclinfo);
     if( devnums < 1 )
     {
         std::cout << "no device found\n";
@@ -130,19 +144,23 @@ int main( int argc, const char** argv )
                 frame.copyTo( frameCopy );
             else
                 flip( frame, frameCopy, 0 );
-            if(useCPU){
+            if(useCPU)
+            {
                 detectCPU(frameCopy, faces, cpu_cascade, scale, false);
             }
-            else{
-                detect(frameCopy, faces, cascade, scale, false);     
+            else
+            {
+                detect(frameCopy, faces, cascade, scale, false);
             }
             Draw(frameCopy, faces, scale);
             if( waitKey( 10 ) >= 0 )
                 goto _cleanup_;
         }
 
+
         waitKey(0);
 
+
 _cleanup_:
         cvReleaseCapture( &capture );
     }
@@ -152,18 +170,21 @@ _cleanup_:
         vector<Rect> faces;
         vector<Rect> ref_rst;
         double accuracy = 0.;
-        for(int i = 0; i <= LOOP_NUM;i ++) 
+        for(int i = 0; i <= LOOP_NUM; i ++)
         {
             cout << "loop" << i << endl;
-            if(useCPU){
-                detectCPU(image, faces, cpu_cascade, scale, i==0?false:true);  
+            if(useCPU)
+            {
+                detectCPU(image, faces, cpu_cascade, scale, i==0?false:true);
             }
-            else{
+            else
+            {
                 detect(image, faces, cascade, scale, i==0?false:true);
-                if(i == 0){
+                if(i == 0)
+                {
                     detectCPU(image, ref_rst, cpu_cascade, scale, false);
                     accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
-                }                    
+                }
             }
             if (i == LOOP_NUM)
             {
@@ -180,31 +201,31 @@ _cleanup_:
     }
 
     cvDestroyWindow("result");
-
     return 0;
 }
 
-void detect( Mat& img, vector<Rect>& faces, 
-    cv::ocl::OclCascadeClassifierBuf& cascade, 
-    double scale, bool calTime)
+void detect( Mat& img, vector<Rect>& faces,
+             ocl::OclCascadeClassifierBuf& cascade,
+             double scale, bool calTime)
 {
-    cv::ocl::oclMat image(img);
-    cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
+    ocl::oclMat image(img);
+    ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
     if(calTime) workBegin();
-    cv::ocl::cvtColor( image, gray, CV_BGR2GRAY );
-    cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
-    cv::ocl::equalizeHist( smallImg, smallImg );
+    ocl::cvtColor( image, gray, CV_BGR2GRAY );
+    ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
+    ocl::equalizeHist( smallImg, smallImg );
 
     cascade.detectMultiScale( smallImg, faces, 1.1,
-        3, 0
-        |CV_HAAR_SCALE_IMAGE
-        , Size(30,30), Size(0, 0) );
+                              3, 0
+                              |CV_HAAR_SCALE_IMAGE
+                              , Size(30,30), Size(0, 0) );
     if(calTime) workEnd();
 }
 
-void detectCPU( Mat& img, vector<Rect>& faces, 
-    CascadeClassifier& cascade, 
-    double scale, bool calTime)
+
+void detectCPU( Mat& img, vector<Rect>& faces,
+                CascadeClassifier& cascade,
+                double scale, bool calTime)
 {
     if(calTime) workBegin();
     Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
@@ -212,11 +233,12 @@ void detectCPU( Mat& img, vector<Rect>& faces,
     resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR);
     equalizeHist(cpu_smallImg, cpu_smallImg);
     cascade.detectMultiScale(cpu_smallImg, faces, 1.1,
-        3, 0 | CV_HAAR_SCALE_IMAGE,
-        Size(30, 30), Size(0, 0));
-    if(calTime) workEnd(); 
+                             3, 0 | CV_HAAR_SCALE_IMAGE,
+                             Size(30, 30), Size(0, 0));
+    if(calTime) workEnd();
 }
 
+
 void Draw(Mat& img, vector<Rect>& faces, double scale)
 {
     int i = 0;
@@ -230,31 +252,38 @@ void Draw(Mat& img, vector<Rect>& faces, double scale)
         radius = cvRound((r->width + r->height)*0.25*scale);
         circle( img, center, radius, color, 3, 8, 0 );
     }
-    cv::imshow( "result", img );
+    imshow( "result", img );
+    imwrite( outputName, img );
 }
 
-double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
+
+double checkRectSimilarity(Size sz, vector<Rect>& ob1, vector<Rect>& ob2)
 {
     double final_test_result = 0.0;
     size_t sz1 = ob1.size();
     size_t sz2 = ob2.size();
 
     if(sz1 != sz2)
+    {
         return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+    }
     else
     {
-        cv::Mat cpu_result(sz, CV_8UC1);
+        if(sz1==0 && sz2==0)
+            return 0;
+        Mat cpu_result(sz, CV_8UC1);
         cpu_result.setTo(0);
 
         for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
-        {      
-            cv::Mat cpu_result_roi(cpu_result, *r);
+        {
+            Mat cpu_result_roi(cpu_result, *r);
             cpu_result_roi.setTo(1);
             cpu_result.copyTo(cpu_result);
         }
-        int cpu_area = cv::countNonZero(cpu_result > 0);
+        int cpu_area = countNonZero(cpu_result > 0);
 
-        cv::Mat gpu_result(sz, CV_8UC1);
+
+        Mat gpu_result(sz, CV_8UC1);
         gpu_result.setTo(0);
         for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
         {
@@ -263,11 +292,13 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
             gpu_result.copyTo(gpu_result);
         }
 
-        cv::Mat result_;
+        Mat result_;
         multiply(cpu_result, gpu_result, result_);
-        int result = cv::countNonZero(result_ > 0);
-
-        final_test_result = 1.0 - (double)result/(double)cpu_area;
+        int result = countNonZero(result_ > 0);
+        if(cpu_area!=0)     // result==0 (no overlap) must yield 1.0, not the 0.0 default
+            final_test_result = 1.0 - (double)result/(double)cpu_area;
+        else if(result!=0)  // defensive: overlap without any cpu-covered pixel
+            final_test_result = -1;
     }
     return final_test_result;
 }
diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp
index 28be6fa9a..ff53e010c 100644
--- a/samples/ocl/hog.cpp
+++ b/samples/ocl/hog.cpp
@@ -10,75 +10,39 @@
 using namespace std;
 using namespace cv;
 
-bool help_showed = false;
-
-class Args
-{
-public:
-    Args();
-    static Args read(int argc, char** argv);
-
-    string src;
-    bool src_is_video;
-    bool src_is_camera;
-    int camera_id;
-
-    bool write_video;
-    string dst_video;
-    double dst_video_fps;
-
-    bool make_gray;
-
-    bool resize_src;
-    int width, height;
-
-    double scale;
-    int nlevels;
-    int gr_threshold;
-
-    double hit_threshold;
-    bool hit_threshold_auto;
-
-    int win_width;
-    int win_stride_width, win_stride_height;
-
-    bool gamma_corr;
-};
-
 class App
 {
 public:
-    App(const Args& s);
+    App(CommandLineParser& cmd);
     void run();
-
     void handleKey(char key);
-
     void hogWorkBegin();
     void hogWorkEnd();
     string hogWorkFps() const;
-
     void workBegin();
     void workEnd();
     string workFps() const;
-
     string message() const;
 
+
 // This function test if gpu_rst matches cpu_rst.
 // If the two vectors are not equal, it will return the difference in vector size
-// Else if will return 
+// Else it will return
 // (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
-    double checkRectSimilarity(Size sz, 
-                               std::vector<Rect>& cpu_rst, 
+    double checkRectSimilarity(Size sz,
+                               std::vector<Rect>& cpu_rst,
                                std::vector<Rect>& gpu_rst);
 private:
     App operator=(App&);
 
-    Args args;
+    //Args args;
     bool running;
-
     bool use_gpu;
     bool make_gray;
     double scale;
+    double resize_scale;
+    int win_width;
+    int win_stride_width, win_stride_height;
     int gr_threshold;
     int nlevels;
     double hit_threshold;
@@ -86,119 +50,49 @@ private:
 
     int64 hog_work_begin;
     double hog_work_fps;
-
     int64 work_begin;
     double work_fps;
-};
 
-static void printHelp()
-{
-    cout << "Histogram of Oriented Gradients descriptor and detector sample.\n"
-         << "\nUsage: hog_gpu\n"
-         << "  (<image>|--video <vide>|--camera <camera_id>) # frames source\n"
-         << "  [--make_gray <true/false>] # convert image to gray one or not\n"
-         << "  [--resize_src <true/false>] # do resize of the source image or not\n"
-         << "  [--width <int>] # resized image width\n"
-         << "  [--height <int>] # resized image height\n"
-         << "  [--hit_threshold <double>] # classifying plane distance threshold (0.0 usually)\n"
-         << "  [--scale <double>] # HOG window scale factor\n"
-         << "  [--nlevels <int>] # max number of HOG window scales\n"
-         << "  [--win_width <int>] # width of the window (48 or 64)\n"
-         << "  [--win_stride_width <int>] # distance by OX axis between neighbour wins\n"
-         << "  [--win_stride_height <int>] # distance by OY axis between neighbour wins\n"
-         << "  [--gr_threshold <int>] # merging similar rects constant\n"
-         << "  [--gamma_correct <int>] # do gamma correction or not\n"
-         << "  [--write_video <bool>] # write video or not\n"
-         << "  [--dst_video <path>] # output video path\n"
-         << "  [--dst_video_fps <double>] # output video fps\n";
-    help_showed = true;
-}
+    string img_source;
+    string vdo_source;
+    string output;
+    int camera_id;
+};
 
 int main(int argc, char** argv)
 {
+    const char* keys =
+        "{ h |  help    | false          | print help message }"
+        "{ i |  input   |                | specify input image}"
+        "{ c | camera   | -1             | enable camera capturing }"
+        "{ v | video    |                | use video as input }"
+        "{ g |  gray    | false          | convert image to gray one or not}"
+        "{ s |  scale   | 1.0            | resize the image before detect}"
+        "{ l |larger_win| false          | use 64x128 window}"
+        "{ o |  output  |                | specify output path when input is images}";
+    CommandLineParser cmd(argc, argv, keys);
+    App app(cmd);
     try
     {
-        if (argc < 2)
-            printHelp();
-        Args args = Args::read(argc, argv);
-        if (help_showed)
-            return -1;
-        App app(args);
         app.run();
     }
-    catch (const Exception& e) { return cout << "error: "  << e.what() << endl, 1; }
-    catch (const exception& e) { return cout << "error: "  << e.what() << endl, 1; }
-    catch(...) { return cout << "unknown exception" << endl, 1; }
+    catch (const Exception& e)
+    {
+        return cout << "error: "  << e.what() << endl, 1;
+    }
+    catch (const exception& e)
+    {
+        return cout << "error: "  << e.what() << endl, 1;
+    }
+    catch(...)
+    {
+        return cout << "unknown exception" << endl, 1;
+    }
     return 0;
 }
 
-
-Args::Args()
+App::App(CommandLineParser& cmd)
 {
-    src_is_video = false;
-    src_is_camera = false;
-    camera_id = 0;
-
-    write_video = false;
-    dst_video_fps = 24.;
-
-    make_gray = false;
-
-    resize_src = false;
-    width = 640;
-    height = 480;
-
-    scale = 1.05;
-    nlevels = 13;
-    gr_threshold = 8;
-    hit_threshold = 1.4;
-    hit_threshold_auto = true;
-
-    win_width = 48;
-    win_stride_width = 8;
-    win_stride_height = 8;
-
-    gamma_corr = true;
-}
-
-
-Args Args::read(int argc, char** argv)
-{
-    Args args;
-    for (int i = 1; i < argc; i++)
-    {
-        if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true");
-        else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true");
-        else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]);
-        else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]);
-        else if (string(argv[i]) == "--hit_threshold")
-        {
-            args.hit_threshold = atof(argv[++i]);
-            args.hit_threshold_auto = false;
-        }
-        else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]);
-        else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]);
-        else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]);
-        else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]);
-        else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]);
-        else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]);
-        else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true");
-        else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true");
-        else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i];
-        else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]);
-        else if (string(argv[i]) == "--help") printHelp();
-        else if (string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; }
-        else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; }
-        else if (args.src.empty()) args.src = argv[i];
-        else throw runtime_error((string("unknown key: ") + argv[i]));
-    }
-    return args;
-}
-
-
-App::App(const Args& s)
-{
-    args = s;
     cout << "\nControls:\n"
          << "\tESC - exit\n"
          << "\tm - change mode GPU <-> CPU\n"
@@ -209,56 +103,56 @@ App::App(const Args& s)
          << "\t4/r - increase/decrease hit threshold\n"
          << endl;
 
+
     use_gpu = true;
-    make_gray = args.make_gray;
-    scale = args.scale;
-    gr_threshold = args.gr_threshold;
-    nlevels = args.nlevels;
+    make_gray = cmd.get<bool>("g");
+    resize_scale = cmd.get<double>("s");
+    win_width = cmd.get<bool>("l") == true ? 64 : 48;
+    vdo_source = cmd.get<string>("v");
+    img_source = cmd.get<string>("i");
+    output = cmd.get<string>("o");
+    camera_id = cmd.get<int>("c");
 
-    if (args.hit_threshold_auto)
-        args.hit_threshold = args.win_width == 48 ? 1.4 : 0.;
-    hit_threshold = args.hit_threshold;
+    win_stride_width = 8;
+    win_stride_height = 8;
+    gr_threshold = 8;
+    nlevels = 13;
+    hit_threshold = win_width == 48 ? 1.4 : 0.;
+    scale = 1.05;
+    gamma_corr = true;
 
-    gamma_corr = args.gamma_corr;
-
-    if (args.win_width != 64 && args.win_width != 48)
-        args.win_width = 64;
-
-    cout << "Scale: " << scale << endl;
-    if (args.resize_src)
-        cout << "Resized source: (" << args.width << ", " << args.height << ")\n";
     cout << "Group threshold: " << gr_threshold << endl;
     cout << "Levels number: " << nlevels << endl;
-    cout << "Win width: " << args.win_width << endl;
-    cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n";
+    cout << "Win width: " << win_width << endl;
+    cout << "Win stride: (" << win_stride_width << ", " << win_stride_height << ")\n";
     cout << "Hit threshold: " << hit_threshold << endl;
     cout << "Gamma correction: " << gamma_corr << endl;
     cout << endl;
 }
 
-
 void App::run()
 {
-    std::vector<ocl::Info> oclinfo;
+    vector<ocl::Info> oclinfo;
     ocl::getDevice(oclinfo);
     running = true;
-    cv::VideoWriter video_writer;
+    VideoWriter video_writer;
 
-    Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96)
-    Size win_stride(args.win_stride_width, args.win_stride_height);
+    Size win_size(win_width, win_width * 2);
+    Size win_stride(win_stride_width, win_stride_height);
 
     // Create HOG descriptors and detectors here
     vector<float> detector;
     if (win_size == Size(64, 128))
-        detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128();
+        detector = ocl::HOGDescriptor::getPeopleDetector64x128();
     else
-        detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96();
+        detector = ocl::HOGDescriptor::getPeopleDetector48x96();
 
-    cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
-                                   cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
-                                   cv::ocl::HOGDescriptor::DEFAULT_NLEVELS);
-    cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
-                              HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
+
+    ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
+                               ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
+                               ocl::HOGDescriptor::DEFAULT_NLEVELS);
+    HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
+                          HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
     gpu_hog.setSVMDetector(detector);
     cpu_hog.setSVMDetector(detector);
 
@@ -267,29 +161,29 @@ void App::run()
         VideoCapture vc;
         Mat frame;
 
-        if (args.src_is_video)
+        if (vdo_source!="")
         {
-            vc.open(args.src.c_str());
+            vc.open(vdo_source.c_str());
             if (!vc.isOpened())
-                throw runtime_error(string("can't open video file: " + args.src));
+                throw runtime_error(string("can't open video file: " + vdo_source));
             vc >> frame;
         }
-        else if (args.src_is_camera)
+        else if (camera_id != -1)
         {
-            vc.open(args.camera_id);
+            vc.open(camera_id);
             if (!vc.isOpened())
             {
                 stringstream msg;
-                msg << "can't open camera: " << args.camera_id;
+                msg << "can't open camera: " << camera_id;
                 throw runtime_error(msg.str());
             }
             vc >> frame;
         }
         else
         {
-            frame = imread(args.src);
+            frame = imread(img_source);
             if (frame.empty())
-                throw runtime_error(string("can't open image file: " + args.src));
+                throw runtime_error(string("can't open image file: " + img_source));
         }
 
         Mat img_aux, img, img_to_show;
@@ -307,13 +201,15 @@ void App::run()
             else frame.copyTo(img_aux);
 
             // Resize image
-            if (args.resize_src) resize(img_aux, img, Size(args.width, args.height));
+            if (abs(resize_scale-1.0)>0.001)    // test the -s resize factor, not the HOG pyramid step `scale`
+            {
+                Size sz((int)((double)img_aux.cols/resize_scale), (int)((double)img_aux.rows/resize_scale));
+                resize(img_aux, img, sz);
+            }
             else img = img_aux;
             img_to_show = img;
-
             gpu_hog.nlevels = nlevels;
             cpu_hog.nlevels = nlevels;
-
             vector<Rect> found;
 
             // Perform HOG classification
@@ -330,15 +226,16 @@ void App::run()
                     vector<Rect> ref_rst;
                     cvtColor(img, img, CV_BGRA2BGR);
                     cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride,
-                                              Size(0, 0), scale, gr_threshold-2);
+                                             Size(0, 0), scale, gr_threshold-2);
                     double accuracy = checkRectSimilarity(img.size(), ref_rst, found);
-                    cout << "\naccuracy value: " << accuracy << endl;           
-                } 
-           }
+                    cout << "\naccuracy value: " << accuracy << endl;
+                }
+            }
             else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
-                                          Size(0, 0), scale, gr_threshold);
+                                              Size(0, 0), scale, gr_threshold);
             hogWorkEnd();
 
+
             // Draw positive classified windows
             for (size_t i = 0; i < found.size(); i++)
             {
@@ -353,25 +250,31 @@ void App::run()
             putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
             putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
             imshow("opencv_gpu_hog", img_to_show);
-
-            if (args.src_is_video || args.src_is_camera) vc >> frame;
+            if (vdo_source!="" || camera_id!=-1) vc >> frame;
 
             workEnd();
 
-            if (args.write_video)
+            if (output!="")
             {
-                if (!video_writer.isOpened())
+                if (img_source!="")     // write image
                 {
-                    video_writer.open(args.dst_video, CV_FOURCC('x','v','i','d'), args.dst_video_fps,
-                                      img_to_show.size(), true);
-                    if (!video_writer.isOpened())
-                        throw std::runtime_error("can't create video writer");
+                    imwrite(output, img_to_show);
                 }
+                else                    //write video
+                {
+                    if (!video_writer.isOpened())
+                    {
+                        video_writer.open(output, CV_FOURCC('x','v','i','d'), 24,
+                                          img_to_show.size(), true);
+                        if (!video_writer.isOpened())
+                            throw std::runtime_error("can't create video writer");
+                    }
 
-                if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR);
-                else cvtColor(img_to_show, img, CV_BGRA2BGR);
+                    if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR);
+                    else cvtColor(img_to_show, img, CV_BGRA2BGR);
 
-                video_writer << img;
+                    video_writer << img;
+                }
             }
 
             handleKey((char)waitKey(3));
@@ -379,7 +282,6 @@ void App::run()
     }
 }
 
-
 void App::handleKey(char key)
 {
     switch (key)
@@ -442,7 +344,10 @@ void App::handleKey(char key)
 }
 
 
-inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); }
+inline void App::hogWorkBegin()
+{
+    hog_work_begin = getTickCount();
+}
 
 inline void App::hogWorkEnd()
 {
@@ -458,8 +363,10 @@ inline string App::hogWorkFps() const
     return ss.str();
 }
 
-
-inline void App::workBegin() { work_begin = getTickCount(); }
+inline void App::workBegin()
+{
+    work_begin = getTickCount();
+}
 
 inline void App::workEnd()
 {
@@ -475,8 +382,9 @@ inline string App::workFps() const
     return ss.str();
 }
 
-double App::checkRectSimilarity(Size sz, 
-                                std::vector<Rect>& ob1, 
+
+double App::checkRectSimilarity(Size sz,
+                                std::vector<Rect>& ob1,
                                 std::vector<Rect>& ob2)
 {
     double final_test_result = 0.0;
@@ -484,20 +392,26 @@ double App::checkRectSimilarity(Size sz,
     size_t sz2 = ob2.size();
 
     if(sz1 != sz2)
+    {
         return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+    }
     else
     {
+        if(sz1==0 && sz2==0)
+            return 0;
         cv::Mat cpu_result(sz, CV_8UC1);
         cpu_result.setTo(0);
 
+
         for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
-        {      
+        {
             cv::Mat cpu_result_roi(cpu_result, *r);
             cpu_result_roi.setTo(1);
             cpu_result.copyTo(cpu_result);
         }
         int cpu_area = cv::countNonZero(cpu_result > 0);
 
+
         cv::Mat gpu_result(sz, CV_8UC1);
         gpu_result.setTo(0);
         for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
@@ -510,10 +424,11 @@ double App::checkRectSimilarity(Size sz,
         cv::Mat result_;
         multiply(cpu_result, gpu_result, result_);
         int result = cv::countNonZero(result_ > 0);
-
-        final_test_result = 1.0 - (double)result/(double)cpu_area;
+        if(cpu_area!=0)     // result==0 (no overlap) must yield 1.0, not the 0.0 default
+            final_test_result = 1.0 - (double)result/(double)cpu_area;
+        else if(result!=0)  // defensive: overlap without any cpu-covered pixel
+            final_test_result = -1;
     }
     return final_test_result;
-
 }
 
diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp
index cc8d886f7..cefa92867 100644
--- a/samples/ocl/pyrlk_optical_flow.cpp
+++ b/samples/ocl/pyrlk_optical_flow.cpp
@@ -11,19 +11,20 @@ using namespace cv;
 using namespace cv::ocl;
 
 typedef unsigned char uchar;
-#define LOOP_NUM 10 
+#define LOOP_NUM 10
 int64 work_begin = 0;
 int64 work_end = 0;
 
-static void workBegin() 
-{ 
+static void workBegin()
+{
     work_begin = getTickCount();
 }
 static void workEnd()
 {
     work_end += (getTickCount() - work_begin);
 }
-static double getTime(){
+static double getTime()
+{
     return work_end * 1000. / getTickFrequency();
 }
 
@@ -93,14 +94,15 @@ int main(int argc, const char* argv[])
     //set this to save kernel compile time from second time you run
     ocl::setBinpath("./");
     const char* keys =
-        "{ h            | help           | false | print help message }"
-        "{ l            | left           |       | specify left image }"
-        "{ r            | right          |       | specify right image }"
-        "{ c            | camera         | 0     | enable camera capturing }"
-        "{ s            | use_cpu        | false | use cpu or gpu to process the image }"
-        "{ v            | video          |       | use video as input }"
-        "{ points       | points         | 1000  | specify points count [GoodFeatureToTrack] }"
-        "{ min_dist     | min_dist       | 0     | specify minimal distance between points [GoodFeatureToTrack] }";
+        "{ h   | help     | false           | print help message }"
+        "{ l   | left     |                 | specify left image }"
+        "{ r   | right    |                 | specify right image }"
+        "{ c   | camera   | 0               | specify camera id }"
+        "{ s   | use_cpu  | false           | use cpu or gpu to process the image }"
+        "{ v   | video    |                 | use video as input }"
+        "{ o   | output   | pyrlk_output.jpg| specify output save path when input is images }"
+        "{ p   | points   | 1000            | specify points count [GoodFeatureToTrack] }"
+        "{ m   | min_dist | 0               | specify minimal distance between points [GoodFeatureToTrack] }";
 
     CommandLineParser cmd(argc, argv, keys);
 
@@ -113,13 +115,13 @@ int main(int argc, const char* argv[])
     }
 
     bool defaultPicturesFail = false;
-    string fname0 = cmd.get<string>("left");
-    string fname1 = cmd.get<string>("right");
-    string vdofile = cmd.get<string>("video");
-    int points = cmd.get<int>("points");
-    double minDist = cmd.get<double>("min_dist");
+    string fname0 = cmd.get<string>("l");
+    string fname1 = cmd.get<string>("r");
+    string vdofile = cmd.get<string>("v");
+    string outfile = cmd.get<string>("o");
+    int points = cmd.get<int>("p");
+    double minDist = cmd.get<double>("m");
     bool useCPU = cmd.get<bool>("s");
-    bool useCamera = cmd.get<bool>("c");
     int inputName = cmd.get<int>("c");
 
     oclMat d_nextPts, d_status;
@@ -132,22 +134,9 @@ int main(int argc, const char* argv[])
     vector<unsigned char> status(points);
     vector<float> err;
 
-    if (frame0.empty() || frame1.empty())
-    {
-        useCamera = true;
-        defaultPicturesFail = true;
-        CvCapture* capture = 0;
-        capture = cvCaptureFromCAM( inputName );
-        if (!capture)
-        {
-            cout << "Can't load input images" << endl;
-            return -1;
-        }
-    }
-
     cout << "Points count : " << points << endl << endl;
 
-    if (useCamera)
+    if (frame0.empty() || frame1.empty())
     {
         CvCapture* capture = 0;
         Mat frame, frameCopy;
@@ -241,10 +230,10 @@ _cleanup_:
     else
     {
 nocamera:
-        for(int i = 0; i <= LOOP_NUM;i ++) 
+        for(int i = 0; i <= LOOP_NUM; i ++)
         {
             cout << "loop" << i << endl;
-            if (i > 0) workBegin();     
+            if (i > 0) workBegin();
 
             if (useCPU)
             {
@@ -274,8 +263,8 @@ nocamera:
                 cout << getTime() / LOOP_NUM << " ms" << endl;
 
                 drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0));
-
                 imshow("PyrLK [Sparse]", frame0);
+                imwrite(outfile, frame0);
             }
         }
     }
diff --git a/samples/ocl/squares.cpp b/samples/ocl/squares.cpp
index 6b184161f..48964ffb2 100644
--- a/samples/ocl/squares.cpp
+++ b/samples/ocl/squares.cpp
@@ -6,7 +6,6 @@
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/ocl/ocl.hpp"
-
 #include <iostream>
 #include <math.h>
 #include <string.h>
@@ -14,23 +13,50 @@
 using namespace cv;
 using namespace std;
 
-static void help()
-{
-    cout <<
-        "\nA program using OCL module pyramid scaling, Canny, dilate functions, threshold, split; cpu contours, contour simpification and\n"
-        "memory storage (it's got it all folks) to find\n"
-        "squares in a list of images pic1-6.png\n"
-        "Returns sequence of squares detected on the image.\n"
-        "the sequence is stored in the specified memory storage\n"
-        "Call:\n"
-        "./squares\n"
-        "Using OpenCV version %s\n" << CV_VERSION << "\n" << endl;
-}
+#define ACCURACY_CHECK 1
 
+#if ACCURACY_CHECK
+// Compares two sets of polygons vertex by vertex: true only if both sets have the
+// same sizes and every vertex pair is within maxDiff on x and y (same order assumed)
+static bool checkPoints(
+    const vector< vector<Point> >& set1,
+    const vector< vector<Point> >& set2,
+    int maxDiff = 5)
+{
+    if(set1.size() != set2.size())
+    {
+        return false;
+    }
+
+    // sizes are equal here, so one index walks both sets in lock-step
+    for(size_t k = 0; k < set1.size(); k ++)
+    {
+        // bind by reference: the polygons are only read, never modified
+        const vector<Point>& pts1 = set1[k];
+        const vector<Point>& pts2 = set2[k];
+
+        if(pts1.size() != pts2.size())
+        {
+            return false;
+        }
+        for(size_t i = 0; i < pts1.size(); i ++)
+        {
+            const Point pt1 = pts1[i], pt2 = pts2[i];
+            if(std::abs(pt1.x - pt2.x) > maxDiff ||
+                    std::abs(pt1.y - pt2.y) > maxDiff)
+            {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+#endif
 
 int thresh = 50, N = 11;
 const char* wndname = "OpenCL Square Detection Demo";
 
+
 // helper function:
 // finds a cosine of angle between vectors
 // from pt0->pt1 and from pt0->pt2
@@ -43,9 +69,92 @@ static double angle( Point pt1, Point pt2, Point pt0 )
     return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
 }
 
+
 // returns sequence of squares detected on the image.
 // the sequence is stored in the specified memory storage
 static void findSquares( const Mat& image, vector<vector<Point> >& squares )
+{
+    squares.clear();
+    Mat pyr, timg, gray0(image.size(), CV_8U), gray;
+
+    // down-scale and upscale the image to filter out the noise
+    pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
+    pyrUp(pyr, timg, image.size());
+    vector<vector<Point> > contours;
+
+    // find squares in every color plane of the image
+    for( int c = 0; c < 3; c++ )
+    {
+        int ch[] = {c, 0}; // from-to pair: channel c of timg -> channel 0 of gray0
+        mixChannels(&timg, 1, &gray0, 1, ch, 1);
+
+        // try several threshold levels
+        for( int l = 0; l < N; l++ )
+        {
+            // hack: use Canny instead of zero threshold level.
+            // Canny helps to catch squares with gradient shading
+            if( l == 0 )
+            {
+                // apply Canny. Take the upper threshold from the global `thresh`
+                // and set the lower to 0 (which forces edges merging)
+                Canny(gray0, gray, 0, thresh, 5);
+                // dilate canny output to remove potential
+                // holes between edge segments
+                dilate(gray, gray, Mat(), Point(-1,-1));
+            }
+            else
+            {
+                // apply threshold if l!=0:
+                //     gray(x,y) = gray0(x,y) > (l+1)*255/N ? 255 : 0
+                cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY);
+            }
+
+            // find contours and store them all as a list
+            findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
+
+            vector<Point> approx;
+
+            // test each contour
+            for( size_t i = 0; i < contours.size(); i++ )
+            {
+                // approximate contour with accuracy proportional
+                // to the contour perimeter
+                approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
+
+                // square contours should have 4 vertices after approximation
+                // relatively large area (to filter out noisy contours)
+                // and be convex.
+                // Note: absolute value of an area is used because
+                // area may be positive or negative - in accordance with the
+                // contour orientation
+                if( approx.size() == 4 &&
+                        fabs(contourArea(Mat(approx))) > 1000 &&
+                        isContourConvex(Mat(approx)) )
+                {
+                    double maxCosine = 0;
+
+                    for( int j = 2; j < 5; j++ )
+                    {
+                        // find the maximum cosine of the angle between joint edges
+                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
+                        maxCosine = MAX(maxCosine, cosine);
+                    }
+
+                    // if cosines of all angles are small
+                    // (all angles are ~90 degree) then write quadrangle
+                    // vertices to resultant sequence
+                    if( maxCosine < 0.3 )
+                        squares.push_back(approx);
+                }
+            }
+        }
+    }
+}
+
+
+// returns sequence of squares detected on the image.
+// the sequence is stored in the specified memory storage
+static void findSquares_ocl( const Mat& image, vector<vector<Point> >& squares )
 {
     squares.clear();
 
@@ -91,7 +200,6 @@ static void findSquares( const Mat& image, vector<vector<Point> >& squares )
             findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
 
             vector<Point> approx;
-
             // test each contour
             for( size_t i = 0; i < contours.size(); i++ )
             {
@@ -106,11 +214,10 @@ static void findSquares( const Mat& image, vector<vector<Point> >& squares )
                 // area may be positive or negative - in accordance with the
                 // contour orientation
                 if( approx.size() == 4 &&
-                    fabs(contourArea(Mat(approx))) > 1000 &&
-                    isContourConvex(Mat(approx)) )
+                        fabs(contourArea(Mat(approx))) > 1000 &&
+                        isContourConvex(Mat(approx)) )
                 {
                     double maxCosine = 0;
-
                     for( int j = 2; j < 5; j++ )
                     {
                         // find the maximum cosine of the angle between joint edges
@@ -139,40 +246,93 @@ static void drawSquares( Mat& image, const vector<vector<Point> >& squares )
         int n = (int)squares[i].size();
         polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, CV_AA);
     }
-
-    imshow(wndname, image);
 }
 
 
-int main(int /*argc*/, char** /*argv*/)
+// Renders the pure-C++ (left pane) and ocl (right pane) results side by side on one canvas
+static Mat drawSquaresBoth( const Mat& image,
+                            const vector<vector<Point> >& sqsCPP,
+                            const vector<vector<Point> >& sqsOCL
+)
 {
+    const Size paneSize = image.size();
+    Mat canvas(Size(image.cols * 2, image.rows), image.type());
+    Mat panes[2] = { canvas(Rect(Point(0, 0), paneSize)),
+                     canvas(Rect(Point(image.cols, 0), paneSize)) };
+    const vector<vector<Point> >* results[2] = { &sqsCPP, &sqsOCL };
+    const char* captions[2] = { "C++", "OCL" };
+
+    for( int k = 0; k < 2; k++ )
+    {
+        image.copyTo(panes[k]);
+        drawSquares(panes[k], *results[k]);
+        // thick black text under thin white text gives an outlined caption
+        putText(panes[k], captions[k], Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, 0.8f, Scalar::all(0), 2);
+        putText(panes[k], captions[k], Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, 0.8f, Scalar::all(255), 1);
+    }
+    return canvas;
+}
+
+// Entry point: runs the CPU and OCL square finders on one input image, times both, then shows and saves the result
+int main(int argc, char** argv)
+{
+    const char* keys =
+        "{ i | input   |                    | specify input image }"
+        "{ o | output  | squares_output.jpg | specify output save path}";
+    CommandLineParser cmd(argc, argv, keys);
+    string inputName = cmd.get<string>("i");
+    string outfile = cmd.get<string>("o");
+    if(inputName.empty()) // no input image given: print the options and exit
+    {
+        cout << "Available options:" << endl;
+        cmd.printParams();
+        return 0;
+    }
 
-    //ocl::setBinpath("F:/kernel_bin");
     vector<ocl::Info> info;
     CV_Assert(ocl::getDevice(info));
-
-    static const char* names[] = { "pic1.png", "pic2.png", "pic3.png",
-        "pic4.png", "pic5.png", "pic6.png", 0 };
-    help();
+    int iterations = 10; // number of timed runs the averages below are taken over
     namedWindow( wndname, 1 );
-    vector<vector<Point> > squares;
+    vector<vector<Point> > squares_cpu, squares_ocl;
 
-    for( int i = 0; names[i] != 0; i++ )
+    Mat image = imread(inputName, 1);
+    if( image.empty() )
     {
-        Mat image = imread(names[i], 1);
-        if( image.empty() )
-        {
-            cout << "Couldn't load " << names[i] << endl;
-            continue;
-        }
-
-        findSquares(image, squares);
-        drawSquares(image, squares);
-
-        int c = waitKey();
-        if( (char)c == 27 )
-            break;
+        cout << "Couldn't load " << inputName << endl;
+        return -1;
     }
+    int j = iterations;
+    int64 t_ocl = 0, t_cpp = 0;
+    //warm-ups
+    cout << "warming up ..." << endl;
+    findSquares(image, squares_cpu);
+    findSquares_ocl(image, squares_ocl);
+
+    // the warm-up results double as the inputs of the accuracy check
+#if ACCURACY_CHECK
+    cout << "Checking ocl accuracy ... " << endl;
+    cout << (checkPoints(squares_cpu, squares_ocl) ? "Pass" : "Failed") << endl;
+#endif
+    do
+    {
+        int64 t_start = cv::getTickCount();
+        findSquares(image, squares_cpu);
+        t_cpp += cv::getTickCount() - t_start;
+
+        // time the OCL path the same way
+        t_start  = cv::getTickCount();
+        findSquares_ocl(image, squares_ocl);
+        t_ocl += cv::getTickCount() - t_start;
+        cout << "run loop: " << j << endl;
+    }
+    while(--j);
+    cout << "cpp average time: " << 1000.0f * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl;
+    cout << "ocl average time: " << 1000.0f * (double)t_ocl / getTickFrequency() / iterations << "ms" << endl;
+
+    Mat result = drawSquaresBoth(image, squares_cpu, squares_ocl);
+    imshow(wndname, result);
+    imwrite(outfile, result);
+    cvWaitKey(0);
 
     return 0;
 }
diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp
index 7ac2c9a6f..565744baa 100644
--- a/samples/ocl/stereo_match.cpp
+++ b/samples/ocl/stereo_match.cpp
@@ -10,56 +10,45 @@ using namespace cv;
 using namespace std;
 using namespace ocl;
 
-bool help_showed = false;
-
-struct Params
-{
-    Params();
-    static Params read(int argc, char** argv);
-
-    string left;
-    string right;
-
-    string method_str() const
-    {
-        switch (method)
-        {
-        case BM: return "BM";
-        case BP: return "BP";
-        case CSBP: return "CSBP";
-        }
-        return "";
-    }
-    enum {BM, BP, CSBP} method;
-    int ndisp; // Max disparity + 1
-    enum {GPU, CPU} type;
-};
-
 
 struct App
 {
-    App(const Params& p);
+    App(CommandLineParser& cmd);
     void run();
     void handleKey(char key);
     void printParams() const;
 
-    void workBegin() { work_begin = getTickCount(); }
+    void workBegin()
+    {
+        work_begin = getTickCount();
+    }
     void workEnd()
     {
         int64 d = getTickCount() - work_begin;
         double f = getTickFrequency();
         work_fps = f / d;
     }
-
+    string method_str() const
+    {
+        switch (method)
+        {
+        case BM:
+            return "BM";
+        case BP:
+            return "BP";
+        case CSBP:
+            return "CSBP";
+        }
+        return "";
+    }
     string text() const
     {
         stringstream ss;
-        ss << "(" << p.method_str() << ") FPS: " << setiosflags(ios::left)
-            << setprecision(4) << work_fps;
+        ss << "(" << method_str() << ") FPS: " << setiosflags(ios::left)
+           << setprecision(4) << work_fps;
         return ss.str();
     }
 private:
-    Params p;
     bool running;
 
     Mat left_src, right_src;
@@ -72,42 +61,45 @@ private:
 
     int64 work_begin;
     double work_fps;
-};
 
-static void printHelp()
-{
-    cout << "Usage: stereo_match_gpu\n"
-        << "\t--left <left_view> --right <right_view> # must be rectified\n"
-        << "\t--method <stereo_match_method> # BM | BP | CSBP\n"
-        << "\t--ndisp <number> # number of disparity levels\n"
-        << "\t--type <device_type> # cpu | CPU | gpu | GPU\n";
-    help_showed = true;
-}
+    string l_img, r_img;
+    string out_img;
+    enum {BM, BP, CSBP} method;
+    int ndisp; // Max disparity + 1
+    enum {GPU, CPU} type;
+};
 
 int main(int argc, char** argv)
 {
+    const char* keys =
+        "{ h | help     | false                     | print help message }"
+        "{ l | left     |                           | specify left image }"
+        "{ r | right    |                           | specify right image }"
+        "{ m | method   | BM                        | specify match method(BM/BP/CSBP) }"
+        "{ n | ndisp    | 64                        |  specify number of disparity levels }"
+        "{ s | cpu_ocl  | false                     | use cpu or gpu as ocl device to process the image }"
+        "{ o | output   | stereo_match_output.jpg   | specify output path when input is images}";
+    CommandLineParser cmd(argc, argv, keys);
+    if (cmd.get<bool>("help"))
+    {
+        cout << "Avaible options:" << endl;
+        cmd.printParams();
+        return 0;
+    }
     try
     {
-        if (argc < 2)
-        {
-            printHelp();
-            return 1;
-        }
+        App app(cmd);
+        int flag = CVCL_DEVICE_TYPE_GPU;
+        if(cmd.get<bool>("s") == true)
+            flag = CVCL_DEVICE_TYPE_CPU;
 
-        Params args = Params::read(argc, argv);
-        if (help_showed)
-            return -1;
-
-        int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU };
         vector<Info> info;
-
-        if(getDevice(info, flags[args.type]) == 0)
+        if(getDevice(info, flag) == 0)
         {
             throw runtime_error("Error: Did not find a valid OpenCL device!");
         }
         cout << "Device name:" << info[0].DeviceName[0] << endl;
 
-        App app(args);
         app.run();
     }
     catch (const exception& e)
@@ -117,77 +109,39 @@ int main(int argc, char** argv)
     return 0;
 }
 
-
-Params::Params()
-{
-    method = BM;
-    ndisp = 64;
-    type = GPU;
-}
-
-
-Params Params::read(int argc, char** argv)
-{
-    Params p;
-
-    for (int i = 1; i < argc; i++)
-    {
-        if (string(argv[i]) == "--left") p.left = argv[++i];
-        else if (string(argv[i]) == "--right") p.right = argv[++i];
-        else if (string(argv[i]) == "--method")
-        {
-            if (string(argv[i + 1]) == "BM") p.method = BM;
-            else if (string(argv[i + 1]) == "BP") p.method = BP;
-            else if (string(argv[i + 1]) == "CSBP") p.method = CSBP;
-            else throw runtime_error("unknown stereo match method: " + string(argv[i + 1]));
-            i++;
-        }
-        else if (string(argv[i]) == "--ndisp") p.ndisp = atoi(argv[++i]);
-        else if (string(argv[i]) == "--type")
-        {
-            string t(argv[++i]);
-            if (t == "cpu" || t == "CPU")
-            {
-                p.type = CPU;
-            } 
-            else if (t == "gpu" || t == "GPU")
-            {
-                p.type = GPU;
-            }
-            else throw runtime_error("unknown device type: " + t);
-        }
-        else if (string(argv[i]) == "--help") printHelp();
-        else throw runtime_error("unknown key: " + string(argv[i]));
-    }
-
-    return p;
-}
-
-
-App::App(const Params& params)
-    : p(params), running(false)
+App::App(CommandLineParser& cmd) // prints the key bindings, then reads options l, r, m, n and o from cmd
+    : running(false),method(BM) // BM is the initial method; it is kept if option "m" is not recognised below
 {
     cout << "stereo_match_ocl sample\n";
     cout << "\nControls:\n"
-        << "\tesc - exit\n"
-        << "\tp - print current parameters\n"
-        << "\tg - convert source images into gray\n"
-        << "\tm - change stereo match method\n"
-        << "\ts - change Sobel prefiltering flag (for BM only)\n"
-        << "\t1/q - increase/decrease maximum disparity\n"
-        << "\t2/w - increase/decrease window size (for BM only)\n"
-        << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
-        << "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
+         << "\tesc - exit\n"
+         << "\tp - print current parameters\n"
+         << "\tg - convert source images into gray\n"
+         << "\tm - change stereo match method\n"
+         << "\ts - change Sobel prefiltering flag (for BM only)\n"
+         << "\t1/q - increase/decrease maximum disparity\n"
+         << "\t2/w - increase/decrease window size (for BM only)\n"
+         << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
+         << "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
+    l_img = cmd.get<string>("l");
+    r_img = cmd.get<string>("r");
+    string mstr = cmd.get<string>("m");
+    if(mstr == "BM") method = BM;
+    else if(mstr == "BP") method = BP;
+    else if(mstr == "CSBP") method = CSBP;
+    else cout << "unknown method!\n"; // only reports the problem; method is left unchanged
+    ndisp = cmd.get<int>("n");
+    out_img = cmd.get<string>("o");
 }
 
 
 void App::run()
 {
     // Load images
-    left_src = imread(p.left);
-    right_src = imread(p.right);
-    if (left_src.empty()) throw runtime_error("can't open file \"" + p.left + "\"");
-    if (right_src.empty()) throw runtime_error("can't open file \"" + p.right + "\"");
+    left_src = imread(l_img);
+    right_src = imread(r_img);
+    if (left_src.empty()) throw runtime_error("can't open file \"" + l_img + "\"");
+    if (right_src.empty()) throw runtime_error("can't open file \"" + r_img + "\"");
 
     cvtColor(left_src, left, CV_BGR2GRAY);
     cvtColor(right_src, right, CV_BGR2GRAY);
@@ -199,14 +153,15 @@ void App::run()
     imshow("right", right);
 
     // Set common parameters
-    bm.ndisp = p.ndisp;
-    bp.ndisp = p.ndisp;
-    csbp.ndisp = p.ndisp;
+    bm.ndisp = ndisp;
+    bp.ndisp = ndisp;
+    csbp.ndisp = ndisp;
 
     cout << endl;
     printParams();
 
     running = true;
+    bool written = false;
     while (running)
     {
 
@@ -214,9 +169,9 @@ void App::run()
         Mat disp;
         oclMat d_disp;
         workBegin();
-        switch (p.method)
+        switch (method)
         {
-        case Params::BM:
+        case BM:
             if (d_left.channels() > 1 || d_right.channels() > 1)
             {
                 cout << "BM doesn't support color images\n";
@@ -230,25 +185,28 @@ void App::run()
             }
             bm(d_left, d_right, d_disp);
             break;
-        case Params::BP:
+        case BP:
             bp(d_left, d_right, d_disp);
             break;
-        case Params::CSBP:
+        case CSBP:
             csbp(d_left, d_right, d_disp);
             break;
         }
-        ocl::finish();
         workEnd();
 
         // Show results
         d_disp.download(disp);
-        if (p.method != Params::BM)
+        if (method != BM)
         {
             disp.convertTo(disp, 0);
         }
         putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
         imshow("disparity", disp);
-
+        if(!written)
+        {
+            imwrite(out_img, disp);
+            written = true;
+        }
         handleKey((char)waitKey(3));
     }
 }
@@ -259,19 +217,19 @@ void App::printParams() const
     cout << "--- Parameters ---\n";
     cout << "image_size: (" << left.cols << ", " << left.rows << ")\n";
     cout << "image_channels: " << left.channels() << endl;
-    cout << "method: " << p.method_str() << endl
-        << "ndisp: " << p.ndisp << endl;
-    switch (p.method)
+    cout << "method: " << method_str() << endl
+         << "ndisp: " << ndisp << endl;
+    switch (method)
     {
-    case Params::BM:
+    case BM:
         cout << "win_size: " << bm.winSize << endl;
         cout << "prefilter_sobel: " << bm.preset << endl;
         break;
-    case Params::BP:
+    case BP:
         cout << "iter_count: " << bp.iters << endl;
         cout << "level_count: " << bp.levels << endl;
         break;
-    case Params::CSBP:
+    case CSBP:
         cout << "iter_count: " << csbp.iters << endl;
         cout << "level_count: " << csbp.levels << endl;
         break;
@@ -287,11 +245,13 @@ void App::handleKey(char key)
     case 27:
         running = false;
         break;
-    case 'p': case 'P':
+    case 'p':
+    case 'P':
         printParams();
         break;
-    case 'g': case 'G':
-        if (left.channels() == 1 && p.method != Params::BM)
+    case 'g':
+    case 'G':
+        if (left.channels() == 1 && method != BM)
         {
             left = left_src;
             right = right_src;
@@ -307,23 +267,25 @@ void App::handleKey(char key)
         imshow("left", left);
         imshow("right", right);
         break;
-    case 'm': case 'M':
-        switch (p.method)
+    case 'm':
+    case 'M':
+        switch (method)
         {
-        case Params::BM:
-            p.method = Params::BP;
+        case BM:
+            method = BP;
             break;
-        case Params::BP:
-            p.method = Params::CSBP;
+        case BP:
+            method = CSBP;
             break;
-        case Params::CSBP:
-            p.method = Params::BM;
+        case CSBP:
+            method = BM;
             break;
         }
-        cout << "method: " << p.method_str() << endl;
+        cout << "method: " << method_str() << endl;
         break;
-    case 's': case 'S':
-        if (p.method == Params::BM)
+    case 's':
+    case 'S':
+        if (method == BM)
         {
             switch (bm.preset)
             {
@@ -338,76 +300,80 @@ void App::handleKey(char key)
         }
         break;
     case '1':
-        p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8;
-        cout << "ndisp: " << p.ndisp << endl;
-        bm.ndisp = p.ndisp;
-        bp.ndisp = p.ndisp;
-        csbp.ndisp = p.ndisp;
+        ndisp == 1 ? ndisp = 8 : ndisp += 8;
+        cout << "ndisp: " << ndisp << endl;
+        bm.ndisp = ndisp;
+        bp.ndisp = ndisp;
+        csbp.ndisp = ndisp;
         break;
-    case 'q': case 'Q':
-        p.ndisp = max(p.ndisp - 8, 1);
-        cout << "ndisp: " << p.ndisp << endl;
-        bm.ndisp = p.ndisp;
-        bp.ndisp = p.ndisp;
-        csbp.ndisp = p.ndisp;
+    case 'q':
+    case 'Q':
+        ndisp = max(ndisp - 8, 1);
+        cout << "ndisp: " << ndisp << endl;
+        bm.ndisp = ndisp;
+        bp.ndisp = ndisp;
+        csbp.ndisp = ndisp;
         break;
     case '2':
-        if (p.method == Params::BM)
+        if (method == BM)
         {
             bm.winSize = min(bm.winSize + 1, 51);
             cout << "win_size: " << bm.winSize << endl;
         }
         break;
-    case 'w': case 'W':
-        if (p.method == Params::BM)
+    case 'w':
+    case 'W':
+        if (method == BM)
         {
             bm.winSize = max(bm.winSize - 1, 2);
             cout << "win_size: " << bm.winSize << endl;
         }
         break;
     case '3':
-        if (p.method == Params::BP)
+        if (method == BP)
         {
             bp.iters += 1;
             cout << "iter_count: " << bp.iters << endl;
         }
-        else if (p.method == Params::CSBP)
+        else if (method == CSBP)
         {
             csbp.iters += 1;
             cout << "iter_count: " << csbp.iters << endl;
         }
         break;
-    case 'e': case 'E':
-        if (p.method == Params::BP)
+    case 'e':
+    case 'E':
+        if (method == BP)
         {
             bp.iters = max(bp.iters - 1, 1);
             cout << "iter_count: " << bp.iters << endl;
         }
-        else if (p.method == Params::CSBP)
+        else if (method == CSBP)
         {
             csbp.iters = max(csbp.iters - 1, 1);
             cout << "iter_count: " << csbp.iters << endl;
         }
         break;
     case '4':
-        if (p.method == Params::BP)
+        if (method == BP)
         {
             bp.levels += 1;
             cout << "level_count: " << bp.levels << endl;
         }
-        else if (p.method == Params::CSBP)
+        else if (method == CSBP)
         {
             csbp.levels += 1;
             cout << "level_count: " << csbp.levels << endl;
         }
         break;
-    case 'r': case 'R':
-        if (p.method == Params::BP)
+    case 'r':
+    case 'R':
+        if (method == BP)
         {
             bp.levels = max(bp.levels - 1, 1);
             cout << "level_count: " << bp.levels << endl;
         }
-        else if (p.method == Params::CSBP)
+        else if (method == CSBP)
         {
             csbp.levels = max(csbp.levels - 1, 1);
             cout << "level_count: " << csbp.levels << endl;
diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp
index 038a8dc5c..bee517fbc 100644
--- a/samples/ocl/surf_matcher.cpp
+++ b/samples/ocl/surf_matcher.cpp
@@ -1,48 +1,3 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
 #include <iostream>
 #include <stdio.h>
 #include "opencv2/core/core.hpp"
@@ -61,27 +16,20 @@ const float GOOD_PORTION = 0.15f;
 
 namespace
 {
-void help();
-
-void help()
-{
-    std::cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << std::endl;
-    std::cout << "\nUsage:\n\tsurf_matcher --left <image1> --right <image2> [-c]" << std::endl;
-    std::cout << "\nExample:\n\tsurf_matcher --left box.png --right box_in_scene.png" << std::endl;
-}
 
 int64 work_begin = 0;
 int64 work_end = 0;
 
-void workBegin() 
-{ 
+void workBegin()
+{
     work_begin = getTickCount();
 }
 void workEnd()
 {
     work_end = getTickCount() - work_begin;
 }
-double getTime(){
+double getTime()
+{
     return work_end /((double)cvGetTickFrequency() * 1000.);
 }
 
@@ -114,17 +62,17 @@ struct SURFMatcher
 Mat drawGoodMatches(
     const Mat& cpu_img1,
     const Mat& cpu_img2,
-    const vector<KeyPoint>& keypoints1, 
-    const vector<KeyPoint>& keypoints2, 
+    const vector<KeyPoint>& keypoints1,
+    const vector<KeyPoint>& keypoints2,
     vector<DMatch>& matches,
     vector<Point2f>& scene_corners_
-    )
+)
 {
-    //-- Sort matches and preserve top 10% matches 
+    //-- Sort matches and preserve top 10% matches
     std::sort(matches.begin(), matches.end());
     std::vector< DMatch > good_matches;
     double minDist = matches.front().distance,
-        maxDist = matches.back().distance;
+           maxDist = matches.back().distance;
 
     const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION));
     for( int i = 0; i < ptsPairs; i++ )
@@ -139,8 +87,8 @@ Mat drawGoodMatches(
     // drawing the results
     Mat img_matches;
     drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2,
-        good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
-        vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS  );
+                 good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
+                 vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS  );
 
     //-- Localize the object
     std::vector<Point2f> obj;
@@ -154,28 +102,30 @@ Mat drawGoodMatches(
     }
     //-- Get the corners from the image_1 ( the object to be "detected" )
     std::vector<Point2f> obj_corners(4);
-    obj_corners[0] = cvPoint(0,0); obj_corners[1] = cvPoint( cpu_img1.cols, 0 );
-    obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = cvPoint( 0, cpu_img1.rows );
+    obj_corners[0] = cvPoint(0,0);
+    obj_corners[1] = cvPoint( cpu_img1.cols, 0 );
+    obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows );
+    obj_corners[3] = cvPoint( 0, cpu_img1.rows );
     std::vector<Point2f> scene_corners(4);
-    
+
     Mat H = findHomography( obj, scene, CV_RANSAC );
     perspectiveTransform( obj_corners, scene_corners, H);
 
     scene_corners_ = scene_corners;
-    
+
     //-- Draw lines between the corners (the mapped object in the scene - image_2 )
-    line( img_matches, 
-        scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), 
-        Scalar( 0, 255, 0), 2, CV_AA );
-    line( img_matches, 
-        scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), 
-        Scalar( 0, 255, 0), 2, CV_AA );
-    line( img_matches, 
-        scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), 
-        Scalar( 0, 255, 0), 2, CV_AA );
-    line( img_matches, 
-        scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), 
-        Scalar( 0, 255, 0), 2, CV_AA );
+    line( img_matches,
+          scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0),
+          Scalar( 0, 255, 0), 2, CV_AA );
+    line( img_matches,
+          scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0),
+          Scalar( 0, 255, 0), 2, CV_AA );
+    line( img_matches,
+          scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0),
+          Scalar( 0, 255, 0), 2, CV_AA );
+    line( img_matches,
+          scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0),
+          Scalar( 0, 255, 0), 2, CV_AA );
     return img_matches;
 }
 
@@ -185,6 +135,21 @@ Mat drawGoodMatches(
 // use cpu findHomography interface to calculate the transformation matrix
 int main(int argc, char* argv[])
 {
+    const char* keys =
+        "{ h | help     | false           | print help message  }"
+        "{ l | left     |                 | specify left image  }"
+        "{ r | right    |                 | specify right image }"
+        "{ o | output   | SURF_output.jpg | specify output save path (only works in CPU or GPU only mode) }"
+        "{ c | use_cpu  | false           | use CPU algorithms  }"
+        "{ a | use_all  | false           | use both CPU and GPU algorithms}";
+    CommandLineParser cmd(argc, argv, keys);
+    if (cmd.get<bool>("help"))
+    {
+        std::cout << "Avaible options:" << std::endl;
+        cmd.printParams();
+        return 0;
+    }
+
     vector<cv::ocl::Info> info;
     if(cv::ocl::getDevice(info) == 0)
     {
@@ -195,54 +160,38 @@ int main(int argc, char* argv[])
 
     Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
     oclMat img1, img2;
-    bool useCPU = false;
+    bool useCPU = cmd.get<bool>("c");
     bool useGPU = false;
-    bool useALL = false;
+    bool useALL = cmd.get<bool>("a");
 
-    for (int i = 1; i < argc; ++i)
+    string outpath = cmd.get<std::string>("o");
+
+    cpu_img1 = imread(cmd.get<std::string>("l"));
+    CV_Assert(!cpu_img1.empty());
+    cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
+    img1 = cpu_img1_grey;
+
+    cpu_img2 = imread(cmd.get<std::string>("r"));
+    CV_Assert(!cpu_img2.empty());
+    cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
+    img2 = cpu_img2_grey;
+
+    if(useALL)
     {
-        if (string(argv[i]) == "--left")
-        {
-            cpu_img1 = imread(argv[++i]);
-            CV_Assert(!cpu_img1.empty());
-            cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
-            img1 = cpu_img1_grey;
-        }
-        else if (string(argv[i]) == "--right")
-        {
-            cpu_img2 = imread(argv[++i]);
-            CV_Assert(!cpu_img2.empty());
-            cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
-            img2 = cpu_img2_grey;
-        }
-        else if (string(argv[i]) == "-c")
-        {
-            useCPU = true;
-            useGPU = false;
-            useALL = false;
-        }else if(string(argv[i]) == "-g")
-        {
-            useGPU = true;
-            useCPU = false;
-            useALL = false;
-        }else if(string(argv[i]) == "-a")
-        {
-            useALL = true;
-            useCPU = false;
-            useGPU = false;
-        }
-        else if (string(argv[i]) == "--help")
-        {
-            help();
-            return -1;
-        }
+        useCPU = false;
+        useGPU = false;
     }
+    else if(useCPU==false && useALL==false)
+    {
+        useGPU = true;
+    }
+
     if(!useCPU)
     {
         std::cout
-            << "Device name:"
-            << info[0].DeviceName[0]
-        << std::endl;
+                << "Device name:"
+                << info[0].DeviceName[0]
+                << std::endl;
     }
     double surf_time = 0.;
 
@@ -262,12 +211,12 @@ int main(int argc, char* argv[])
     //instantiate detectors/matchers
     SURFDetector<SURF>     cpp_surf;
     SURFDetector<SURF_OCL> ocl_surf;
-    
+
     SURFMatcher<BFMatcher>      cpp_matcher;
     SURFMatcher<BFMatcher_OCL>  ocl_matcher;
 
     //-- start of timing section
-    if (useCPU) 
+    if (useCPU)
     {
         for (int i = 0; i <= LOOP_NUM; i++)
         {
@@ -298,7 +247,8 @@ int main(int argc, char* argv[])
 
         surf_time = getTime();
         std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
-    }else
+    }
+    else
     {
         //cpu runs
         for (int i = 0; i <= LOOP_NUM; i++)
@@ -353,14 +303,14 @@ int main(int argc, char* argv[])
             for(size_t i = 0; i < cpu_corner.size(); i++)
             {
                 if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10)
-                    ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10))
+                        ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10))
                 {
                     std::cout<<"Failed\n";
                     result = false;
                     break;
                 }
                 result = true;
-            } 
+            }
             if(result)
                 std::cout<<"Passed\n";
         }
@@ -371,12 +321,15 @@ int main(int argc, char* argv[])
     {
         namedWindow("cpu surf matches", 0);
         imshow("cpu surf matches", img_matches);
+        imwrite(outpath, img_matches);
     }
     else if(useGPU)
     {
         namedWindow("ocl surf matches", 0);
         imshow("ocl surf matches", img_matches);
-    }else
+        imwrite(outpath, img_matches);
+    }
+    else
     {
         namedWindow("cpu surf matches", 0);
         imshow("cpu surf matches", img_matches);
diff --git a/samples/ocl/tvl1_optical_flow.cpp b/samples/ocl/tvl1_optical_flow.cpp
new file mode 100644
index 000000000..cff9692ed
--- /dev/null
+++ b/samples/ocl/tvl1_optical_flow.cpp
@@ -0,0 +1,265 @@
+#include <iostream>
+#include <vector>
+#include <iomanip>
+
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/video/video.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+typedef unsigned char uchar;
+#define LOOP_NUM 10
+int64 work_begin = 0;
+int64 work_end = 0;
+
+// Starts a timed section: stores the current tick count in work_begin so that
+// workEnd() can measure how many ticks have elapsed since this call.
+static void workBegin()
+{ work_begin = getTickCount(); }
+// Ends a timed section: adds the ticks elapsed since the last workBegin() to
+// the running total in work_end, so consecutive timed sections accumulate.
+static void workEnd()
+{ work_end = work_end + (getTickCount() - work_begin); }
+// Converts the tick total accumulated in work_end to milliseconds
+// (ticks * 1000 / tick frequency).
+static double getTime()
+{ const double tickFrequency = getTickFrequency(); return work_end * 1000. / tickFrequency; }
+
+// Saturates x to [a, b]: yields a unless x > a, otherwise x when x < b, else b.
+template <typename T>
+inline T clamp (T x, T a, T b)
+{ return !(x > a) ? a : (x < b ? x : b); }
+
+// Linearly rescales x from [a, b] onto [c, d] after saturating x to [a, b]; requires a != b.
+// The helper is called as ::clamp because, with `using namespace std` in effect, an
+// unqualified clamp(x, a, b) is ambiguous with std::clamp when built as C++17 or later.
+template <typename T> inline T mapValue(T x, T a, T b, T c, T d)
+{ return c + (d - c) * (::clamp(x, a, b) - a) / (b - a); }
+
+// Renders a two-component flow (u, v) as a 4-channel 8-bit image in flowField.
+//
+// u and v are read row by row as float and are expected to share one size.
+// Channel 2 encodes u and channel 1 encodes -v, both scaled so that +/- the
+// largest absolute component (never less than 1) spans 0..255; channel 0 is
+// always 0 and channel 3 always 255.
+static void getFlowField(const Mat& u, const Mat& v, Mat& flowField)
+{
+    // Pass 1: find the common scale, the largest |u| or |v| in the field.
+    float range = 1.0f;
+    for (int y = 0; y < u.rows; ++y)
+    {
+        const float* uRow = u.ptr<float>(y);
+        const float* vRow = v.ptr<float>(y);
+        for (int x = 0; x < u.cols; ++x)
+            range = max(range, max(fabsf(uRow[x]), fabsf(vRow[x])));
+    }
+
+    // Pass 2: colour-code every pixel with that scale.
+    flowField.create(u.size(), CV_8UC4);
+    for (int y = 0; y < flowField.rows; ++y)
+    {
+        const float* uRow = u.ptr<float>(y);
+        const float* vRow = v.ptr<float>(y);
+        Vec4b* out = flowField.ptr<Vec4b>(y);
+
+        for (int x = 0; x < flowField.cols; ++x)
+        {
+            const float fromV = mapValue(-vRow[x], -range, range, 0.0f, 255.0f);
+            const float fromU = mapValue( uRow[x], -range, range, 0.0f, 255.0f);
+            out[x][0] = 0;
+            out[x][1] = static_cast<unsigned char>(fromV);
+            out[x][2] = static_cast<unsigned char>(fromU);
+            out[x][3] = 255;
+        }
+    }
+}
+
+
+// Entry point of the TV-L1 dense optical flow sample.
+//
+// Still-image mode: when both --left and --right can be read and no capture is
+// requested, the flow between them is computed LOOP_NUM + 1 times (the first
+// run is an untimed warm-up), the average time is printed and the colour-coded
+// flow field is shown and written to --output.
+// Capture mode: otherwise frames come from --video, or from camera --camera when
+// no video is given, and the flow between consecutive frames is displayed until
+// the stream ends or a key is pressed.
+//
+// Returns 0 on success (or after printing help) and -1 when no OpenCL device is
+// found or when neither the still images nor a capture source is usable.
+int main(int argc, const char* argv[])
+{
+    const char* keys =
+        "{ h   | help       | false           | print help message }"
+        "{ l   | left       |                 | specify left image }"
+        "{ r   | right      |                 | specify right image }"
+        "{ o   | output     | tvl1_output.jpg | specify output save path }"
+        "{ c   | camera     | 0               | enable camera capturing }"
+        "{ s   | use_cpu    | false           | use cpu or gpu to process the image }"
+        "{ v   | video      |                 | use video as input }";
+
+    CommandLineParser cmd(argc, argv, keys);
+
+    // Handle --help before any OpenCL call so it works on machines without a device.
+    if (cmd.get<bool>("help"))
+    {
+        cout << "Usage: tvl1_optical_flow [options]" << endl;
+        cout << "Available options:" << endl;
+        cmd.printParams();
+        return 0;
+    }
+
+    // getDevice() reports how many devices it found; indexing ocl_info[0] on an
+    // empty vector would be undefined behaviour, so bail out cleanly instead.
+    static std::vector<Info> ocl_info;
+    if (ocl::getDevice(ocl_info) == 0)
+    {
+        cout << "Error: Did not find a valid OpenCL device!" << endl;
+        return -1;
+    }
+    //if you want to use undefault device, set it here
+    setDevice(ocl_info[0]);
+
+    //set this to save kernel compile time from second time you run
+    ocl::setBinpath("./");
+
+    string fname0 = cmd.get<string>("l");
+    string fname1 = cmd.get<string>("r");
+    string vdofile = cmd.get<string>("v");
+    string outpath = cmd.get<string>("o");
+    bool useCPU = cmd.get<bool>("s");
+    // NOTE(review): "c" is read both as a flag and as the camera index - confirm
+    // how CommandLineParser treats a numeric value in get<bool>().
+    bool useCamera = cmd.get<bool>("c");
+    int inputName = cmd.get<int>("c");
+
+    Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE);
+    Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE);
+    // CPU implementation and its OpenCL counterpart; useCPU selects which one runs.
+    cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
+    cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
+
+    Mat flow, show_flow;
+    Mat flow_vec[2];
+
+    // Without both still images the only usable input is a capture source.
+    const bool defaultPicturesFail = frame0.empty() || frame1.empty();
+    if (defaultPicturesFail)
+        useCamera = true;
+
+    // Open the capture source exactly once: a separate availability probe would
+    // leak its handle and wrongly demand a camera for --video runs.
+    CvCapture* capture = 0;
+    if (useCamera)
+    {
+        if(vdofile == "")
+            capture = cvCaptureFromCAM( inputName );
+        else
+            capture = cvCreateFileCapture(vdofile.c_str());
+
+        if(!capture)
+        {
+            if(vdofile == "")
+                cout << "Capture from CAM " << inputName << " didn't work" << endl;
+            else
+                cout << "Capture from file " << vdofile << " failed" <<endl;
+            if (defaultPicturesFail)
+            {
+                cout << "Can't load input images" << endl;
+                return -1;
+            }
+            // The still images did load, so fall back to processing them.
+            useCamera = false;
+        }
+    }
+
+    if (useCamera)
+    {
+        Mat frame;
+        // Ping-pong buffers: gray[i % 2] holds frame i, the other one frame i - 1.
+        Mat gray[2];
+        bool keyPressed = false;
+
+        cout << "In capture ..." << endl;
+        for(int i = 0; !keyPressed; i++)
+        {
+            // Stop as soon as no further frame can be obtained.
+            frame = cvQueryFrame( capture );
+            if( frame.empty() )
+                break;
+
+            Mat& current = gray[i % 2];
+            cvtColor(frame, current, COLOR_BGR2GRAY);
+
+            // The first frame has no predecessor, so flow starts with frame 1.
+            if (i > 0)
+            {
+                const Mat& previous = gray[1 - i % 2];
+                if (useCPU)
+                {
+                    alg->calc(previous, current, flow);
+                    split(flow, flow_vec);
+                }
+                else
+                {
+                    oclMat d_flowx, d_flowy;
+                    d_alg(oclMat(previous), oclMat(current), d_flowx, d_flowy);
+                    d_flowx.download(flow_vec[0]);
+                    d_flowy.download(flow_vec[1]);
+                }
+                getFlowField(flow_vec[0], flow_vec[1], show_flow);
+                imshow("tvl1 optical flow field", show_flow);
+            }
+
+            keyPressed = waitKey( 10 ) >= 0;
+        }
+
+        // The stream ran out by itself: keep the last result up until a key is hit.
+        if (!keyPressed)
+            waitKey(0);
+
+        cvReleaseCapture( &capture );
+    }
+    else
+    {
+        oclMat d_flowx, d_flowy;
+        // Iteration 0 is an untimed warm-up; iterations 1..LOOP_NUM are accumulated.
+        for(int i = 0; i <= LOOP_NUM; i ++)
+        {
+            cout << "loop" << i << endl;
+
+            if (i > 0) workBegin();
+            if (useCPU)
+            {
+                alg->calc(frame0, frame1, flow);
+                split(flow, flow_vec);
+            }
+            else
+            {
+                d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
+                d_flowx.download(flow_vec[0]);
+                d_flowy.download(flow_vec[1]);
+            }
+            if (i > 0) workEnd();
+        }
+
+        // Report the mean over the LOOP_NUM timed runs, then show and save the result.
+        if (useCPU)
+            cout << "average CPU time (noCamera) : ";
+        else
+            cout << "average GPU time (noCamera) : ";
+        cout << getTime() / LOOP_NUM << " ms" << endl;
+
+        getFlowField(flow_vec[0], flow_vec[1], show_flow);
+        imshow("tvl1 optical flow field", show_flow);
+        imwrite(outpath, show_flow);
+    }
+
+    // Keep the result window open until a key is pressed.
+    waitKey();
+
+    return 0;
+}
\ No newline at end of file