From 587fb4940dbf9ae1c2e6e8aaac06f9d26b5bcdbd Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 28 Jun 2013 11:43:43 +0800 Subject: [PATCH 1/6] some accuracy fix of HOG --- modules/ocl/src/hog.cpp | 16 ++++++++++++++-- modules/ocl/src/opencl/objdetect_hog.cl | 15 +++++++++++++++ modules/ocl/test/test_objdetect.cpp | 6 +++--- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp index 3533cce69..412afee8b 100644 --- a/modules/ocl/src/hog.cpp +++ b/modules/ocl/src/hog.cpp @@ -1816,8 +1816,14 @@ void cv::ocl::device::hog::normalize_hists(int nbins, openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1, "-D CPU"); else + { + cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName); + int wave_size = queryDeviceInfo(kernel); + char opt[32] = {0}; + sprintf(opt, "-D WAVE_SIZE=%d", wave_size); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, - localThreads, args, -1, -1); + localThreads, args, -1, -1, opt); + } } void cv::ocl::device::hog::classify_hists(int win_height, int win_width, @@ -1879,8 +1885,14 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1, "-D CPU"); else + { + cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName); + int wave_size = queryDeviceInfo(kernel); + char opt[32] = {0}; + sprintf(opt, "-D WAVE_SIZE=%d", wave_size); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, - localThreads, args, -1, -1); + localThreads, args, -1, -1, opt); + } } void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, diff --git a/modules/ocl/src/opencl/objdetect_hog.cl b/modules/ocl/src/opencl/objdetect_hog.cl index 05d538330..b9103380d 100644 --- a/modules/ocl/src/opencl/objdetect_hog.cl +++ b/modules/ocl/src/opencl/objdetect_hog.cl @@ -318,6 +318,10 @@ float reduce_smem(volatile __local float* smem, int size) if (tid < 32) { if (size >= 64) smem[tid] = sum = sum + smem[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { +#endif if (size >= 32) smem[tid] = sum = sum + smem[tid + 16]; if (size >= 16) smem[tid] = sum = sum + smem[tid + 8]; if (size >= 8) smem[tid] = sum = sum + smem[tid + 4]; @@ -418,6 +422,9 @@ __kernel void classify_hists_180_kernel( { smem[tid] = product = product + smem[tid + 32]; } +#if WAVE_SIZE < 32 + barrier(CLK_LOCAL_MEM_FENCE); +#endif if (tid < 16) { smem[tid] = product = product + smem[tid + 16]; @@ -487,6 +494,10 @@ __kernel void classify_hists_252_kernel( if (tid < 32) { smem[tid] = product = product + smem[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { +#endif smem[tid] = product = product + smem[tid + 16]; smem[tid] = product = product + smem[tid + 8]; smem[tid] = product = product + smem[tid + 4]; @@ -553,6 +564,10 @@ __kernel void classify_hists_kernel( if (tid < 32) { smem[tid] = product = product + smem[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { +#endif smem[tid] = product = product + smem[tid + 16]; smem[tid] = product = product + smem[tid + 8]; smem[tid] = product = product + smem[tid + 4]; diff --git a/modules/ocl/test/test_objdetect.cpp b/modules/ocl/test/test_objdetect.cpp index 86590f798..bc719b097 100644 --- a/modules/ocl/test/test_objdetect.cpp +++ b/modules/ocl/test/test_objdetect.cpp @@ -146,17 +146,17 @@ TEST_P(HOG, Detect) if (winSize.width == 48 && winSize.height == 96) { // daimler's base - ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector48x96()); + ocl_hog.setSVMDetector(hog.getDaimlerPeopleDetector()); hog.setSVMDetector(hog.getDaimlerPeopleDetector()); } else if (winSize.width == 64 && winSize.height == 128) { - ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector64x128()); + ocl_hog.setSVMDetector(hog.getDefaultPeopleDetector()); hog.setSVMDetector(hog.getDefaultPeopleDetector()); } else { - ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector()); + ocl_hog.setSVMDetector(hog.getDefaultPeopleDetector()); hog.setSVMDetector(hog.getDefaultPeopleDetector()); } From f1d9680ba8fbd16d2b7c2ee21bf826bad73ac22b Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 28 Jun 2013 11:44:43 +0800 Subject: [PATCH 2/6] format the ocl's samples name --- modules/ocl/test/main.cpp | 2 ++ samples/ocl/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index dd46ff6e0..4ba02cf9b 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -118,6 +118,8 @@ int main(int argc, char **argv) setDevice(oclinfo[pid], device); + setBinaryDiskCache(CACHE_UPDATE); + cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl; return RUN_ALL_TESTS(); } diff --git a/samples/ocl/CMakeLists.txt b/samples/ocl/CMakeLists.txt index cdcf2f3e5..a201d8338 100644 --- a/samples/ocl/CMakeLists.txt +++ b/samples/ocl/CMakeLists.txt @@ -27,7 +27,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_OCL_SAMPLES_REQUIRED_DEPS}) set_target_properties(${the_target} PROPERTIES - OUTPUT_NAME "${name}_${project}" + OUTPUT_NAME "${project}-example-${name}" PROJECT_LABEL "(EXAMPLE_${project_upper}) ${name}") if(ENABLE_SOLUTION_FOLDERS) From c1a59b8d8066e475e2e9f9da03f29e5e45d9096c Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 28 Jun 2013 13:38:58 +0800 Subject: [PATCH 3/6] more fix to HOG --- modules/ocl/src/opencl/objdetect_hog.cl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/ocl/src/opencl/objdetect_hog.cl b/modules/ocl/src/opencl/objdetect_hog.cl index b9103380d..8ca12704e 100644 --- a/modules/ocl/src/opencl/objdetect_hog.cl +++ b/modules/ocl/src/opencl/objdetect_hog.cl @@ -133,7 +133,9 @@ __kernel void compute_hists_lut_kernel( final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = hist_[0] + hist_[1] + hist_[2]; } +#ifdef CPU barrier(CLK_LOCAL_MEM_FENCE); +#endif int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x; if ((tid < cblock_hist_size) && (gid < blocks_total)) @@ -225,8 +227,9 @@ __kernel void compute_hists_kernel( final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = hist_[0] + hist_[1] + hist_[2]; } +#ifdef CPU barrier(CLK_LOCAL_MEM_FENCE); - +#endif int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x; if ((tid < cblock_hist_size) && (gid < blocks_total)) { From 6982ea5a669de8e412ce407adceca5efb16c5906 Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 28 Jun 2013 15:08:39 +0800 Subject: [PATCH 4/6] some tweaks to samples --- samples/ocl/clahe.cpp | 108 +++++++++++++++++++++++++++++++++++ samples/ocl/facedetect.cpp | 7 ++- samples/ocl/hog.cpp | 10 +++- samples/ocl/stereo_match.cpp | 16 ++++-- 4 files changed, 134 insertions(+), 7 deletions(-) create mode 100644 samples/ocl/clahe.cpp diff --git a/samples/ocl/clahe.cpp b/samples/ocl/clahe.cpp new file mode 100644 index 000000000..72fc2fb61 --- /dev/null +++ b/samples/ocl/clahe.cpp @@ -0,0 +1,108 @@ +#include +#include "opencv2/core/core.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/ocl/ocl.hpp" +using namespace cv; +using namespace std; + +Ptr pFilter; +int tilesize; +int cliplimit; +string outfile; + +static void TSize_Callback(int pos) +{ + if(pos==0) + { + pFilter->setTilesGridSize(Size(1,1)); + } + pFilter->setTilesGridSize(Size(tilesize,tilesize)); +} + +static void Clip_Callback(int) +{ + pFilter->setClipLimit(cliplimit); +} + +int main(int argc, char** argv) +{ + const char* keys = + "{ i | input | | specify input image }" + "{ c | camera | 0 | specify camera id }" + "{ s | use_cpu | false | use cpu algorithm }" + "{ o | output | clahe_output.jpg | specify output save path}"; + + CommandLineParser cmd(argc, argv, keys); + string infile = cmd.get("i"); + outfile = cmd.get("o"); + int camid = cmd.get("c"); + bool use_cpu = cmd.get("s"); + CvCapture* capture = 0; + bool running = true; + + namedWindow("CLAHE"); + createTrackbar("Tile Size", "CLAHE", &tilesize, 32, (TrackbarCallback)TSize_Callback); + createTrackbar("Clip Limit", "CLAHE", &cliplimit, 20, (TrackbarCallback)Clip_Callback); + Mat frame, outframe; + ocl::oclMat d_outframe; + + int cur_clip; + Size cur_tilesize; + if(use_cpu) + { + pFilter = createCLAHE(); + } + else + { + pFilter = ocl::createCLAHE(); + } + cur_clip = (int)pFilter->getClipLimit(); + cur_tilesize = pFilter->getTilesGridSize(); + setTrackbarPos("Tile Size", "CLAHE", cur_tilesize.width); + setTrackbarPos("Clip Limit", "CLAHE", cur_clip); + if(infile != "") + { + frame = imread(infile); + if(frame.empty()) + { + cout << "error read image: " << infile << endl; + return -1; + } + } + else + { + capture = cvCaptureFromCAM(camid); + } + cout << "\nControls:\n" + << "\to - save output image\n" + << "\tESC - exit\n"; + while(running) + { + if(capture) + frame = cvQueryFrame(capture); + else + frame = imread(infile); + if(frame.empty()) + { + continue; + } + if(use_cpu) + { + cvtColor(frame, frame, COLOR_BGR2GRAY); + pFilter->apply(frame, outframe); + } + else + { + ocl::oclMat d_frame(frame); + ocl::cvtColor(d_frame, d_outframe, COLOR_BGR2GRAY); + pFilter->apply(d_outframe, d_outframe); + d_outframe.download(outframe); + } + imshow("CLAHE", outframe); + char key = (char)cvWaitKey(3); + if(key == 'o') imwrite(outfile, outframe); + else if(key == 27) running = false; + } + return 0; +} diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index a49610aeb..ead99b07e 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -252,8 +252,13 @@ void Draw(Mat& img, vector& faces, double scale) radius = cvRound((r->width + r->height)*0.25*scale); circle( img, center, radius, color, 3, 8, 0 ); } - imshow( "result", img ); imwrite( outputName, img ); + if(abs(scale-1.0)>.001) + { + resize(img, img, Size(img.cols/scale, img.rows/scale)); + } + imshow( "result", img ); + } diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp index ff53e010c..a8f6b06ea 100644 --- a/samples/ocl/hog.cpp +++ b/samples/ocl/hog.cpp @@ -57,6 +57,7 @@ private: string vdo_source; string output; int camera_id; + bool write_once; }; int main(int argc, char** argv) @@ -97,6 +98,7 @@ App::App(CommandLineParser& cmd) << "\tESC - exit\n" << "\tm - change mode GPU <-> CPU\n" << "\tg - convert image to gray or not\n" + << "\to - save output image once, or switch on/off video save\n" << "\t1/q - increase/decrease HOG scale\n" << "\t2/w - increase/decrease levels count\n" << "\t3/e - increase/decrease HOG group threshold\n" @@ -120,6 +122,7 @@ App::App(CommandLineParser& cmd) hit_threshold = win_width == 48 ? 1.4 : 0.; scale = 1.05; gamma_corr = true; + write_once = false; cout << "Group threshold: " << gr_threshold << endl; cout << "Levels number: " << nlevels << endl; @@ -254,10 +257,11 @@ void App::run() workEnd(); - if (output!="") + if (output!="" && write_once) { if (img_source!="") // wirte image { + write_once = false; imwrite(output, img_to_show); } else //write video @@ -340,6 +344,10 @@ void App::handleKey(char key) gamma_corr = !gamma_corr; cout << "Gamma correction: " << gamma_corr << endl; break; + case 'o': + case 'O': + write_once = !write_once; + break; } } diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp index abe75c70e..8a5031ed7 100644 --- a/samples/ocl/stereo_match.cpp +++ b/samples/ocl/stereo_match.cpp @@ -49,7 +49,7 @@ struct App return ss.str(); } private: - bool running; + bool running, write_once; Mat left_src, right_src; Mat left, right; @@ -115,6 +115,7 @@ App::App(CommandLineParser& cmd) cout << "stereo_match_ocl sample\n"; cout << "\nControls:\n" << "\tesc - exit\n" + << "\to - save output image once\n" << "\tp - print current parameters\n" << "\tg - convert source images into gray\n" << "\tm - change stereo match method\n" @@ -132,6 +133,7 @@ App::App(CommandLineParser& cmd) else cout << "unknown method!\n"; ndisp = cmd.get("n"); out_img = cmd.get("o"); + write_once = false; } @@ -161,10 +163,8 @@ void App::run() printParams(); running = true; - bool written = false; while (running) { - // Prepare disparity map of specified type Mat disp; oclMat d_disp; @@ -192,19 +192,21 @@ void App::run() csbp(d_left, d_right, d_disp); break; } + // Show results d_disp.download(disp); workEnd(); + if (method != BM) { disp.convertTo(disp, 0); } putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255)); imshow("disparity", disp); - if(!written) + if(write_once) { imwrite(out_img, disp); - written = true; + write_once = false; } handleKey((char)waitKey(3)); } @@ -378,6 +380,10 @@ void App::handleKey(char key) cout << "level_count: " << csbp.levels << endl; } break; + case 'o': + case 'O': + write_once = true; + break; } } From c73a10cb4d34dced5f7d54e0a0e5cb78ee24aa96 Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 28 Jun 2013 16:23:01 +0800 Subject: [PATCH 5/6] warnings fix --- samples/ocl/facedetect.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index ead99b07e..49148bdec 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -255,7 +255,7 @@ void Draw(Mat& img, vector& faces, double scale) imwrite( outputName, img ); if(abs(scale-1.0)>.001) { - resize(img, img, Size(img.cols/scale, img.rows/scale)); + resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale))); } imshow( "result", img ); From c66e27d49e8c40a98149e6c3a53075a18dfa6878 Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 28 Jun 2013 17:45:39 +0800 Subject: [PATCH 6/6] stereoBM fix an error on Linux when running full performance test --- modules/ocl/src/opencl/stereobm.cl | 37 +++++++++++------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/modules/ocl/src/opencl/stereobm.cl b/modules/ocl/src/opencl/stereobm.cl index 552874d42..f1b958812 100644 --- a/modules/ocl/src/opencl/stereobm.cl +++ b/modules/ocl/src/opencl/stereobm.cl @@ -258,27 +258,13 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols) float CalcSums(__local float *cols, __local float *cols_cache, int winsz) { - float cache = 0; - float cache2 = 0; - int winsz2 = winsz/2; + unsigned int cache = cols[0]; - int x = get_local_id(0); - int group_size_x = get_local_size(0); - - for(int i = 1; i <= winsz2; i++) +#pragma unroll + for(int i = 1; i <= winsz; i++) cache += cols[i]; - cols_cache[0] = cache; - - barrier(CLK_LOCAL_MEM_FENCE); - - if (x < group_size_x - winsz2) - cache2 = cols_cache[winsz2]; - else - for(int i = winsz2 + 1; i < winsz; i++) - cache2 += cols[i]; - - return cols[0] + cache + cache2; + return cache; } #define RpT (2 * ROWSperTHREAD) // got experimentally @@ -301,8 +287,7 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in int beg_row = group_id_y * RpT; int end_row = min(beg_row + RpT, disp_rows); -// if (x < disp_cols) -// { + int y = beg_row; float sum = 0; @@ -340,11 +325,15 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in } barrier(CLK_LOCAL_MEM_FENCE); - float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255; - if (sum_win < threshold) - disp[y * disp_step + x] = 0; + + if (x < disp_cols) + { + float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255; + if (sum_win < threshold) + disp[y * disp_step + x] = 0; + } barrier(CLK_LOCAL_MEM_FENCE); } - // } + }