fixed ocl::phase
This commit is contained in:
		| @@ -461,8 +461,8 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) | |||||||
|         m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)); |         m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)); | ||||||
|     oclMat dst1(m1), dst2(m2); |     oclMat dst1(m1), dst2(m2); | ||||||
|  |  | ||||||
|     //arithmetic_sum_run(src, dst1,"arithm_op_sum"); | //    arithmetic_sum_run(src, dst1, "arithm_op_sum"); | ||||||
|     //arithmetic_sum_run(src, dst2,"arithm_op_squares_sum"); | //    arithmetic_sum_run(src, dst2, "arithm_op_squares_sum"); | ||||||
|  |  | ||||||
|     m1 = (Mat)dst1; |     m1 = (Mat)dst1; | ||||||
|     m2 = (Mat)dst2; |     m2 = (Mat)dst2; | ||||||
| @@ -558,7 +558,6 @@ void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) | void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) | ||||||
| { | { | ||||||
|     oclMat buf; |     oclMat buf; | ||||||
| @@ -928,47 +927,38 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat | |||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && src1.rows == src2.rows && src2.rows == dst.rows); |  | ||||||
|     CV_Assert(src1.type() == src2.type() && src1.type() == dst.type()); |  | ||||||
|  |  | ||||||
|     Context  *clCxt = src1.clCxt; |     Context  *clCxt = src1.clCxt; | ||||||
|     int channels = dst.oclchannels(); |     int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels(); | ||||||
|     int depth = dst.depth(); |     int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1(); | ||||||
|  |     int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1(); | ||||||
|     size_t vector_length = 1; |     int dststep1 = dst.step / dst.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1(); | ||||||
|     int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); |  | ||||||
|     int cols = divUp(dst.cols * channels + offset_cols, vector_length); |  | ||||||
|  |  | ||||||
|     size_t localThreads[3]  = { 64, 4, 1 }; |     size_t localThreads[3]  = { 64, 4, 1 }; | ||||||
|     size_t globalThreads[3] = { cols, dst.rows, 1 }; |     size_t globalThreads[3] = { cols1, dst.rows, 1 }; | ||||||
|  |  | ||||||
|     int dst_step1 = dst.cols * dst.elemSize(); |  | ||||||
|     vector<pair<size_t , const void *> > args; |     vector<pair<size_t , const void *> > args; | ||||||
|     args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); |     args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); |     args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 )); | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); |     args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 )); | ||||||
|     args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); |     args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step )); |     args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 )); | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset )); |     args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); | ||||||
|     args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); |     args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); |     args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); |     args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 )); | ||||||
|  |     args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 )); | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); |     args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); |  | ||||||
|     args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); |  | ||||||
|  |  | ||||||
|     openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); |     openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); | ||||||
| } | } | ||||||
|  |  | ||||||
| void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angleInDegrees) | void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees) | ||||||
| { | { | ||||||
|     CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F)); |     CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F)); | ||||||
|  |     CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0); | ||||||
|  |  | ||||||
|     Angle.create(x.size(), x.type()); |     Angle.create(x.size(), x.type()); | ||||||
|     string kernelName = angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians"; |     arithmetic_phase_run(x, y, Angle, angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians", &arithm_phase); | ||||||
|     if (angleInDegrees) |  | ||||||
|         arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase); |  | ||||||
|     else |  | ||||||
|         arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -1539,8 +1529,8 @@ oclMatExpr::operator oclMat() const | |||||||
| /////////////////////////////// transpose //////////////////////////////////// | /////////////////////////////// transpose //////////////////////////////////// | ||||||
| ////////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
| #define TILE_DIM      (32) | #define TILE_DIM   (32) | ||||||
| #define BLOCK_ROWS    (256/TILE_DIM) | #define BLOCK_ROWS (256 / TILE_DIM) | ||||||
|  |  | ||||||
| static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false) | static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false) | ||||||
| { | { | ||||||
|   | |||||||
| @@ -45,110 +45,125 @@ | |||||||
| // | // | ||||||
|  |  | ||||||
| #if defined (DOUBLE_SUPPORT) | #if defined (DOUBLE_SUPPORT) | ||||||
|  | #ifdef cl_khr_fp64 | ||||||
| #pragma OPENCL EXTENSION cl_khr_fp64:enable | #pragma OPENCL EXTENSION cl_khr_fp64:enable | ||||||
|  | #elif defined (cl_amd_fp64) | ||||||
|  | #pragma OPENCL EXTENSION cl_amd_fp64:enable | ||||||
|  | #endif | ||||||
| #endif | #endif | ||||||
| #define CV_PI 3.1415926535898 |  | ||||||
| /**************************************phase inradians**************************************/ |  | ||||||
| __kernel void arithm_phase_inradians_D5 (__global float *src1, int src1_step, int src1_offset, |  | ||||||
|                                          __global float *src2, int src2_step, int src2_offset, |  | ||||||
|                                          __global float *dst,  int dst_step,  int dst_offset, |  | ||||||
|                                          int rows, int cols, int dst_step1) |  | ||||||
| { |  | ||||||
|  |  | ||||||
|  | #define CV_PI 3.1415926535898 | ||||||
|  | #define CV_2PI 2*3.1415926535898 | ||||||
|  |  | ||||||
|  | /**************************************phase inradians**************************************/ | ||||||
|  |  | ||||||
|  | __kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1, | ||||||
|  |                                          __global float *src2, int src2_step1, int src2_offset1, | ||||||
|  |                                          __global float *dst,  int dst_step1,  int dst_offset1, | ||||||
|  |                                          int cols, int rows) | ||||||
|  | { | ||||||
|     int x = get_global_id(0); |     int x = get_global_id(0); | ||||||
|     int y = get_global_id(1); |     int y = get_global_id(1); | ||||||
|  |  | ||||||
|     if(x < cols && y < rows) |     if (x < cols && y < rows) | ||||||
|     { |     { | ||||||
|         int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); |         int src1_index = mad24(y, src1_step1, x + src1_offset1); | ||||||
|         int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); |         int src2_index = mad24(y, src2_step1, x + src2_offset1); | ||||||
|         int dst_index  = mad24(y, dst_step,  (x << 2) + dst_offset); |         int dst_index  = mad24(y, dst_step1, x + dst_offset1); | ||||||
|  |  | ||||||
|         float data1 = *((__global float *)((__global char *)src1 + src1_index)); |         float data1 = src1[src1_index]; | ||||||
|         float data2 = *((__global float *)((__global char *)src2 + src2_index)); |         float data2 = src2[src2_index]; | ||||||
|         float tmp = atan2(data2,data1); |         float tmp = atan2(data2, data1); | ||||||
|  |  | ||||||
|         *((__global float *)((__global char *)dst + dst_index)) = tmp; |         if (tmp < 0) | ||||||
|  |             tmp += CV_2PI; | ||||||
|  |  | ||||||
|  |         dst[dst_index] = tmp; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| #if defined (DOUBLE_SUPPORT) | #if defined (DOUBLE_SUPPORT) | ||||||
| __kernel void arithm_phase_inradians_D6 (__global double *src1, int src1_step, int src1_offset, | __kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1, | ||||||
|                                          __global double *src2, int src2_step, int src2_offset, |                                          __global double *src2, int src2_step1, int src2_offset1, | ||||||
|                                          __global double *dst,  int dst_step,  int dst_offset, |                                          __global double *dst,  int dst_step1,  int dst_offset1, | ||||||
|                                          int rows, int cols, int dst_step1) |                                          int cols, int rows) | ||||||
| { | { | ||||||
|  |  | ||||||
|     int x = get_global_id(0); |     int x = get_global_id(0); | ||||||
|     int y = get_global_id(1); |     int y = get_global_id(1); | ||||||
|  |  | ||||||
|     if(x < cols && y < rows) |     if (x < cols && y < rows) | ||||||
|     { |     { | ||||||
|         int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); |         int src1_index = mad24(y, src1_step1, x + src1_offset1); | ||||||
|         int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); |         int src2_index = mad24(y, src2_step1, x + src2_offset1); | ||||||
|         int dst_index  = mad24(y, dst_step,  (x << 3) + dst_offset); |         int dst_index  = mad24(y, dst_step1, x + dst_offset1); | ||||||
|  |  | ||||||
|         double data1 = *((__global double *)((__global char *)src1 + src1_index)); |         double data1 = src1[src1_index]; | ||||||
|         double data2 = *((__global double *)((__global char *)src2 + src2_index)); |         double data2 = src2[src2_index]; | ||||||
|  |         double tmp = atan2(data2, data1); | ||||||
|  |  | ||||||
|         *((__global double *)((__global char *)dst + dst_index)) = atan2(data2,data1); |         if (tmp < 0) | ||||||
|  |             tmp += CV_2PI; | ||||||
|  |  | ||||||
|  |         dst[dst_index] = tmp; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| /**************************************phase indegrees**************************************/ | /**************************************phase indegrees**************************************/ | ||||||
| __kernel void arithm_phase_indegrees_D5 (__global float *src1, int src1_step, int src1_offset, |  | ||||||
|                                          __global float *src2, int src2_step, int src2_offset, |  | ||||||
|                                          __global float *dst,  int dst_step,  int dst_offset, |  | ||||||
|                                          int rows, int cols, int dst_step1) |  | ||||||
| { |  | ||||||
|  |  | ||||||
|  | __kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1, | ||||||
|  |                                          __global float *src2, int src2_step1, int src2_offset1, | ||||||
|  |                                          __global float *dst,  int dst_step1,  int dst_offset1, | ||||||
|  |                                          int cols, int rows) | ||||||
|  | { | ||||||
|     int x = get_global_id(0); |     int x = get_global_id(0); | ||||||
|     int y = get_global_id(1); |     int y = get_global_id(1); | ||||||
|  |  | ||||||
|     if(x < cols && y < rows) |     if (x < cols && y < rows) | ||||||
|     { |     { | ||||||
|         int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); |         int src1_index = mad24(y, src1_step1, x + src1_offset1); | ||||||
|         int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); |         int src2_index = mad24(y, src2_step1, x + src2_offset1); | ||||||
|         int dst_index  = mad24(y, dst_step,  (x << 2) + dst_offset); |         int dst_index  = mad24(y, dst_step1, x + dst_offset1); | ||||||
|  |  | ||||||
|         float data1 = *((__global float *)((__global char *)src1 + src1_index)); |         float data1 = src1[src1_index]; | ||||||
|         float data2 = *((__global float *)((__global char *)src2 + src2_index)); |         float data2 = src2[src2_index]; | ||||||
|         float tmp = atan2(data2,data1); |         float tmp = atan2(data2, data1); | ||||||
|         float tmp_data = 180*tmp/CV_PI; |         tmp = 180 * tmp / CV_PI; | ||||||
|  |  | ||||||
|         *((__global float *)((__global char *)dst + dst_index)) = tmp_data; |         if (tmp < 0) | ||||||
|  |             tmp += 360; | ||||||
|  |  | ||||||
|  |         dst[dst_index] = tmp; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| #if defined (DOUBLE_SUPPORT) | #if defined (DOUBLE_SUPPORT) | ||||||
| __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step, int src1_offset, | __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1, | ||||||
|                                          __global double *src2, int src2_step, int src2_offset, |                                          __global double *src2, int src2_step1, int src2_offset1, | ||||||
|                                          __global double *dst,  int dst_step,  int dst_offset, |                                          __global double *dst,  int dst_step1,  int dst_offset1, | ||||||
|                                          int rows, int cols, int dst_step1) |                                          int cols, int rows) | ||||||
| { | { | ||||||
|  |  | ||||||
|     int x = get_global_id(0); |     int x = get_global_id(0); | ||||||
|     int y = get_global_id(1); |     int y = get_global_id(1); | ||||||
|  |  | ||||||
|     if(x < cols && y < rows) |     if (x < cols && y < rows) | ||||||
|     { |     { | ||||||
|         int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); |         int src1_index = mad24(y, src1_step1, x + src1_offset1); | ||||||
|         int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); |         int src2_index = mad24(y, src2_step1, x + src2_offset1); | ||||||
|         int dst_index  = mad24(y, dst_step,  (x << 3) + dst_offset); |         int dst_index  = mad24(y, dst_step1, x + dst_offset1); | ||||||
|  |  | ||||||
|         double data1 = *((__global double *)((__global char *)src1 + src1_index)); |         double data1 = src1[src1_index]; | ||||||
|         double data2 = *((__global double *)((__global char *)src2 + src2_index)); |         double data2 = src2[src2_index]; | ||||||
|         double tmp = atan2(data2,data1); |         double tmp = atan2(src2[src2_index], src1[src1_index]); | ||||||
|         double tmp_data = 180*tmp/CV_PI; |  | ||||||
|  |  | ||||||
|         *((__global double *)((__global char *)dst + dst_index)) = tmp_data; |         tmp = 180 * tmp / CV_PI; | ||||||
|  |         if (tmp < 0) | ||||||
|  |             tmp += 360; | ||||||
|  |  | ||||||
|  |         dst[dst_index] = tmp; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -464,7 +464,6 @@ TEST_P(Mul, Scalar) | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| TEST_P(Mul, Mat_Scalar) | TEST_P(Mul, Mat_Scalar) | ||||||
| { | { | ||||||
|     for (int j = 0; j < LOOP_TIMES; j++) |     for (int j = 0; j < LOOP_TIMES; j++) | ||||||
| @@ -507,7 +506,6 @@ TEST_P(Div, Scalar) | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| TEST_P(Div, Mat_Scalar) | TEST_P(Div, Mat_Scalar) | ||||||
| { | { | ||||||
|     for (int j = 0; j < LOOP_TIMES; j++) |     for (int j = 0; j < LOOP_TIMES; j++) | ||||||
| @@ -1173,17 +1171,27 @@ TEST_P(CountNonZero, MAT) | |||||||
|  |  | ||||||
| typedef ArithmTestBase Phase; | typedef ArithmTestBase Phase; | ||||||
|  |  | ||||||
| TEST_P(Phase, DISABLED_Mat) | TEST_P(Phase, angleInDegrees) | ||||||
| { | { | ||||||
|     for (int angelInDegrees = 0; angelInDegrees < 2; angelInDegrees++) |     for (int j = 0; j < LOOP_TIMES; j++) | ||||||
|     { |     { | ||||||
|         for (int j = 0; j < LOOP_TIMES; j++) |         random_roi(); | ||||||
|         { |         cv::phase(src1_roi, src2_roi, dst1_roi, true); | ||||||
|             random_roi(); |         cv::ocl::phase(gsrc1, gsrc2, gdst1, true); | ||||||
|             cv::phase(src1_roi, src2_roi, dst1_roi, angelInDegrees ? true : false); |  | ||||||
|             cv::ocl::phase(gsrc1, gsrc2, gdst1, angelInDegrees ? true : false); |         Near(1e-2); | ||||||
|             Near(1e-2); |     } | ||||||
|         } | } | ||||||
|  |  | ||||||
|  | TEST_P(Phase, angleInRadians) | ||||||
|  | { | ||||||
|  |     for (int j = 0; j < LOOP_TIMES; j++) | ||||||
|  |     { | ||||||
|  |         random_roi(); | ||||||
|  |         cv::phase(src1_roi, src2_roi, dst1_roi); | ||||||
|  |         cv::ocl::phase(gsrc1, gsrc2, gdst1); | ||||||
|  |  | ||||||
|  |         Near(1e-2); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Ilya Lavrenov
					Ilya Lavrenov