diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp index 5bc19e11c..ab613e051 100644 --- a/modules/ocl/src/color.cpp +++ b/modules/ocl/src/color.cpp @@ -16,6 +16,7 @@ // // @Authors // Wang Weiyan, wangweiyanster@gmail.com +// Peng Xiao, pengxiao@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -70,79 +71,218 @@ void cv::ocl::cvtColor(const oclMat &, oclMat &, int, int, const Stream &) namespace cv { - namespace ocl - { - extern const char *cvt_color; - } +namespace ocl +{ +extern const char *cvt_color; +} } namespace { - void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx) - { - vector<pair<size_t , const void *> > args; - int channels = src.oclchannels(); - char build_options[50]; - //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); - sprintf(build_options, "-D DEPTH_%d", src.depth()); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&channels)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); - size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options); - } - void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int /*dcn*/) - { - Size sz = src.size(); - int scn = src.oclchannels(), depth = src.depth(), bidx; +void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx) +{ + vector<pair<size_t , const void *> > args; + int channels = src.oclchannels(); + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options); +} +void Gray2RGB_caller(const oclMat &src, oclMat &dst) +{ + vector<pair<size_t , const void *> > args; + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "Gray2RGB", gt, lt, args, -1, -1, build_options); +} +void RGB2YUV_caller(const oclMat &src, oclMat &dst, int bidx) +{ + vector<pair<size_t , const void *> > args; + int channels = src.oclchannels(); + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YUV", gt, lt, args, -1, -1, build_options); +} +void YUV2RGB_caller(const oclMat &src, oclMat &dst, int bidx) +{ + vector<pair<size_t , const void *> > args; + int channels = src.oclchannels(); + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGB", gt, lt, args, -1, -1, build_options); +} +void YUV2RGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx) +{ + vector<pair<size_t , const void *> > args; + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {dst.cols / 2, dst.rows / 2, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGBA_NV12", gt, lt, args, -1, -1, build_options); +} +void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx) +{ + vector<pair<size_t , const void *> > args; + int channels = src.oclchannels(); + char build_options[50]; + sprintf(build_options, "-D DEPTH_%d", src.depth()); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; + openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YCrCb", gt, lt, args, -1, -1, build_options); +} +void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn) +{ + Size sz = src.size(); + int scn = src.oclchannels(), depth = src.depth(), bidx; - CV_Assert(depth == CV_8U || depth == CV_16U); + CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F); - switch (code) - { - /* - case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR: - case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA: - case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555: - case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555: - case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB: - case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA: - */ - case CV_BGR2GRAY: - case CV_BGRA2GRAY: - case CV_RGB2GRAY: - case CV_RGBA2GRAY: - { - CV_Assert(scn == 3 || scn == 4); - bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2; - dst.create(sz, CV_MAKETYPE(depth, 1)); - RGB2Gray_caller(src, dst, bidx); - break; - } + switch (code) + { /* - case CV_BGR5652GRAY: case CV_BGR5552GRAY: - case CV_GRAY2BGR: case CV_GRAY2BGRA: - case CV_GRAY2BGR565: case CV_GRAY2BGR555: - case CV_BGR2YCrCb: case CV_RGB2YCrCb: - case CV_BGR2YUV: case CV_RGB2YUV: - case CV_YCrCb2BGR: case CV_YCrCb2RGB: - case CV_YUV2BGR: case CV_YUV2RGB: - case CV_BGR2XYZ: case CV_RGB2XYZ: - case CV_XYZ2BGR: case CV_XYZ2RGB: - case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL: - case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL: - case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL: - case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL: + case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR: + case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA: + case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555: + case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555: + case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB: + case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA: */ - default: - CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); - } + case CV_BGR2GRAY: + case CV_BGRA2GRAY: + case CV_RGB2GRAY: + case CV_RGBA2GRAY: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2; + dst.create(sz, CV_MAKETYPE(depth, 1)); + RGB2Gray_caller(src, dst, bidx); + break; } + case CV_GRAY2BGR: + case CV_GRAY2BGRA: + { + CV_Assert(scn == 1); + dcn = code == CV_GRAY2BGRA ? 4 : 3; + dst.create(sz, CV_MAKETYPE(depth, dcn)); + Gray2RGB_caller(src, dst); + break; + } + case CV_BGR2YUV: + case CV_RGB2YUV: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == CV_BGR2YUV ? 0 : 2; + dst.create(sz, CV_MAKETYPE(depth, 3)); + RGB2YUV_caller(src, dst, bidx); + break; + } + case CV_YUV2BGR: + case CV_YUV2RGB: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == CV_YUV2BGR ? 0 : 2; + dst.create(sz, CV_MAKETYPE(depth, 3)); + YUV2RGB_caller(src, dst, bidx); + break; + } + case CV_YUV2RGB_NV12: + case CV_YUV2BGR_NV12: + case CV_YUV2RGBA_NV12: + case CV_YUV2BGRA_NV12: + { + CV_Assert(scn == 1); + CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); + dcn = code == CV_YUV2BGRA_NV12 || code == CV_YUV2RGBA_NV12 ? 4 : 3; + bidx = code == CV_YUV2BGRA_NV12 || code == CV_YUV2BGR_NV12 ? 0 : 2; + + Size dstSz(sz.width, sz.height * 2 / 3); + dst.create(dstSz, CV_MAKETYPE(depth, dcn)); + YUV2RGB_NV12_caller(src, dst, bidx); + break; + } + case CV_BGR2YCrCb: + case CV_RGB2YCrCb: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == CV_BGR2YCrCb ? 0 : 2; + dst.create(sz, CV_MAKETYPE(depth, 3)); + RGB2YCrCb_caller(src, dst, bidx); + break; + } + case CV_YCrCb2BGR: + case CV_YCrCb2RGB: + { + break; + } + /* + case CV_BGR5652GRAY: case CV_BGR5552GRAY: + case CV_GRAY2BGR565: case CV_GRAY2BGR555: + case CV_BGR2YCrCb: case CV_RGB2YCrCb: + case CV_BGR2XYZ: case CV_RGB2XYZ: + case CV_XYZ2BGR: case CV_XYZ2RGB: + case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL: + case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL: + case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL: + case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL: + */ + default: + CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); + } +} } void cv::ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn) diff --git a/modules/ocl/src/kernels/cvt_color.cl b/modules/ocl/src/kernels/cvt_color.cl index 6c3868056..952193931 100644 --- a/modules/ocl/src/kernels/cvt_color.cl +++ b/modules/ocl/src/kernels/cvt_color.cl @@ -16,6 +16,7 @@ // // @Authors // Jia Haipeng, jiahaipeng95@gmail.com +// Peng Xiao, pengxiao@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -48,13 +49,33 @@ #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif +#define DATA_TYPE UNDEFINED + #if defined (DEPTH_0) +#undef DATA_TYPE #define DATA_TYPE uchar +#define MAX_NUM 255 +#define HALF_MAX 128 +#define SAT_CAST(num) convert_uchar_sat(num) #endif + #if defined (DEPTH_2) +#undef DATA_TYPE #define DATA_TYPE ushort +#define MAX_NUM 65535 +#define HALF_MAX 32768 +#define SAT_CAST(num) convert_ushort_sat(num) #endif +#if defined (DEPTH_5) +#undef DATA_TYPE +#define DATA_TYPE float +#define MAX_NUM 1.0f +#define HALF_MAX 0.5f +#define SAT_CAST(num) (num) +#endif + + #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) enum { @@ -65,6 +86,7 @@ enum B2Y = 1868, BLOCK_SIZE = 256 }; +///////////////////////////////////// RGB <-> GRAY ////////////////////////////////////// __kernel void RGB2Gray(int cols,int rows,int src_step,int dst_step,int channels, int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) @@ -72,10 +94,203 @@ __kernel void RGB2Gray(int cols,int rows,int src_step,int dst_step,int channels, const int x = get_global_id(0); const int y = get_global_id(1); + src_step /= sizeof(DATA_TYPE); + dst_step /= sizeof(DATA_TYPE); if (y < rows && x < cols) { - int src_idx = y * src_step + x * channels * sizeof(DATA_TYPE); - int dst_idx = y * dst_step + x * sizeof(DATA_TYPE); + int src_idx = y * src_step + x * channels; + int dst_idx = y * dst_step + x; +#if defined (DEPTH_5) + dst[dst_idx] = src[src_idx + bidx] * 0.114f + src[src_idx + 1] * 0.587f + src[src_idx + (bidx^2)] * 0.299f; +#else dst[dst_idx] = (DATA_TYPE)CV_DESCALE((src[src_idx + bidx] * B2Y + src[src_idx + 1] * G2Y + src[src_idx + (bidx^2)] * R2Y), yuv_shift); +#endif + } +} + +__kernel void Gray2RGB(int cols,int rows,int src_step,int dst_step, + __global const DATA_TYPE* src, __global DATA_TYPE* dst) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + src_step /= sizeof(DATA_TYPE); + dst_step /= sizeof(DATA_TYPE); + if (y < rows && x < cols) + { + int src_idx = y * src_step + x; + int dst_idx = y * dst_step + x * 4; + DATA_TYPE val = src[src_idx]; + dst[dst_idx++] = val; + dst[dst_idx++] = val; + dst[dst_idx++] = val; + dst[dst_idx] = MAX_NUM; + } +} + +///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// +__constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f }; +__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 }; + +__kernel void RGB2YUV(int cols,int rows,int src_step,int dst_step,int channels, + int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + src_step /= sizeof(DATA_TYPE); + dst_step /= sizeof(DATA_TYPE); + + if (y < rows && x < cols) + { + int src_idx = y * src_step + x * channels; + int dst_idx = y * dst_step + x * channels; + dst += dst_idx; + const DATA_TYPE rgb[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]}; +#if defined (DEPTH_5) + __constant float * coeffs = c_RGB2YUVCoeffs_f; + const DATA_TYPE Y = rgb[0] * coeffs[bidx] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx^2]; + const DATA_TYPE Cr = (rgb[bidx] - Y) * coeffs[3] + HALF_MAX; + const DATA_TYPE Cb = (rgb[bidx^2] - Y) * coeffs[4] + HALF_MAX; +#else + __constant int * coeffs = c_RGB2YUVCoeffs_i; + const int delta = HALF_MAX * (1 << yuv_shift); + const int Y = CV_DESCALE(rgb[0] * coeffs[bidx] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx^2], yuv_shift); + const int Cr = CV_DESCALE((rgb[bidx] - Y) * coeffs[3] + delta, yuv_shift); + const int Cb = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[4] + delta, yuv_shift); +#endif + dst[0] = SAT_CAST( Y ); + dst[1] = SAT_CAST( Cr ); + dst[2] = SAT_CAST( Cb ); + } +} + +__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f }; +__constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 }; + +__kernel void YUV2RGB(int cols,int rows,int src_step,int dst_step,int channels, + int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + src_step /= sizeof(DATA_TYPE); + dst_step /= sizeof(DATA_TYPE); + + if (y < rows && x < cols) + { + int src_idx = y * src_step + x * channels; + int dst_idx = y * dst_step + x * channels; + dst += dst_idx; + const DATA_TYPE yuv[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]}; + +#if defined (DEPTH_5) + __constant float * coeffs = c_YUV2RGBCoeffs_f; + const float b = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[3]; + const float g = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1]; + const float r = yuv[0] + (yuv[1] - HALF_MAX) * coeffs[0]; +#else + __constant int * coeffs = c_YUV2RGBCoeffs_i; + const int b = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[3], yuv_shift); + const int g = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1], yuv_shift); + const int r = yuv[0] + CV_DESCALE((yuv[1] - HALF_MAX) * coeffs[0], yuv_shift); +#endif + dst[bidx^2] = SAT_CAST( b ); + dst[1] = SAT_CAST( g ); + dst[bidx] = SAT_CAST( r ); + } +} + +__constant int ITUR_BT_601_CY = 1220542; +__constant int ITUR_BT_601_CUB = 2116026; +__constant int ITUR_BT_601_CUG = -409993; +__constant int ITUR_BT_601_CVG = -852492; +__constant int ITUR_BT_601_CVR = 1673527; +__constant int ITUR_BT_601_SHIFT = 20; + +__kernel void YUV2RGBA_NV12(int cols,int rows,int src_step,int dst_step, + int bidx, int width, int height, __global const uchar* src, __global uchar* dst) +{ + const int x = get_global_id(0); // max_x = width / 2 + const int y = get_global_id(1); // max_y = height/ 2 + + if (y < height / 2 && x < width / 2 ) + { + __global const uchar* ysrc = src + (y << 1) * src_step + (x << 1); + __global const uchar* usrc = src + (height + y) * src_step + (x << 1); + __global uchar* dst1 = dst + (y << 1) * dst_step + (x << 3); + __global uchar* dst2 = dst + ((y << 1) + 1) * dst_step + (x << 3); + int Y1 = ysrc[0]; + int Y2 = ysrc[1]; + int Y3 = ysrc[src_step]; + int Y4 = ysrc[src_step + 1]; + + int U = usrc[0] - 128; + int V = usrc[1] - 128; + + int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * V; + int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * V + ITUR_BT_601_CUG * U; + int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * U; + + Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY; + dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); + dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT); + dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); + dst1[3] = 255; + + Y2 = max(0, Y2 - 16) * ITUR_BT_601_CY; + dst1[6 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT); + dst1[5] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT); + dst1[4 + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); + dst1[7] = 255; + + Y3 = max(0, Y3 - 16) * ITUR_BT_601_CY; + dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); + dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT); + dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); + dst2[3] = 255; + + Y4 = max(0, Y4 - 16) * ITUR_BT_601_CY; + dst2[6 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT); + dst2[5] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT); + dst2[4 + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); + dst2[7] = 255; + } +} + +///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// +__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; +__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241}; + +__kernel void RGB2YCrCb(int cols,int rows,int src_step,int dst_step,int channels, + int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + src_step /= sizeof(DATA_TYPE); + dst_step /= sizeof(DATA_TYPE); + + if (y < rows && x < cols) + { + int src_idx = y * src_step + x * channels; + int dst_idx = y * dst_step + x * channels; + dst += dst_idx; + const DATA_TYPE rgb[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]}; +#if defined (DEPTH_5) + __constant float * coeffs = c_RGB2YCrCbCoeffs_f; + const DATA_TYPE Y = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx]; + const DATA_TYPE Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX; + const DATA_TYPE Cb = (rgb[bidx] - Y) * coeffs[4] + HALF_MAX; +#else + __constant int * coeffs = c_RGB2YCrCbCoeffs_i; + const int delta = HALF_MAX * (1 << yuv_shift); + const int Y = CV_DESCALE(rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx], yuv_shift); + const int Cr = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift); + const int Cb = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift); +#endif + dst[0] = SAT_CAST( Y ); + dst[1] = SAT_CAST( Cr ); + dst[2] = SAT_CAST( Cb ); } } diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp new file mode 100644 index 000000000..3935559fd --- /dev/null +++ b/modules/ocl/test/test_color.cpp @@ -0,0 +1,193 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#ifdef HAVE_OPENCL + +//#define MAT_DEBUG +#ifdef MAT_DEBUG +#define MAT_DIFF(mat, mat2)\ +{\ + for(int i = 0; i < mat.rows; i ++)\ + {\ + for(int j = 0; j < mat.cols; j ++)\ + {\ + cv::Vec4b s = mat.at<cv::Vec4b>(i, j);\ + cv::Vec4b s2 = mat2.at<cv::Vec4b>(i, j);\ + if(s != s2) printf("*");\ + else printf(".");\ + }\ + puts("\n");\ + }\ +} +#else +#define MAT_DIFF(mat, mat2) +#endif + + +namespace +{ + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// cvtColor +PARAM_TEST_CASE(CvtColor, cv::Size, MatDepth) +{ + cv::Size size; + int depth; + bool useRoi; + + cv::Mat img; + + virtual void SetUp() + { + size = GET_PARAM(0); + depth = GET_PARAM(1); + + img = randomMat(size, CV_MAKE_TYPE(depth, 3), 0.0, depth == CV_32F ? 1.0 : 255.0); + } +}; + +#define CVTCODE(name) cv::COLOR_ ## name +#define TEST_P_CVTCOLOR(name) TEST_P(CvtColor, name)\ +{\ + cv::Mat src = img;\ + cv::ocl::oclMat ocl_img, dst;\ + ocl_img.upload(img);\ + cv::ocl::cvtColor(ocl_img, dst, CVTCODE(name));\ + cv::Mat dst_gold;\ + cv::cvtColor(src, dst_gold, CVTCODE(name));\ + cv::Mat dst_mat;\ + dst.download(dst_mat);\ + EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5, "");\ +} + +//add new ones here using macro +TEST_P_CVTCOLOR(RGB2GRAY) +TEST_P_CVTCOLOR(BGR2GRAY) +TEST_P_CVTCOLOR(RGBA2GRAY) +TEST_P_CVTCOLOR(BGRA2GRAY) + +TEST_P_CVTCOLOR(RGB2YUV) +TEST_P_CVTCOLOR(BGR2YUV) +TEST_P_CVTCOLOR(YUV2RGB) +TEST_P_CVTCOLOR(YUV2BGR) +TEST_P_CVTCOLOR(RGB2YCrCb) +TEST_P_CVTCOLOR(BGR2YCrCb) + +PARAM_TEST_CASE(CvtColor_Gray2RGB, cv::Size, MatDepth, int) +{ + cv::Size size; + int code; + int depth; + cv::Mat img; + + virtual void SetUp() + { + size = GET_PARAM(0); + depth = GET_PARAM(1); + code = GET_PARAM(2); + img = randomMat(size, CV_MAKETYPE(depth, 1), 0.0, depth == CV_32F ? 1.0 : 255.0); + } +}; +TEST_P(CvtColor_Gray2RGB, Accuracy) +{ + cv::Mat src = img; + cv::ocl::oclMat ocl_img, dst; + ocl_img.upload(src); + cv::ocl::cvtColor(ocl_img, dst, code); + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, code); + cv::Mat dst_mat; + dst.download(dst_mat); + EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5, ""); +} + + +PARAM_TEST_CASE(CvtColor_YUV420, cv::Size, int) +{ + cv::Size size; + int code; + + cv::Mat img; + + virtual void SetUp() + { + size = GET_PARAM(0); + code = GET_PARAM(1); + img = randomMat(size, CV_8UC1, 0.0, 255.0); + } +}; + +TEST_P(CvtColor_YUV420, Accuracy) +{ + cv::Mat src = img; + cv::ocl::oclMat ocl_img, dst; + ocl_img.upload(src); + cv::ocl::cvtColor(ocl_img, dst, code); + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, code); + cv::Mat dst_mat; + dst.download(dst_mat); + MAT_DIFF(dst_mat, dst_gold); + EXPECT_MAT_NEAR(dst_gold, dst_mat, 1e-5, ""); +} + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor, testing::Combine( + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)) + )); + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor_YUV420, testing::Combine( + testing::Values(cv::Size(128, 45), cv::Size(46, 132), cv::Size(1024, 1023)), + testing::Values(CV_YUV2RGBA_NV12, CV_YUV2BGRA_NV12, CV_YUV2RGB_NV12, CV_YUV2BGR_NV12) + )); + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor_Gray2RGB, testing::Combine( + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)), + testing::Values(CV_GRAY2BGR, CV_GRAY2BGRA, CV_GRAY2RGB, CV_GRAY2RGBA) + )); +} +#endif