Merge pull request #2065 from ilya-lavrenov:tapi_calcBackProject

This commit is contained in:
Andrey Pavlenko
2013-12-26 20:37:33 +04:00
committed by OpenCV Buildbot
4 changed files with 475 additions and 0 deletions

View File

@@ -1930,13 +1930,153 @@ void cv::calcBackProject( const Mat* images, int nimages, const int* channels,
}
namespace cv {
/**
 * Locates the image that holds a given channel when the channels of all
 * images in @p um are numbered consecutively (image 0 first).
 *
 * @param um     images whose channels are concatenated, in order
 * @param cn     zero-based index into the concatenated channel list
 * @param idx    [out] index of the image in @p um that contains channel @p cn,
 *               or -1 if @p cn is negative or not less than the total channel count
 * @param cnidx  [out] zero-based channel index inside um[idx], or -1 on failure
 */
static void getUMatIndex(const std::vector<UMat> & um, int cn, int & idx, int & cnidx)
{
    idx = cnidx = -1;

    // A negative index can never address a channel; report "not found"
    // instead of handing a negative local index back to the caller.
    if (cn < 0)
        return;

    // Global index of the first channel of the image currently inspected.
    int firstChannel = 0;
    for (size_t i = 0, size = um.size(); i < size; ++i)
    {
        const int ccn = um[i].channels();

        // Image i owns the half-open range [firstChannel, firstChannel + ccn).
        // The comparison must be strict: cn == firstChannel + ccn is already
        // the first channel of the *next* image.
        if (cn < firstChannel + ccn)
        {
            idx = (int)i;
            // Local index is the distance from this image's first channel
            // (a modulo by the preceding channel count is not equivalent).
            cnidx = cn - firstChannel;
            return;
        }

        firstChannel += ccn;
    }
}
/**
 * OpenCL path of calcBackProject for 1-D and 2-D CV_32F histograms over
 * 8-bit images.
 *
 * The work is split into two stages: a "calcLUT" kernel builds a 256-entry
 * table per histogram dimension (one entry per possible 8-bit pixel value),
 * then a "LUT" kernel maps every pixel through the table(s) into @p _dst.
 *
 * @param _images   vector of UMat; all images must share size and depth
 * @param channels  zero-based indices into the concatenated channels of
 *                  @p _images; channels[d] feeds histogram dimension d
 * @param _hist     histogram (rows index dimension 0, columns dimension 1)
 * @param _dst      output back projection, created as single-channel with the
 *                  size and depth of the input images
 * @param ranges    [lower, upper] pair per histogram dimension
 * @param scale     factor applied to histogram values before saturation to 8 bits
 * @param histdims  number of histogram dimensions (only 1 and 2 are handled)
 * @return true if the result was produced by the OpenCL kernels; false if the
 *         configuration is unsupported or a kernel could not be built/run, in
 *         which case the caller is expected to use another implementation.
 */
static bool ocl_calcBackProject( InputArrayOfArrays _images, std::vector<int> channels,
                                 InputArray _hist, OutputArray _dst,
                                 const std::vector<float>& ranges,
                                 float scale, size_t histdims )
{
    const std::vector<UMat> & images = *(const std::vector<UMat> *)_images.getObj();
    const size_t nimages = images.size();

    // Must be verified before images[0] is touched.
    CV_Assert(nimages > 0);

    Size size = images[0].size();
    int depth = images[0].depth();
    size_t totalcn = images[0].channels();

    for (size_t i = 1; i < nimages; ++i)
    {
        const UMat & m = images[i];
        totalcn += m.channels();
        CV_Assert(size == m.size() && depth == m.depth());
    }

    // channels[] and ranges[] are indexed per dimension below; bail out rather
    // than read past their ends.
    if (channels.size() < histdims || ranges.size() < histdims * 2)
        return false;

    // NOTE: the channel list is deliberately NOT sorted. channels[d] is paired
    // with histogram dimension d and with ranges[2*d], ranges[2*d + 1]; sorting
    // would silently swap dimensions for inputs such as {1, 0}.
    for (size_t i = 0; i < histdims; ++i)
        CV_Assert(channels[i] >= 0 && channels[i] < (int)totalcn);

    // The kernels read pixels as uchar and the lookup tables have 256 entries,
    // so only 8-bit unsigned input can be processed here.
    if (depth != CV_8U)
        return false;

    if (histdims == 1)
    {
        int idx, cnidx;
        getUMatIndex(images, channels[0], idx, cnidx);
        CV_Assert(idx >= 0);
        UMat im = images[idx];

        String opts = format("-D histdims=1 -D scn=%d", im.channels());
        ocl::Kernel lutk("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
        if (lutk.empty())
            return false;

        size_t lsize = 256;
        UMat lut(1, (int)lsize, CV_32SC1), hist = _hist.getUMat(), uranges(ranges, true);

        // The kernel addresses bin b at hist_offset + b * hist_step and is given
        // hist.rows as the bin count, i.e. it expects one bin per row. A 1xN
        // histogram does not fit that layout, so leave it to the fallback path.
        if (hist.cols != 1)
            return false;

        lutk.args(ocl::KernelArg::ReadOnlyNoSize(hist), hist.rows,
                  ocl::KernelArg::PtrWriteOnly(lut), scale, ocl::KernelArg::PtrReadOnly(uranges));
        if (!lutk.run(1, &lsize, NULL, false))
            return false;

        ocl::Kernel mapk("LUT", ocl::imgproc::calc_back_project_oclsrc, opts);
        if (mapk.empty())
            return false;

        _dst.create(size, depth);
        UMat dst = _dst.getUMat();

        // Shift the local header so the kernel starts at the selected channel
        // (one byte per channel for 8-bit data).
        im.offset += cnidx;
        mapk.args(ocl::KernelArg::ReadOnlyNoSize(im), ocl::KernelArg::PtrReadOnly(lut),
                  ocl::KernelArg::WriteOnly(dst));

        // Explicit casts: int -> size_t inside braces is a narrowing conversion.
        size_t globalsize[2] = { (size_t)size.width, (size_t)size.height };
        return mapk.run(2, globalsize, NULL, false);
    }
    else if (histdims == 2)
    {
        int idx0, idx1, cnidx0, cnidx1;
        getUMatIndex(images, channels[0], idx0, cnidx0);
        getUMatIndex(images, channels[1], idx1, cnidx1);
        CV_Assert(idx0 >= 0 && idx1 >= 0);
        UMat im0 = images[idx0], im1 = images[idx1];

        // LUT for the first dimension: entries [0, 256) of lut, ranges[0..1]
        String opts = format("-D histdims=2 -D scn1=%d -D scn2=%d", im0.channels(), im1.channels());
        ocl::Kernel lutk1("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
        if (lutk1.empty())
            return false;

        size_t lsize = 256;
        UMat lut(1, (int)lsize<<1, CV_32SC1), uranges(ranges, true), hist = _hist.getUMat();

        lutk1.args(hist.rows, ocl::KernelArg::PtrWriteOnly(lut), (int)0, ocl::KernelArg::PtrReadOnly(uranges), (int)0);
        if (!lutk1.run(1, &lsize, NULL, false))
            return false;

        // LUT for the second dimension: entries [256, 512) of lut, ranges[2..3]
        ocl::Kernel lutk2("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
        if (lutk2.empty())
            return false;

        // NOTE(review): the kernel already receives the element offset (256)
        // explicitly and the mapping kernel indexes the second table at +256
        // from the buffer start, so this header offset looks redundant - confirm
        // how pointer-only kernel arguments treat UMat::offset before removing.
        lut.offset += lsize * sizeof(int);
        lutk2.args(hist.cols, ocl::KernelArg::PtrWriteOnly(lut), (int)256, ocl::KernelArg::PtrReadOnly(uranges), (int)2);
        if (!lutk2.run(1, &lsize, NULL, false))
            return false;

        // Map every pixel pair through both tables into the histogram.
        ocl::Kernel mapk("LUT", ocl::imgproc::calc_back_project_oclsrc, opts);
        if (mapk.empty())
            return false;

        _dst.create(size, depth);
        UMat dst = _dst.getUMat();

        // Shift the local headers to the selected channels (one byte each for 8-bit data).
        im0.offset += cnidx0;
        im1.offset += cnidx1;
        mapk.args(ocl::KernelArg::ReadOnlyNoSize(im0), ocl::KernelArg::ReadOnlyNoSize(im1),
                  ocl::KernelArg::ReadOnlyNoSize(hist), ocl::KernelArg::PtrReadOnly(lut), scale, ocl::KernelArg::WriteOnly(dst));

        // Explicit casts: int -> size_t inside braces is a narrowing conversion.
        size_t globalsize[2] = { (size_t)size.width, (size_t)size.height };
        return mapk.run(2, globalsize, NULL, false);
    }

    // Histograms with other dimensionality are not handled by this path.
    return false;
}
}
void cv::calcBackProject( InputArrayOfArrays images, const std::vector<int>& channels,
InputArray hist, OutputArray dst,
const std::vector<float>& ranges,
double scale )
{
Size histSize = hist.size();
bool _1D = histSize.height == 1 || histSize.width == 1;
size_t histdims = _1D ? 1 : hist.dims();
if (ocl::useOpenCL() && images.isUMatVector() && dst.isUMat() && hist.type() == CV_32FC1 &&
histdims <= 2 && ranges.size() == histdims * 2 && histdims == channels.size() &&
ocl_calcBackProject(images, channels, hist, dst, ranges, (float)scale, histdims))
return;
Mat H0 = hist.getMat(), H;
int hcn = H0.channels();
if( hcn > 1 )
{
CV_Assert( H0.isContinuous() );
@@ -1947,12 +2087,15 @@ void cv::calcBackProject( InputArrayOfArrays images, const std::vector<int>& cha
}
else
H = H0;
bool _1d = H.rows == 1 || H.cols == 1;
int i, dims = H.dims, rsz = (int)ranges.size(), csz = (int)channels.size();
int nimages = (int)images.total();
CV_Assert(nimages > 0);
CV_Assert(rsz == dims*2 || (rsz == 2 && _1d) || (rsz == 0 && images.depth(0) == CV_8U));
CV_Assert(csz == 0 || csz == dims || (csz == 1 && _1d));
float* _ranges[CV_MAX_DIM];
if( rsz > 0 )
{

View File

@@ -0,0 +1,135 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Xu Pang, pangxu010@163.com
// Wenju He, wenju@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//
#define OUT_OF_RANGE -1
#if histdims == 1
// Builds the lookup table for a 1-D histogram: one work-item per table entry.
// Entry x receives the scaled, 8-bit saturated histogram value of the bin that
// pixel value x falls into, or OUT_OF_RANGE when x lies outside `ranges` or
// maps to a bin index that is not below hist_bins.
__kernel void calcLUT(__global const uchar * histptr, int hist_step, int hist_offset, int hist_bins,
                      __global int * lut, float scale, __constant float * ranges)
{
    const int level = get_global_id(0);
    const float fvalue = convert_float(level);
    const float lower = ranges[0], upper = ranges[1];

    // Assume "no bin" until a valid one is found.
    int entry = OUT_OF_RANGE;

    if (!(fvalue > upper || fvalue < lower))
    {
        const float binWidth = (upper - lower) / hist_bins;
        const int bin = convert_int_sat_rtn((fvalue - lower) / binWidth);

        if (bin < hist_bins)
        {
            // Bins are addressed one per histogram row.
            __global const float * cell =
                (__global const float *)(histptr + mad24(hist_step, bin, hist_offset));
            entry = (int)convert_uchar_sat_rte(cell[0] * scale);
        }
    }

    lut[level] = entry;
}
// Applies the 1-D lookup table: one work-item per destination pixel.
// The source is read with a stride of `scn` bytes per pixel; table entries
// equal to OUT_OF_RANGE produce 0 in the destination.
__kernel void LUT(__global const uchar * src, int src_step, int src_offset,
                  __constant int * lut,
                  __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the destination have nothing to do.
    if (col >= dst_cols || row >= dst_rows)
        return;

    const int src_index = mad24(row, src_step, src_offset + col * scn);
    const int dst_index = mad24(row, dst_step, dst_offset + col);

    const int mapped = lut[src[src_index]];
    if (mapped == OUT_OF_RANGE)
        dst[dst_index] = 0;
    else
        dst[dst_index] = convert_uchar(mapped);
}
#elif histdims == 2
// Builds the lookup table for one dimension of a 2-D histogram: one work-item
// per table entry. Entry x (stored at lut[lut_offset + x]) receives the bin
// index of pixel value x for the range pair starting at ranges[roffset], or
// OUT_OF_RANGE when x lies outside that range or the bin index is not below
// hist_bins.
__kernel void calcLUT(int hist_bins, __global int * lut, int lut_offset,
                      __constant float * ranges, int roffset)
{
    const int level = get_global_id(0);
    const float fvalue = convert_float(level);

    // Select this dimension's range pair and table slice.
    __constant float * dimRanges = ranges + roffset;
    __global int * table = lut + lut_offset;

    const float lower = dimRanges[0], upper = dimRanges[1];

    // Assume "no bin" until a valid one is found.
    int entry = OUT_OF_RANGE;

    if (!(fvalue > upper || fvalue < lower))
    {
        const float binWidth = (upper - lower) / hist_bins;
        const int bin = convert_int_sat_rtn((fvalue - lower) / binWidth);

        if (bin < hist_bins)
            entry = bin;
    }

    table[level] = entry;
}
// Applies the 2-D lookup: one work-item per destination pixel.
// Each source is read with its own per-pixel stride (scn1 / scn2 bytes). The
// first 256 table entries give the row bin, the next 256 the column bin; if
// either is OUT_OF_RANGE the result is 0, otherwise it is the histogram value
// at (bin1, bin2) multiplied by `scale` and saturated to 8 bits.
__kernel void LUT(__global const uchar * src1, int src1_step, int src1_offset,
                  __global const uchar * src2, int src2_step, int src2_offset,
                  __global const uchar * histptr, int hist_step, int hist_offset,
                  __constant int * lut, float scale,
                  __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the destination have nothing to do.
    if (col >= dst_cols || row >= dst_rows)
        return;

    const int src1_index = mad24(row, src1_step, src1_offset + col * scn1);
    const int src2_index = mad24(row, src2_step, src2_offset + col * scn2);
    const int dst_index = mad24(row, dst_step, dst_offset + col);

    const int bin1 = lut[src1[src1_index]];
    const int bin2 = lut[src2[src2_index] + 256];

    if (bin1 == OUT_OF_RANGE || bin2 == OUT_OF_RANGE)
    {
        dst[dst_index] = 0;
    }
    else
    {
        // Row bin1, column bin2 of the float histogram.
        const int hist_index = mad24(hist_step, bin1, hist_offset + bin2 * (int)sizeof(float));
        __global const float * cell = (__global const float *)(histptr + hist_index);
        dst[dst_index] = convert_uchar_sat_rte(*cell * scale);
    }
}
#else
#error "(nimages <= 2) should be true"
#endif