Merge pull request #1212 from jet47:gpu-calc-hist
This commit is contained in:
commit
add2ea75c3
@ -562,7 +562,17 @@ PERF_TEST_P(Sz, ImgProc_CalcHist,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
FAIL_NO_CPU();
|
cv::Mat dst;
|
||||||
|
|
||||||
|
const int hbins = 256;
|
||||||
|
const float hranges[] = {0.0f, 256.0f};
|
||||||
|
const int histSize[] = {hbins};
|
||||||
|
const float* ranges[] = {hranges};
|
||||||
|
const int channels[] = {0};
|
||||||
|
|
||||||
|
TEST_CYCLE() cv::calcHist(&src, 1, channels, cv::Mat(), dst, 1, histSize, ranges);
|
||||||
|
|
||||||
|
CPU_SANITY_CHECK(dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,6 +109,86 @@ namespace hist
|
|||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
namespace hist
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ void histEvenInc(int* shist, uint data, int binSize, int lowerLevel, int upperLevel)
|
||||||
|
{
|
||||||
|
if (data >= lowerLevel && data <= upperLevel)
|
||||||
|
{
|
||||||
|
const uint ind = (data - lowerLevel) / binSize;
|
||||||
|
Emulation::smem::atomicAdd(shist + ind, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__global__ void histEven8u(const uchar* src, const size_t step, const int rows, const int cols,
|
||||||
|
int* hist, const int binCount, const int binSize, const int lowerLevel, const int upperLevel)
|
||||||
|
{
|
||||||
|
extern __shared__ int shist[];
|
||||||
|
|
||||||
|
const int y = blockIdx.x * blockDim.y + threadIdx.y;
|
||||||
|
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
|
if (tid < binCount)
|
||||||
|
shist[tid] = 0;
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
if (y < rows)
|
||||||
|
{
|
||||||
|
const uchar* rowPtr = src + y * step;
|
||||||
|
const uint* rowPtr4 = (uint*) rowPtr;
|
||||||
|
|
||||||
|
const int cols_4 = cols / 4;
|
||||||
|
for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
|
||||||
|
{
|
||||||
|
const uint data = rowPtr4[x];
|
||||||
|
|
||||||
|
histEvenInc(shist, (data >> 0) & 0xFFU, binSize, lowerLevel, upperLevel);
|
||||||
|
histEvenInc(shist, (data >> 8) & 0xFFU, binSize, lowerLevel, upperLevel);
|
||||||
|
histEvenInc(shist, (data >> 16) & 0xFFU, binSize, lowerLevel, upperLevel);
|
||||||
|
histEvenInc(shist, (data >> 24) & 0xFFU, binSize, lowerLevel, upperLevel);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cols % 4 != 0 && threadIdx.x == 0)
|
||||||
|
{
|
||||||
|
for (int x = cols_4 * 4; x < cols; ++x)
|
||||||
|
{
|
||||||
|
const uchar data = rowPtr[x];
|
||||||
|
histEvenInc(shist, data, binSize, lowerLevel, upperLevel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
if (tid < binCount)
|
||||||
|
{
|
||||||
|
const int histVal = shist[tid];
|
||||||
|
|
||||||
|
if (histVal > 0)
|
||||||
|
::atomicAdd(hist + tid, histVal);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
const dim3 block(32, 8);
|
||||||
|
const dim3 grid(divUp(src.rows, block.y));
|
||||||
|
|
||||||
|
const int binSize = divUp(upperLevel - lowerLevel, binCount);
|
||||||
|
|
||||||
|
const size_t smem_size = binCount * sizeof(int);
|
||||||
|
|
||||||
|
histEven8u<<<grid, block, smem_size, stream>>>(src.data, src.step, src.rows, src.cols, hist, binCount, binSize, lowerLevel, upperLevel);
|
||||||
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
|
if (stream == 0)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
namespace hist
|
namespace hist
|
||||||
{
|
{
|
||||||
__constant__ int c_lut[256];
|
__constant__ int c_lut[256];
|
||||||
|
@ -889,6 +889,21 @@ void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerL
|
|||||||
histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
|
histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace hist
|
||||||
|
{
|
||||||
|
void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, cudaStream_t stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
void histEven8u(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
hist.create(1, histSize, CV_32S);
|
||||||
|
cudaSafeCall( cudaMemsetAsync(hist.data, 0, histSize * sizeof(int), stream) );
|
||||||
|
hist::histEven8u(src, hist.ptr<int>(), histSize, lowerLevel, upperLevel, stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)
|
void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)
|
||||||
{
|
{
|
||||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );
|
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );
|
||||||
@ -902,6 +917,12 @@ void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSiz
|
|||||||
NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist
|
NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (src.depth() == CV_8U && deviceSupports(FEATURE_SET_COMPUTE_30))
|
||||||
|
{
|
||||||
|
histEven8u(src, hist, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
|
hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,13 +86,16 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Integral, testing::Combine(
|
|||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// HistEven
|
// HistEven
|
||||||
|
|
||||||
struct HistEven : testing::TestWithParam<cv::gpu::DeviceInfo>
|
PARAM_TEST_CASE(HistEven, cv::gpu::DeviceInfo, cv::Size)
|
||||||
{
|
{
|
||||||
cv::gpu::DeviceInfo devInfo;
|
cv::gpu::DeviceInfo devInfo;
|
||||||
|
|
||||||
|
cv::Size size;
|
||||||
|
|
||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
devInfo = GetParam();
|
devInfo = GET_PARAM(0);
|
||||||
|
size = GET_PARAM(1);
|
||||||
|
|
||||||
cv::gpu::setDevice(devInfo.deviceID());
|
cv::gpu::setDevice(devInfo.deviceID());
|
||||||
}
|
}
|
||||||
@ -100,57 +103,34 @@ struct HistEven : testing::TestWithParam<cv::gpu::DeviceInfo>
|
|||||||
|
|
||||||
GPU_TEST_P(HistEven, Accuracy)
|
GPU_TEST_P(HistEven, Accuracy)
|
||||||
{
|
{
|
||||||
cv::Mat img = readImage("stereobm/aloe-L.png");
|
cv::Mat src = randomMat(size, CV_8UC1);
|
||||||
ASSERT_FALSE(img.empty());
|
|
||||||
|
|
||||||
cv::Mat hsv;
|
|
||||||
cv::cvtColor(img, hsv, CV_BGR2HSV);
|
|
||||||
|
|
||||||
int hbins = 30;
|
int hbins = 30;
|
||||||
float hranges[] = {0.0f, 180.0f};
|
float hranges[] = {50.0f, 200.0f};
|
||||||
|
|
||||||
std::vector<cv::gpu::GpuMat> srcs;
|
|
||||||
cv::gpu::split(loadMat(hsv), srcs);
|
|
||||||
|
|
||||||
cv::gpu::GpuMat hist;
|
cv::gpu::GpuMat hist;
|
||||||
cv::gpu::histEven(srcs[0], hist, hbins, (int)hranges[0], (int)hranges[1]);
|
cv::gpu::histEven(loadMat(src), hist, hbins, (int) hranges[0], (int) hranges[1]);
|
||||||
|
|
||||||
|
cv::Mat hist_gold;
|
||||||
|
|
||||||
cv::MatND histnd;
|
|
||||||
int histSize[] = {hbins};
|
int histSize[] = {hbins};
|
||||||
const float* ranges[] = {hranges};
|
const float* ranges[] = {hranges};
|
||||||
int channels[] = {0};
|
int channels[] = {0};
|
||||||
cv::calcHist(&hsv, 1, channels, cv::Mat(), histnd, 1, histSize, ranges);
|
cv::calcHist(&src, 1, channels, cv::Mat(), hist_gold, 1, histSize, ranges);
|
||||||
|
|
||||||
cv::Mat hist_gold = histnd;
|
|
||||||
hist_gold = hist_gold.t();
|
hist_gold = hist_gold.t();
|
||||||
hist_gold.convertTo(hist_gold, CV_32S);
|
hist_gold.convertTo(hist_gold, CV_32S);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
|
EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HistEven, ALL_DEVICES);
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HistEven, testing::Combine(
|
||||||
|
ALL_DEVICES,
|
||||||
|
DIFFERENT_SIZES));
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// CalcHist
|
// CalcHist
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
void calcHistGold(const cv::Mat& src, cv::Mat& hist)
|
|
||||||
{
|
|
||||||
hist.create(1, 256, CV_32SC1);
|
|
||||||
hist.setTo(cv::Scalar::all(0));
|
|
||||||
|
|
||||||
int* hist_row = hist.ptr<int>();
|
|
||||||
for (int y = 0; y < src.rows; ++y)
|
|
||||||
{
|
|
||||||
const uchar* src_row = src.ptr(y);
|
|
||||||
|
|
||||||
for (int x = 0; x < src.cols; ++x)
|
|
||||||
++hist_row[src_row[x]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
PARAM_TEST_CASE(CalcHist, cv::gpu::DeviceInfo, cv::Size)
|
PARAM_TEST_CASE(CalcHist, cv::gpu::DeviceInfo, cv::Size)
|
||||||
{
|
{
|
||||||
cv::gpu::DeviceInfo devInfo;
|
cv::gpu::DeviceInfo devInfo;
|
||||||
@ -174,7 +154,16 @@ GPU_TEST_P(CalcHist, Accuracy)
|
|||||||
cv::gpu::calcHist(loadMat(src), hist);
|
cv::gpu::calcHist(loadMat(src), hist);
|
||||||
|
|
||||||
cv::Mat hist_gold;
|
cv::Mat hist_gold;
|
||||||
calcHistGold(src, hist_gold);
|
|
||||||
|
const int hbins = 256;
|
||||||
|
const float hranges[] = {0.0f, 256.0f};
|
||||||
|
const int histSize[] = {hbins};
|
||||||
|
const float* ranges[] = {hranges};
|
||||||
|
const int channels[] = {0};
|
||||||
|
|
||||||
|
cv::calcHist(&src, 1, channels, cv::Mat(), hist_gold, 1, histSize, ranges);
|
||||||
|
hist_gold = hist_gold.reshape(1, 1);
|
||||||
|
hist_gold.convertTo(hist_gold, CV_32S);
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
|
EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user