GPU implementation of CLAHE

This commit is contained in:
Vladislav Vinogradov
2013-03-25 15:39:07 +04:00
parent 5810a73d30
commit 4d23e2c8c9
6 changed files with 484 additions and 1 deletions

View File

@@ -43,7 +43,10 @@
#ifndef __OPENCV_GPU_SCAN_HPP__
#define __OPENCV_GPU_SCAN_HPP__
#include "common.hpp"
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/warp.hpp"
#include "opencv2/gpu/device/warp_shuffle.hpp"
namespace cv { namespace gpu { namespace device
{
@@ -166,6 +169,82 @@ namespace cv { namespace gpu { namespace device
static const int warp_log = 5;
static const int warp_mask = 31;
};
template <typename T>
__device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
{
#if __CUDA_ARCH__ >= 300
const unsigned int laneId = cv::gpu::device::Warp::laneId();
// scan on shuffl functions
#pragma unroll
for (int i = 1; i <= (OPENCV_GPU_WARP_SIZE / 2); i *= 2)
{
const T n = cv::gpu::device::shfl_up(idata, i);
if (laneId >= i)
idata += n;
}
return idata;
#else
unsigned int pos = 2 * tid - (tid & (OPENCV_GPU_WARP_SIZE - 1));
s_Data[pos] = 0;
pos += OPENCV_GPU_WARP_SIZE;
s_Data[pos] = idata;
s_Data[pos] += s_Data[pos - 1];
s_Data[pos] += s_Data[pos - 2];
s_Data[pos] += s_Data[pos - 4];
s_Data[pos] += s_Data[pos - 8];
s_Data[pos] += s_Data[pos - 16];
return s_Data[pos];
#endif
}
template <typename T>
__device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
{
return warpScanInclusive(idata, s_Data, tid) - idata;
}
template <int tiNumScanThreads, typename T>
__device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
{
if (tiNumScanThreads > OPENCV_GPU_WARP_SIZE)
{
//Bottom-level inclusive warp scan
T warpResult = warpScanInclusive(idata, s_Data, tid);
//Save top elements of each warp for exclusive warp scan
//sync to wait for warp scans to complete (because s_Data is being overwritten)
__syncthreads();
if ((tid & (OPENCV_GPU_WARP_SIZE - 1)) == (OPENCV_GPU_WARP_SIZE - 1))
{
s_Data[tid >> OPENCV_GPU_LOG_WARP_SIZE] = warpResult;
}
//wait for warp scans to complete
__syncthreads();
if (tid < (tiNumScanThreads / OPENCV_GPU_WARP_SIZE) )
{
//grab top warp elements
T val = s_Data[tid];
//calculate exclusive scan and write back to shared memory
s_Data[tid] = warpScanExclusive(val, s_Data, tid);
}
//return updated warp scans with exclusive scan results
__syncthreads();
return warpResult + s_Data[tid >> OPENCV_GPU_LOG_WARP_SIZE];
}
else
{
return warpScanInclusive(idata, s_Data, tid);
}
}
}}}
#endif // __OPENCV_GPU_SCAN_HPP__

View File

@@ -1062,6 +1062,14 @@ CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = St
CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null());
CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
class CV_EXPORTS CLAHE : public cv::CLAHE
{
public:
using cv::CLAHE::apply;
virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0;
};
CV_EXPORTS Ptr<cv::gpu::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
//////////////////////////////// StereoBM_GPU ////////////////////////////////
class CV_EXPORTS StereoBM_GPU