add preprocessing only function
This commit is contained in:
parent
87e0eee92b
commit
ee291a15da
@ -1576,6 +1576,9 @@ public:
|
||||
// Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution
|
||||
virtual void detect(InputArray image, InputArray rois, OutputArray objects, Stream& stream = Stream::Null()) const;
|
||||
|
||||
// Preprocesing only
|
||||
virtual void preprocess(InputArray image, OutputArray channels, Stream& stream = Stream::Null()) const;
|
||||
|
||||
// Convert ROI matrix into the suitable for detect method.
|
||||
// Param roi is an input matrix of the same size as the image.
|
||||
// There non zero value mean that detector should be executed in this point.
|
||||
|
@ -41,6 +41,7 @@
|
||||
//M*/
|
||||
|
||||
#include <opencv2/gpu/device/common.hpp>
|
||||
#include <opencv2/gpu/device/saturate_cast.hpp>
|
||||
|
||||
#include <icf.hpp>
|
||||
#include <float.h>
|
||||
@ -49,6 +50,108 @@
|
||||
namespace cv { namespace gpu { namespace device {
|
||||
namespace icf {
|
||||
|
||||
__device__ __forceinline__ void luv(const float& b, const float& g, const float& r, uchar& __l, uchar& __u, uchar& __v)
|
||||
{
|
||||
// rgb -> XYZ
|
||||
float x = 0.412453f * r + 0.357580f * g + 0.180423f * b;
|
||||
float y = 0.212671f * r + 0.715160f * g + 0.072169f * b;
|
||||
float z = 0.019334f * r + 0.119193f * g + 0.950227f * b;
|
||||
|
||||
// computed for D65
|
||||
const float _ur = 0.19783303699678276f;
|
||||
const float _vr = 0.46833047435252234f;
|
||||
|
||||
const float divisor = fmax((x + 15.f * y + 3.f * z), FLT_EPSILON);
|
||||
const float _u = __fdividef(4.f * x, divisor);
|
||||
const float _v = __fdividef(9.f * y, divisor);
|
||||
|
||||
const float L = fmax(0.f, ((116.f * cbrtf(y)) - 16.f));
|
||||
const float U = 13.f * L * (_u - _ur);
|
||||
const float V = 13.f * L * (_v - _vr);
|
||||
|
||||
// L in [0, 100], u in [-134, 220], v in [-140, 122]
|
||||
__l = static_cast<uchar>( L * (255.f / 100.f));
|
||||
__u = static_cast<uchar>((U + 134.f) * (255.f / (220.f + 134.f )));
|
||||
__v = static_cast<uchar>((V + 140.f) * (255.f / (122.f + 140.f )));
|
||||
}
|
||||
|
||||
__global__ void bgr2Luv_d(const uchar* rgb, const int rgbPitch, uchar* luvg, const int luvgPitch)
|
||||
{
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
uchar3 color = ((uchar3*)(rgb + rgbPitch * y))[x];
|
||||
uchar l, u, v;
|
||||
luv(color.x / 255.f, color.y / 255.f, color.z / 255.f, l, u, v);
|
||||
|
||||
luvg[luvgPitch * y + x] = l;
|
||||
luvg[luvgPitch * (y + 480) + x] = u;
|
||||
luvg[luvgPitch * (y + 2 * 480) + x] = v;
|
||||
}
|
||||
|
||||
void bgr2Luv(const PtrStepSzb& bgr, PtrStepSzb luv)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(bgr.cols / 32, bgr.rows / 8);
|
||||
|
||||
bgr2Luv_d<<<grid, block>>>((const uchar*)bgr.ptr(0), bgr.step, (uchar*)luv.ptr(0), luv.step);
|
||||
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int fast_angle_bin(const float& dx, const float& dy)
|
||||
{
|
||||
const float angle_quantum = M_PI / 6.f;
|
||||
float angle = atan2(dx, dy) + (angle_quantum / 2.f);
|
||||
|
||||
if (angle < 0) angle += M_PI;
|
||||
|
||||
const float angle_scaling = 1.f / angle_quantum;
|
||||
return static_cast<int>(angle * angle_scaling) % 6;
|
||||
}
|
||||
|
||||
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tgray;
|
||||
|
||||
__global__ void magnitude_d(PtrStepSzb mag)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const float dx_a = tex2D(tgray, x + 1, y),
|
||||
dx_b = tex2D(tgray, x - 1, y),
|
||||
dx = dx_a - dx_b,
|
||||
|
||||
dy_a = tex2D(tgray, x, y + 1),
|
||||
dy_b = tex2D(tgray, x, y - 1),
|
||||
dy = dy_a - dy_b;
|
||||
|
||||
|
||||
const float magnitude_scaling = 1.0f/ sqrtf(2);
|
||||
|
||||
const float magnitude = sqrtf((dx * dx) + (dy * dy)) * magnitude_scaling;
|
||||
const uchar magnitude_u8 = static_cast<uchar>(magnitude);
|
||||
|
||||
mag( 480 * 6 + y, x) = magnitude_u8;
|
||||
|
||||
int angle_channel_index;
|
||||
|
||||
angle_channel_index = fast_angle_bin(dy, dx);
|
||||
mag( 480 * angle_channel_index + y, x) = magnitude_u8;
|
||||
}
|
||||
|
||||
void magnitude(const PtrStepSzb& gray, PtrStepSzb mag)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(gray.cols / 32, gray.rows / 8);
|
||||
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, tgray, gray.data, desc, gray.cols, gray.rows, gray.step) );
|
||||
|
||||
magnitude_d<<<grid, block>>>(mag);
|
||||
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
// ToDo: use textures or uncached load instruction.
|
||||
__global__ void magToHist(const uchar* __restrict__ mag,
|
||||
const float* __restrict__ angle, const int angPitch,
|
||||
|
@ -88,6 +88,9 @@ namespace icf {
|
||||
|
||||
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections,
|
||||
PtrStepSzb suppressed, cudaStream_t stream);
|
||||
|
||||
void bgr2Luv(const PtrStepSzb& bgr, PtrStepSzb luv);
|
||||
void magnitude(const PtrStepSzb& gray, PtrStepSzb mag);
|
||||
}
|
||||
|
||||
namespace imgproc {
|
||||
@ -606,4 +609,34 @@ void cv::gpu::SCascade::read(const FileNode& fn)
|
||||
Algorithm::read(fn);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
void bgr2Luv(const cv::gpu::GpuMat& input, cv::gpu::GpuMat& integral)
|
||||
{
|
||||
cv::gpu::GpuMat bgr;
|
||||
cv::gpu::GaussianBlur(input, bgr, cv::Size(3, 3), -1);
|
||||
|
||||
cv::gpu::GpuMat gray, luv, shrunk, buffer;
|
||||
luv.create(bgr.rows * 10, bgr.cols, CV_8UC1);
|
||||
luv.setTo(0);
|
||||
|
||||
cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
|
||||
cv::gpu::device::icf::magnitude(gray, luv(cv::Rect(0, 0, bgr.cols, bgr.rows * 7)));
|
||||
|
||||
cv::gpu::GpuMat __luv(luv, cv::Rect(0, bgr.rows * 7, bgr.cols, bgr.rows * 3));
|
||||
cv::gpu::device::icf::bgr2Luv(bgr, __luv);
|
||||
|
||||
cv::gpu::resize(luv, shrunk, cv::Size(), 0.25f, 0.25f, CV_INTER_AREA);
|
||||
cv::gpu::integralBuffered(shrunk, integral, buffer);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::SCascade::preprocess(InputArray _bgr, OutputArray _channels, Stream& stream) const
|
||||
{
|
||||
CV_Assert(fields);
|
||||
(void)stream;
|
||||
const GpuMat bgr = _bgr.getGpuMat(), channels = _channels.getGpuMat();
|
||||
}
|
||||
|
||||
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user