preprocessing ~1.981 ms
This commit is contained in:
parent
1917366528
commit
4d9c7c1012
@ -40,6 +40,7 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
|
#include <opencv2/gpu/device/common.hpp>
|
||||||
// #include <icf.hpp>
|
// #include <icf.hpp>
|
||||||
// #include <opencv2/gpu/device/saturate_cast.hpp>
|
// #include <opencv2/gpu/device/saturate_cast.hpp>
|
||||||
// #include <stdio.h>
|
// #include <stdio.h>
|
||||||
@ -54,9 +55,8 @@
|
|||||||
// # define dprintf(format, ...)
|
// # define dprintf(format, ...)
|
||||||
// #endif
|
// #endif
|
||||||
|
|
||||||
// namespace cv { namespace gpu { namespace device {
|
namespace cv { namespace gpu { namespace device {
|
||||||
|
namespace icf {
|
||||||
// namespace icf {
|
|
||||||
|
|
||||||
// enum {
|
// enum {
|
||||||
// HOG_BINS = 6,
|
// HOG_BINS = 6,
|
||||||
@ -66,33 +66,35 @@
|
|||||||
// GREY_OFFSET = HEIGHT * HOG_LUV_BINS
|
// GREY_OFFSET = HEIGHT * HOG_LUV_BINS
|
||||||
// };
|
// };
|
||||||
|
|
||||||
// __global__ void magToHist(const uchar* __restrict__ mag,
|
// ToDo: use textures or ancached load instruction.
|
||||||
// const float* __restrict__ angle, const int angPitch,
|
__global__ void magToHist(const uchar* __restrict__ mag,
|
||||||
// uchar* __restrict__ hog, const int hogPitch)
|
const float* __restrict__ angle, const int angPitch,
|
||||||
// {
|
uchar* __restrict__ hog, const int hogPitch, const int fh)
|
||||||
// const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
{
|
||||||
// const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
// const int bin = (int)(angle[y * angPitch + x]);
|
const int bin = (int)(angle[y * angPitch + x]);
|
||||||
// const uchar val = mag[y * angPitch + x];
|
const uchar val = mag[y * hogPitch + x];
|
||||||
|
hog[((fh * bin) + y) * hogPitch + x] = val;
|
||||||
|
}
|
||||||
|
|
||||||
// hog[((HEIGHT * bin) + y) * hogPitch + x] = val;
|
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
|
||||||
// }
|
const int fw, const int fh, const int bins)
|
||||||
|
{
|
||||||
|
const uchar* mag = (const uchar*)hogluv.ptr(fh * bins);
|
||||||
|
uchar* hog = (uchar*)hogluv.ptr();
|
||||||
|
const float* angle = (const float*)nangle.ptr();
|
||||||
|
|
||||||
// void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle)
|
dim3 block(32, 8);
|
||||||
// {
|
dim3 grid(fw / 32, fh / 8);
|
||||||
// const uchar* mag = (const uchar*)hogluv.ptr(HEIGHT * HOG_BINS);
|
|
||||||
// uchar* hog = (uchar*)hogluv.ptr();
|
|
||||||
// const float* angle = (const float*)nangle.ptr();
|
|
||||||
|
|
||||||
// dim3 block(32, 8);
|
magToHist<<<grid, block>>>(mag, angle, nangle.step / sizeof(float), hog, hogluv.step, fh);
|
||||||
// dim3 grid(WIDTH / 32, HEIGHT / 8);
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
// magToHist<<<grid, block>>>(mag, angle, nangle.step / sizeof(float), hog, hogluv.step);
|
}
|
||||||
// cudaSafeCall( cudaGetLastError() );
|
}
|
||||||
// cudaSafeCall( cudaDeviceSynchronize() );
|
}}}
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// __global__ void detect(const cv::gpu::icf::Cascade cascade, const int* __restrict__ hogluv, const int pitch,
|
// __global__ void detect(const cv::gpu::icf::Cascade cascade, const int* __restrict__ hogluv, const int pitch,
|
||||||
// PtrStepSz<uchar4> objects)
|
// PtrStepSz<uchar4> objects)
|
||||||
|
@ -49,17 +49,18 @@ cv::gpu::SoftCascade::SoftCascade() : filds(0) { throw_nogpu(); }
|
|||||||
cv::gpu::SoftCascade::SoftCascade( const string&, const float, const float) : filds(0) { throw_nogpu(); }
|
cv::gpu::SoftCascade::SoftCascade( const string&, const float, const float) : filds(0) { throw_nogpu(); }
|
||||||
cv::gpu::SoftCascade::~SoftCascade() { throw_nogpu(); }
|
cv::gpu::SoftCascade::~SoftCascade() { throw_nogpu(); }
|
||||||
bool cv::gpu::SoftCascade::load( const string&, const float, const float) { throw_nogpu(); return false; }
|
bool cv::gpu::SoftCascade::load( const string&, const float, const float) { throw_nogpu(); return false; }
|
||||||
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, Stream) { throw_nogpu(); }
|
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, Stream) { throw_nogpu();}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
// #include <icf.hpp>
|
// #include <icf.hpp>
|
||||||
|
|
||||||
// namespace cv { namespace gpu { namespace device {
|
namespace cv { namespace gpu { namespace device {
|
||||||
// namespace icf {
|
namespace icf {
|
||||||
// void fillBins(cv::gpu::PtrStepSzb hogluv,const cv::gpu::PtrStepSzf& nangle);
|
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
|
||||||
// }
|
const int fw, const int fh, const int bins);
|
||||||
// }}}
|
}
|
||||||
|
}}}
|
||||||
|
|
||||||
// namespace {
|
// namespace {
|
||||||
// char *itoa(long i, char* s, int /*dummy_radix*/)
|
// char *itoa(long i, char* s, int /*dummy_radix*/)
|
||||||
@ -71,6 +72,16 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat
|
|||||||
|
|
||||||
struct cv::gpu::SoftCascade::Filds
|
struct cv::gpu::SoftCascade::Filds
|
||||||
{
|
{
|
||||||
|
|
||||||
|
Filds()
|
||||||
|
{
|
||||||
|
plane.create(FRAME_HEIGHT * (HOG_LUV_BINS + 1), FRAME_WIDTH, CV_8UC1);
|
||||||
|
fplane.create(FRAME_HEIGHT * 6, FRAME_WIDTH, CV_32FC1);
|
||||||
|
luv.create(FRAME_HEIGHT, FRAME_WIDTH, CV_8UC3);
|
||||||
|
shrunk.create(FRAME_HEIGHT / 4 * HOG_LUV_BINS, FRAME_WIDTH / 4, CV_8UC1);
|
||||||
|
integralBuffer.create(shrunk.rows + 1 * HOG_LUV_BINS, shrunk.cols + 1, CV_32SC1);
|
||||||
|
hogluv.create((FRAME_HEIGHT / 4 + 1) * HOG_LUV_BINS, FRAME_WIDTH / 4 + 1, CV_32SC1);
|
||||||
|
}
|
||||||
// // scales range
|
// // scales range
|
||||||
// float minScale;
|
// float minScale;
|
||||||
// float maxScale;
|
// float maxScale;
|
||||||
@ -85,19 +96,26 @@ struct cv::gpu::SoftCascade::Filds
|
|||||||
// GpuMat features;
|
// GpuMat features;
|
||||||
// GpuMat levels;
|
// GpuMat levels;
|
||||||
|
|
||||||
// // preallocated buffer 640x480x10 + 640x480
|
// preallocated buffer 640x480x10 for hogluv + 640x480 got gray
|
||||||
// GpuMat dmem;
|
GpuMat plane;
|
||||||
// // 160x120x10
|
|
||||||
// GpuMat shrunk;
|
// preallocated buffer for floating point operations
|
||||||
// // 161x121x10
|
GpuMat fplane;
|
||||||
// GpuMat hogluv;
|
|
||||||
|
// temporial mat for cvtColor
|
||||||
|
GpuMat luv;
|
||||||
|
|
||||||
|
// 160x120x10
|
||||||
|
GpuMat shrunk;
|
||||||
|
|
||||||
|
// temporial mat for integrall
|
||||||
|
GpuMat integralBuffer;
|
||||||
|
|
||||||
|
// 161x121x10
|
||||||
|
GpuMat hogluv;
|
||||||
|
|
||||||
// // will be removed in final version
|
// // will be removed in final version
|
||||||
// // temporial mat for cvtColor
|
|
||||||
// GpuMat luv;
|
|
||||||
|
|
||||||
// // temporial mat for integrall
|
|
||||||
// GpuMat integralBuffer;
|
|
||||||
|
|
||||||
// // temp matrix for sobel and cartToPolar
|
// // temp matrix for sobel and cartToPolar
|
||||||
// GpuMat dfdx, dfdy, angle, mag, nmag, nangle;
|
// GpuMat dfdx, dfdy, angle, mag, nmag, nangle;
|
||||||
@ -108,17 +126,18 @@ struct cv::gpu::SoftCascade::Filds
|
|||||||
// icf::ChannelStorage storage;
|
// icf::ChannelStorage storage;
|
||||||
|
|
||||||
// enum { BOOST = 0 };
|
// enum { BOOST = 0 };
|
||||||
// enum
|
enum
|
||||||
// {
|
{
|
||||||
// FRAME_WIDTH = 640,
|
FRAME_WIDTH = 640,
|
||||||
// FRAME_HEIGHT = 480,
|
FRAME_HEIGHT = 480,
|
||||||
// TOTAL_SCALES = 55,
|
// TOTAL_SCALES = 55,
|
||||||
// CLASSIFIERS = 5,
|
// CLASSIFIERS = 5,
|
||||||
// ORIG_OBJECT_WIDTH = 64,
|
// ORIG_OBJECT_WIDTH = 64,
|
||||||
// ORIG_OBJECT_HEIGHT = 128,
|
// ORIG_OBJECT_HEIGHT = 128,
|
||||||
// HOG_BINS = 6,
|
HOG_BINS = 6,
|
||||||
// HOG_LUV_BINS = 10
|
LUV_BINS = 3,
|
||||||
// };
|
HOG_LUV_BINS = 10
|
||||||
|
};
|
||||||
|
|
||||||
// bool fill(const FileNode &root, const float mins, const float maxs);
|
// bool fill(const FileNode &root, const float mins, const float maxs);
|
||||||
// void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
|
// void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
|
||||||
@ -386,7 +405,8 @@ struct cv::gpu::SoftCascade::Filds
|
|||||||
// scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor));
|
// scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor));
|
||||||
|
|
||||||
// // printf("level: %d (%f %f) [%f %f] (%d %d) (%d %d)\n", level.octave, level.relScale, level.shrScale,
|
// // printf("level: %d (%f %f) [%f %f] (%d %d) (%d %d)\n", level.octave, level.relScale, level.shrScale,
|
||||||
// // level.scaling[0], level.scaling[1], level.workRect.x, level.workRect.y, level.objSize.x, level.objSize.y);
|
// // level.scaling[0], level.scaling[1], level.workRect.x, level.workRect.y, level.objSize.x,
|
||||||
|
//level.objSize.y);
|
||||||
|
|
||||||
// // std::cout << "level " << sc
|
// // std::cout << "level " << sc
|
||||||
// // << " octeve "
|
// // << " octeve "
|
||||||
@ -423,8 +443,8 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c
|
|||||||
if (!fs.isOpened()) return false;
|
if (!fs.isOpened()) return false;
|
||||||
|
|
||||||
filds = new Filds;
|
filds = new Filds;
|
||||||
// Filds& flds = *filds;
|
Filds& flds = *filds;
|
||||||
// if (!flds.fill(fs.getFirstTopLevelNode(), minScale, maxScale)) return false;
|
// if (!flds.fill(fs.getFirstTopLevelNode(), minScale, maxScale)) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -432,15 +452,15 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c
|
|||||||
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
|
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
|
||||||
GpuMat& objects, const int /*rejectfactor*/, Stream s)
|
GpuMat& objects, const int /*rejectfactor*/, Stream s)
|
||||||
{
|
{
|
||||||
// // only color images are supperted
|
// only color images are supperted
|
||||||
// CV_Assert(colored.type() == CV_8UC3);
|
CV_Assert(colored.type() == CV_8UC3);
|
||||||
|
|
||||||
// // // only this window size allowed
|
// only this window size allowed
|
||||||
// CV_Assert(colored.cols == 640 && colored.rows == 480);
|
CV_Assert(colored.cols == Filds::FRAME_WIDTH && colored.rows == Filds::FRAME_HEIGHT);
|
||||||
|
|
||||||
// Filds& flds = *filds;
|
Filds& flds = *filds;
|
||||||
|
|
||||||
// #if defined USE_REFERENCE_VALUES
|
#if defined USE_REFERENCE_VALUES
|
||||||
// cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
|
// cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
|
||||||
// cv::FileStorage imgs("/home/kellan/testInts.xml", cv::FileStorage::READ);
|
// cv::FileStorage imgs("/home/kellan/testInts.xml", cv::FileStorage::READ);
|
||||||
// char buff[33];
|
// char buff[33];
|
||||||
@ -452,57 +472,72 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
|
|||||||
// GpuMat gchannel(flds.hogluv, cv::Rect(0, 121 * i, 161, 121));
|
// GpuMat gchannel(flds.hogluv, cv::Rect(0, 121 * i, 161, 121));
|
||||||
// gchannel.upload(channel);
|
// gchannel.upload(channel);
|
||||||
// }
|
// }
|
||||||
// #else
|
#else
|
||||||
// GpuMat& dmem = flds.dmem;
|
GpuMat& plane = flds.plane;
|
||||||
// cudaMemset(dmem.data, 0, dmem.step * dmem.rows);
|
GpuMat& shrunk = flds.shrunk;
|
||||||
// GpuMat& shrunk = flds.shrunk;
|
cudaMemset(plane.data, 0, plane.step * plane.rows);
|
||||||
// int w = shrunk.cols;
|
|
||||||
// int h = colored.rows / flds.storage.shrinkage;
|
|
||||||
|
|
||||||
// std::vector<GpuMat> splited;
|
int fw = Filds::FRAME_WIDTH;
|
||||||
// for(int i = 0; i < 3; ++i)
|
int fh = Filds::FRAME_HEIGHT;
|
||||||
// {
|
|
||||||
// splited.push_back(GpuMat(dmem, cv::Rect(0, colored.rows * (7 + i), colored.cols, colored.rows)));
|
|
||||||
// }
|
|
||||||
|
|
||||||
// GpuMat gray(dmem, cv::Rect(0, colored.rows * 10, colored.cols, colored.rows) );
|
GpuMat gray(plane, cv::Rect(0, fh * Filds::HOG_LUV_BINS, fw, fh));
|
||||||
|
|
||||||
// cv::gpu::cvtColor(colored, gray, CV_RGB2GRAY);
|
//cv::gpu::cvtColor(colored, gray, CV_RGB2GRAY);
|
||||||
|
cv::gpu::cvtColor(colored, gray, CV_BGR2GRAY);
|
||||||
|
|
||||||
// //create hog
|
//create hog
|
||||||
// cv::gpu::Sobel(gray, flds.dfdx, CV_32F, 1, 0, 3, 0.25);
|
GpuMat dfdx(flds.fplane, cv::Rect(0, 0, fw, fh));
|
||||||
// cv::gpu::Sobel(gray, flds.dfdy, CV_32F, 0, 1, 3, 0.25);
|
GpuMat dfdy(flds.fplane, cv::Rect(0, fh, fw, fh));
|
||||||
|
|
||||||
// cv::gpu::cartToPolar(flds.dfdx, flds.dfdy, flds.mag, flds.angle, true);
|
cv::gpu::Sobel(gray, dfdx, CV_32F, 1, 0, 3, 0.125f);
|
||||||
|
cv::gpu::Sobel(gray, dfdy, CV_32F, 0, 1, 3, 0.125f);
|
||||||
|
|
||||||
// cv::gpu::multiply(flds.mag, cv::Scalar::all(1.0 / ::log(2)), flds.nmag);
|
GpuMat mag(flds.fplane, cv::Rect(0, 2 * fh, fw, fh));
|
||||||
// cv::gpu::multiply(flds.angle, cv::Scalar::all(1.0 / 60.0), flds.nangle);
|
GpuMat ang(flds.fplane, cv::Rect(0, 3 * fh, fw, fh));
|
||||||
|
|
||||||
// GpuMat magCannel(dmem, cv::Rect(0, colored.rows * 6, colored.cols, colored.rows));
|
cv::gpu::cartToPolar(dfdx, dfdy, mag, ang, true);
|
||||||
// flds.nmag.convertTo(magCannel, CV_8UC1);
|
|
||||||
// device::icf::fillBins(dmem, flds.nangle);
|
|
||||||
|
|
||||||
// // create luv
|
// normolize magnitude to uchar interval and angles to 6 bins
|
||||||
// cv::gpu::cvtColor(colored, flds.luv, CV_BGR2Luv);
|
|
||||||
// cv::gpu::split(flds.luv, splited);
|
|
||||||
|
|
||||||
// GpuMat plane(dmem, cv::Rect(0, 0, colored.cols, colored.rows * Filds::HOG_LUV_BINS));
|
GpuMat nmag(flds.fplane, cv::Rect(0, 4 * fh, fw, fh));
|
||||||
// cv::gpu::resize(plane, flds.shrunk, cv::Size(), 0.25, 0.25, CV_INTER_AREA);
|
GpuMat nang(flds.fplane, cv::Rect(0, 5 * fh, fw, fh));
|
||||||
|
|
||||||
// // fer debug purpose
|
cv::gpu::multiply(mag, cv::Scalar::all(1.f / ::log(2)), nmag);
|
||||||
// // cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
|
cv::gpu::multiply(ang, cv::Scalar::all(1.f / 60.f), nang);
|
||||||
|
|
||||||
// for(int i = 0; i < Filds::HOG_LUV_BINS; ++i)
|
//create uchar magnitude
|
||||||
// {
|
GpuMat cmag(plane, cv::Rect(0, fh * Filds::HOG_BINS, fw, fh));
|
||||||
// GpuMat channel(shrunk, cv::Rect(0, h * i, w, h ));
|
nmag.convertTo(cmag, CV_8UC1);
|
||||||
// GpuMat sum(flds.hogluv, cv::Rect(0, (h + 1) * i, w + 1, h + 1));
|
|
||||||
// cv::gpu::integralBuffered(channel, sum, flds.integralBuffer);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// #endif
|
// create luv
|
||||||
|
cv::gpu::cvtColor(colored, flds.luv, CV_BGR2Luv);
|
||||||
|
|
||||||
// cudaStream_t stream = StreamAccessor::getStream(s);
|
std::vector<GpuMat> splited;
|
||||||
// // detection
|
for(int i = 0; i < Filds::LUV_BINS; ++i)
|
||||||
|
{
|
||||||
|
splited.push_back(GpuMat(plane, cv::Rect(0, fh * (7 + i), fw, fh)));
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::gpu::split(flds.luv, splited);
|
||||||
|
|
||||||
|
device::icf::fillBins(plane, nang, fw, fh, Filds::HOG_BINS);
|
||||||
|
|
||||||
|
GpuMat hogluv(plane, cv::Rect(0, 0, fw, fh * Filds::HOG_LUV_BINS));
|
||||||
|
cv::gpu::resize(hogluv, flds.shrunk, cv::Size(), 0.25, 0.25, CV_INTER_AREA);
|
||||||
|
|
||||||
|
fw /= 4;
|
||||||
|
fh /= 4;
|
||||||
|
for(int i = 0; i < Filds::HOG_LUV_BINS; ++i)
|
||||||
|
{
|
||||||
|
GpuMat channel(shrunk, cv::Rect(0, fh * i, fw, fh ));
|
||||||
|
GpuMat sum(flds.hogluv, cv::Rect(0, (fh + 1) * i, fw + 1, fh + 1));
|
||||||
|
cv::gpu::integralBuffered(channel, sum, flds.integralBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
// detection
|
||||||
// flds.detect(objects, stream);
|
// flds.detect(objects, stream);
|
||||||
|
|
||||||
// // flds.storage.frame(colored, stream);
|
// // flds.storage.frame(colored, stream);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user