optimizations:

- new reduce implementation (with kepler optimizations)
- saturate_cast via asm command
- video SIMD instructions in element operations
- float arithmetics instead of double
- new deviceSupports function
This commit is contained in:
Vladislav Vinogradov
2013-01-23 14:43:36 +04:00
parent ae6266e101
commit 281d036fcf
45 changed files with 9379 additions and 8846 deletions

View File

@@ -336,7 +336,7 @@ namespace
{
void calcDiffHistogram(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame, cv::gpu::GpuMat& hist, cv::gpu::GpuMat& histBuf)
{
typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
static const func_t funcs[4][4] =
{
{0,0,0,0},
@@ -348,14 +348,11 @@ namespace
hist.create(3, 256, CV_32SC1);
histBuf.create(3, bgfg::PARTIAL_HISTOGRAM_COUNT * bgfg::HISTOGRAM_BIN_COUNT, CV_32SC1);
cv::gpu::DeviceInfo devInfo;
int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
funcs[prevFrame.channels() - 1][curFrame.channels() - 1](
prevFrame, curFrame,
hist.ptr<unsigned int>(0), hist.ptr<unsigned int>(1), hist.ptr<unsigned int>(2),
histBuf.ptr<unsigned int>(0), histBuf.ptr<unsigned int>(1), histBuf.ptr<unsigned int>(2),
cc, 0);
cv::gpu::deviceSupports(cv::gpu::FEATURE_SET_COMPUTE_20), 0);
}
void calcRelativeVariance(unsigned int hist[3 * 256], double relativeVariance[3][bgfg::HISTOGRAM_BIN_COUNT])
@@ -526,15 +523,15 @@ namespace
size_t total = all_contours.size();
_contours.create(total, 1, 0, -1, true);
_contours.create((int) total, 1, 0, -1, true);
cv::SeqIterator<CvSeq*> it = all_contours.begin();
for (size_t i = 0; i < total; ++i, ++it)
{
CvSeq* c = *it;
((CvContour*)c)->color = (int)i;
_contours.create((int)c->total, 1, CV_32SC2, i, true);
cv::Mat ci = _contours.getMat(i);
_contours.create((int)c->total, 1, CV_32SC2, (int)i, true);
cv::Mat ci = _contours.getMat((int)i);
CV_Assert( ci.isContinuous() );
cvCvtSeqToArray(c, ci.data);
}