minor formatting fixes
This commit is contained in:
parent
0942244d6f
commit
5b0d7a65e6
@@ -1,43 +1,43 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/*
@@ -50,11 +50,11 @@
 * is strictly prohibited.
 *
 */

/*
    NV12ToARGB color space conversion CUDA kernel

    This sample uses CUDA to convert a simple NV12 (YUV 4:2:0 planar)
    source to output in ARGB format.
*/

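Context for the hunks below: in NV12 the luma plane (one byte per pixel) is followed by a single interleaved CbCr plane at half vertical resolution, and both planes share the same row pitch. A minimal host-side sketch of the addressing the kernel relies on (the helper name nv12_sample is illustrative, not part of this source):

// Hypothetical reference for NV12 addressing, assuming the layout the
// kernel uses: a luma plane of `height` rows followed by an interleaved
// CbCr plane of height/2 rows, both with row stride `pitch`.
static void nv12_sample(const unsigned char* frame, size_t pitch, int height,
                        int x, int y, int* Y, int* Cb, int* Cr)
{
    const size_t chromaOffset = pitch * height;  // CbCr plane starts after luma
    *Y  = frame[y * pitch + x];
    *Cb = frame[chromaOffset + (y / 2) * pitch + (x & ~1)];      // even byte: Cb
    *Cr = frame[chromaOffset + (y / 2) * pitch + (x & ~1) + 1];  // odd byte:  Cr
}
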
@@ -82,16 +82,16 @@ namespace cv { namespace gpu { namespace device {
        chromaCr = (float)((int)yuvi[2] - 512.0f);

        // Convert YUV To RGB with hue adjustment
        *red   = (luma     * constHueColorSpaceMat[0]) +
                 (chromaCb * constHueColorSpaceMat[1]) +
                 (chromaCr * constHueColorSpaceMat[2]);

        *green = (luma     * constHueColorSpaceMat[3]) +
                 (chromaCb * constHueColorSpaceMat[4]) +
                 (chromaCr * constHueColorSpaceMat[5]);

        *blue  = (luma     * constHueColorSpaceMat[6]) +
                 (chromaCb * constHueColorSpaceMat[7]) +
                 (chromaCr * constHueColorSpaceMat[8]);
    }
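The 3x3 constHueColorSpaceMat applied above is a YCbCr-to-RGB matrix (optionally rotated by a hue angle) for 10-bit samples, which is why the chroma terms are centered on 512 rather than 128. As a sketch of plausible contents, assuming the common ITU-R BT.601 video-range matrix with zero hue adjustment (the real values are uploaded to constant memory by host code not shown in this diff):

// Illustrative only: BT.601 YCbCr -> RGB with no hue rotation; the real
// constHueColorSpaceMat is filled in by the host and may differ.
static const float hueColorSpaceMat[9] = {
    1.1644f,  0.0f,     1.5960f,   // R row: Y, Cb, Cr weights
    1.1644f, -0.3918f, -0.8130f,   // G row
    1.1644f,  2.0172f,  0.0f       // B row
};
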
@@ -105,9 +105,9 @@ namespace cv { namespace gpu { namespace device {
        blue = ::fmin(::fmax(blue, 0.0f), 1023.f);

        // Convert to 8 bit unsigned integers per color component
        ARGBpixel = (((uint)blue  >> 2) |
                    (((uint)green >> 2) << 8)  |
                    (((uint)red   >> 2) << 16) |
                      (uint)alpha);

        return ARGBpixel;
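Each channel is clamped to the 10-bit range [0, 1023] above and then truncated to 8 bits with >> 2 before packing, so the output word reads 0xAARRGGBB with blue in the low byte; alpha is OR'd in unshifted, so constAlpha is presumably already positioned in the top byte (e.g. 0xFF000000). A round-trip sketch under that assumption:

// Sketch: unpacking the word produced by RGBAPACK_10bit (assumed layout).
unsigned b =  pixel        & 0xFF;
unsigned g = (pixel >> 8)  & 0xFF;
unsigned r = (pixel >> 16) & 0xFF;
unsigned a = (pixel >> 24) & 0xFF;  // carried in via the pre-shifted constAlpha
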
@@ -118,8 +118,8 @@ namespace cv { namespace gpu { namespace device {
    #define COLOR_COMPONENT_BIT_SIZE 10
    #define COLOR_COMPONENT_MASK     0x3FF

    __global__ void NV12ToARGB(uchar* srcImage, size_t nSourcePitch,
                               uint* dstImage, size_t nDestPitch,
                               uint width, uint height)
    {
        // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
@@ -127,7 +127,7 @@ namespace cv { namespace gpu { namespace device {
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x >= width || y >= height)
            return;

        // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
        // if we move to texture we could read 4 luminance values
@@ -142,7 +142,7 @@ namespace cv { namespace gpu { namespace device {
        const int y_chroma = y >> 1;

        if (y & 1)  // odd scanline ?
        {
            uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x    ];
            uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1];

@@ -151,7 +151,7 @@ namespace cv { namespace gpu { namespace device {
            chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x    ] + 1) >> 1;
            chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1;
        }

        yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
        yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));

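Two details worth spelling out: on odd scanlines the two neighbouring chroma rows are averaged with rounding, (a + b + 1) >> 1, and each pixel is staged in a packed 32-bit word holding Y, Cb and Cr as consecutive 10-bit fields. The layout implied by the shifts above, with the luma store (not visible in this hunk) presumably applying the same << 2 scaling:

// Packed-word layout of yuv101010Pel (a sketch):
//   bits  0..9  : Y  * 4   (8-bit luma   scaled to 10 bits)
//   bits 10..19 : Cb * 4   (8-bit chroma scaled to 10 bits)
//   bits 20..29 : Cr * 4
// The "+ 2" in each shift is that 8->10 bit scaling; COLOR_COMPONENT_MASK
// (0x3FF) recovers each field after shifting right by 0,
// COLOR_COMPONENT_BIT_SIZE or 2 * COLOR_COMPONENT_BIT_SIZE, as done below.
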
@@ -166,17 +166,17 @@ namespace cv { namespace gpu { namespace device {
            yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
            yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
        }

        // this step performs the color conversion
        uint yuvi[6];
        float red[2], green[2], blue[2];

        yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK);
        yuvi[1] = ((yuv101010Pel[0] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
        yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);

        yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK);
        yuvi[4] = ((yuv101010Pel[1] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
        yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);

        // YUV to RGB transformation
@@ -184,7 +184,7 @@ namespace cv { namespace gpu { namespace device {
        YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);

        // Clamp the results to RGBA

        const size_t dstImagePitch = nDestPitch >> 2;

        dstImage[y * dstImagePitch + x    ] = RGBAPACK_10bit(red[0], green[0], blue[0], constAlpha);
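nDestPitch arrives in bytes (a cudaMallocPitch-style row stride) while dstImage is indexed as 32-bit uint pixels, hence the >> 2. The equivalence, as a one-line sanity check:

// Byte pitch vs element pitch (sketch): both expressions name the same pixel.
uint* a = dstImage + y * (nDestPitch >> 2) + x;            // element indexing
uint* b = (uint*)((uchar*)dstImage + y * nDestPitch) + x;  // byte-pitch indexing
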
@@ -194,9 +194,9 @@ namespace cv { namespace gpu { namespace device {
    void NV12ToARGB_gpu(const PtrStepb decodedFrame, DevMem2D_<uint> interopFrame, cudaStream_t stream)
    {
        dim3 block(32, 8);
        dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y));

        NV12ToARGB<<<grid, block, 0, stream>>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step,
                                               interopFrame.cols, interopFrame.rows);

        cudaSafeCall( cudaGetLastError() );
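divUp above is the usual ceiling-division helper for sizing grids; the 2 * block.x grain reflects the two pixels handled per thread. A minimal sketch, assuming the conventional definition used throughout the module:

// Ceiling division for grid sizing (sketch of the assumed helper).
static inline int divUp(int total, int grain)
{
    return (total + grain - 1) / grain;
}
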
@@ -45,19 +45,19 @@
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace bf_knnmatch
    {
        ///////////////////////////////////////////////////////////////////////////////
        // Reduction

        template <int BLOCK_SIZE>
        __device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
                                      int& bestTrainIdx1, int& bestTrainIdx2,
                                      float* s_distance, int* s_trainIdx)
        {
            float myBestDistance1 = numeric_limits<float>::max();
            float myBestDistance2 = numeric_limits<float>::max();
            int myBestTrainIdx1 = -1;
            int myBestTrainIdx2 = -1;
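In the k = 2 path each thread tracks its best and second-best candidates, and the shared-memory reduction merges the per-thread pairs into one pair per query. The merge step this builds on can be sketched as follows (an illustrative helper, not the source's exact code):

// Sketch: folding one candidate (dist, idx) into a (best, second-best) pair.
__device__ void updateTop2(float dist, int idx,
                           float& bestDist1, float& bestDist2,
                           int& bestIdx1, int& bestIdx2)
{
    if (dist < bestDist1)
    {
        bestDist2 = bestDist1; bestIdx2 = bestIdx1;  // old best becomes runner-up
        bestDist1 = dist;      bestIdx1 = idx;
    }
    else if (dist < bestDist2)
    {
        bestDist2 = dist;      bestIdx2 = idx;
    }
}
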
@@ -122,13 +122,13 @@ namespace cv { namespace gpu { namespace device
            bestTrainIdx2 = myBestTrainIdx2;
        }

        template <int BLOCK_SIZE>
        __device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
                                      int& bestTrainIdx1, int& bestTrainIdx2,
                                      int& bestImgIdx1, int& bestImgIdx2,
                                      float* s_distance, int* s_trainIdx, int* s_imgIdx)
        {
            float myBestDistance1 = numeric_limits<float>::max();
            float myBestDistance2 = numeric_limits<float>::max();
            int myBestTrainIdx1 = -1;
            int myBestTrainIdx2 = -1;
@@ -208,7 +208,7 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled Cached

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
        __device__ void loadQueryToSmem(int queryIdx, const DevMem2D_<T>& query, U* s_query)
        {
            #pragma unroll
@@ -219,11 +219,11 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
                                           typename Dist::value_type* s_query, typename Dist::value_type* s_train,
                                           float& bestDistance1, float& bestDistance2,
                                           int& bestTrainIdx1, int& bestTrainIdx2,
                                           int& bestImgIdx1, int& bestImgIdx2)
        {
            for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
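The "cached" variants copy the query descriptor into shared memory once per block via loadQueryToSmem and then compare every tile of train descriptors against that cached copy; MAX_DESC_LEN being a compile-time bound is what allows both the copy loop and the distance loop to be fully unrolled. The body of loadQueryToSmem is elided by the diff; a sketch of its presumed shape, assuming MAX_DESC_LEN is a multiple of BLOCK_SIZE and short descriptors are zero-padded:

#pragma unroll
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
{
    const int loadX = threadIdx.x + i * BLOCK_SIZE;
    // Guarded, strided load of one query row into shared memory.
    s_query[threadIdx.y * MAX_DESC_LEN + loadX] =
        loadX < query.cols ? query.ptr(::min(queryIdx, query.rows - 1))[loadX] : 0;
}
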
@@ -280,7 +280,7 @@ namespace cv { namespace gpu { namespace device
                }
            }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
        {
            extern __shared__ int smem[];
@@ -313,9 +313,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                                 const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
                                 cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -330,7 +330,7 @@ namespace cv { namespace gpu { namespace device
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
        {
            extern __shared__ int smem[];
@@ -374,9 +374,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                                 const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
                                 cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -394,11 +394,11 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
                                     typename Dist::value_type* s_query, typename Dist::value_type* s_train,
                                     float& bestDistance1, float& bestDistance2,
                                     int& bestTrainIdx1, int& bestTrainIdx2,
                                     int& bestImgIdx1, int& bestImgIdx2)
        {
            for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -459,7 +459,7 @@ namespace cv { namespace gpu { namespace device
                }
            }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
        {
            extern __shared__ int smem[];
@@ -490,9 +490,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                           const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
                           cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -507,7 +507,7 @@ namespace cv { namespace gpu { namespace device
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
        {
            extern __shared__ int smem[];
@@ -549,9 +549,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                           const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
                           cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -569,11 +569,11 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        __device__ void loop(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
                             typename Dist::value_type* s_query, typename Dist::value_type* s_train,
                             float& bestDistance1, float& bestDistance2,
                             int& bestTrainIdx1, int& bestTrainIdx2,
                             int& bestImgIdx1, int& bestImgIdx2)
        {
            for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -633,7 +633,7 @@ namespace cv { namespace gpu { namespace device
                }
            }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        __global__ void match(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
        {
            extern __shared__ int smem[];
@@ -664,9 +664,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                   const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
                   cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -681,7 +681,7 @@ namespace cv { namespace gpu { namespace device
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        __global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
        {
            extern __shared__ int smem[];
@@ -723,9 +723,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                   const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
                   cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -743,9 +743,9 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // knnMatch 2 dispatcher

        template <typename Dist, typename T, typename Mask>
        void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                              const DevMem2Db& trainIdx, const DevMem2Db& distance,
                              int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -761,11 +761,11 @@ namespace cv { namespace gpu { namespace device
                matchUnrolled<16, 256, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> >(distance), stream);
            }
            /*else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> >(distance), stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> >(distance), stream);
            }*/
            else
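The dispatcher maps the runtime descriptor length onto the smallest compile-time MAX_DESC_LEN bound that fits, so the inner loops unroll completely; the 512 and 1024 branches are commented out in this revision. The pattern in outline (the branch-to-kernel mapping below is inferred from the visible 64 and 256 cases):

// Sketch of the dispatch idea: runtime length -> compile-time bound.
if      (query.cols <= 64)  matchUnrolledCached<16,  64, Dist>(/* ... */);
else if (query.cols <= 128) matchUnrolledCached<16, 128, Dist>(/* ... */);
else if (query.cols <= 256) matchUnrolled<16, 256, Dist>(/* ... */);
else                        match<16, Dist>(/* ... */);  // generic loop fallback
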
@@ -774,9 +774,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <typename Dist, typename T, typename Mask>
        void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                              const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
                              int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -792,11 +792,11 @@ namespace cv { namespace gpu { namespace device
                matchUnrolled<16, 256, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> >(distance), stream);
            }
            /*else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> >(distance), stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> >(distance), stream);
            }*/
            else
@@ -832,7 +832,7 @@ namespace cv { namespace gpu { namespace device
                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
                }
                else
                {
                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
                }
@@ -857,7 +857,7 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void calcDistanceUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -895,7 +895,7 @@ namespace cv { namespace gpu { namespace device
                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
                }
                else
                {
                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
                }
@@ -920,7 +920,7 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void calcDistance(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -938,9 +938,9 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Calc Distance dispatcher

        template <typename Dist, typename T, typename Mask>
        void calcDistanceDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                                    const DevMem2Df& allDist,
                                    int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -956,11 +956,11 @@ namespace cv { namespace gpu { namespace device
                calcDistanceUnrolled<16, 256, Dist>(query, train, mask, allDist, stream);
            }
            /*else if (query.cols <= 512)
            {
                calcDistanceUnrolled<16, 512, Dist>(query, train, mask, allDist, stream);
            }
            else if (query.cols <= 1024)
            {
                calcDistanceUnrolled<16, 1024, Dist>(query, train, mask, allDist, stream);
            }*/
            else
@@ -972,7 +972,7 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // find knn match kernel

        template <int BLOCK_SIZE>
        __global__ void findBestMatch(DevMem2Df allDist, int i, PtrStepi trainIdx, PtrStepf distance)
        {
            const int SMEM_SIZE = BLOCK_SIZE > 64 ? BLOCK_SIZE : 64;
@@ -985,7 +985,7 @@ namespace cv { namespace gpu { namespace device

            float dist = numeric_limits<float>::max();
            int bestIdx = -1;

            for (int i = threadIdx.x; i < allDist.cols; i += BLOCK_SIZE)
            {
                float reg = allDistRow[i];
@@ -1013,7 +1013,7 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE>
        void findKnnMatch(int k, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, 1, 1);
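For general k the matcher first materializes the full query-by-train distance matrix (allDist) and then runs findBestMatch once per neighbour; each pass extracts the current row minimum and, in the full source, retires that entry so the next pass finds the next smallest. A host-side analogue of the per-row selection, as an illustrative sketch:

// Illustrative host-side analogue of k-pass selection over one row.
#include <float.h>
void selectKSmallest(float* row, int cols, int k, int* outIdx, float* outDist)
{
    for (int pass = 0; pass < k; ++pass)
    {
        int best = -1; float bestDist = FLT_MAX;
        for (int j = 0; j < cols; ++j)
            if (row[j] < bestDist) { bestDist = row[j]; best = j; }
        outIdx[pass] = best; outDist[pass] = bestDist;
        if (best >= 0) row[best] = FLT_MAX;  // retire the winner for later passes
    }
}
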
@@ -1038,8 +1038,8 @@ namespace cv { namespace gpu { namespace device
        // knn match Dispatcher

        template <typename Dist, typename T, typename Mask>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, int k, const Mask& mask,
                             const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
                             int cc, cudaStream_t stream)
        {
            if (k == 2)
@@ -1051,13 +1051,13 @@ namespace cv { namespace gpu { namespace device
                calcDistanceDispatcher<Dist>(query, train, mask, allDist, cc, stream);
                findKnnMatchDispatcher(k, trainIdx, distance, allDist, cc, stream);
            }
        }

        ///////////////////////////////////////////////////////////////////////////////
        // knn match caller

        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
                                               const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
                                               int cc, cudaStream_t stream)
        {
            if (mask.data)
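Two execution strategies meet here: k == 2 takes the fused match2Dispatcher path, where each thread keeps a (best, second-best) pair and no distance matrix is stored, while any other k falls back to the two-phase route at the cost of an O(queries x train) allDist buffer. Restated:

// Control flow of matchDispatcher, as a sketch.
if (k == 2)
    match2Dispatcher<Dist>(query, train, mask, trainIdx, distance, cc, stream);
else
{
    calcDistanceDispatcher<Dist>(query, train, mask, allDist, cc, stream);  // fill allDist
    findKnnMatchDispatcher(k, trainIdx, distance, allDist, cc, stream);     // k selection passes
}
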
@@ -1073,7 +1073,7 @@ namespace cv { namespace gpu { namespace device
        template void matchL1_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
                                               const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
                                               int cc, cudaStream_t stream)
        {
@@ -1091,7 +1091,7 @@ namespace cv { namespace gpu { namespace device
        template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
                                                    const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
                                                    int cc, cudaStream_t stream)
        {
            if (mask.data)
@@ -1106,8 +1106,8 @@ namespace cv { namespace gpu { namespace device
        //template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);

        template <typename T> void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                                                const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
                                                int cc, cudaStream_t stream)
        {
            if (masks.data)
@@ -1123,8 +1123,8 @@ namespace cv { namespace gpu { namespace device
        template void match2L1_gpu<int   >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
        template void match2L1_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);

        template <typename T> void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                                                const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
                                                int cc, cudaStream_t stream)
        {
            if (masks.data)
@@ -1140,8 +1140,8 @@ namespace cv { namespace gpu { namespace device
        //template void match2L2_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Di& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
        template void match2L2_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);

        template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                                                     const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
                                                     int cc, cudaStream_t stream)
        {
            if (masks.data)
@@ -45,14 +45,14 @@
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace bf_match
    {
        ///////////////////////////////////////////////////////////////////////////////
        // Reduction

        template <int BLOCK_SIZE>
        __device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, float* s_distance, int* s_trainIdx)
        {
            s_distance += threadIdx.y * BLOCK_SIZE;
@@ -66,7 +66,7 @@ namespace cv { namespace gpu { namespace device
            reducePredVal<BLOCK_SIZE>(s_distance, bestDistance, s_trainIdx, bestTrainIdx, threadIdx.x, less<volatile float>());
        }

        template <int BLOCK_SIZE>
        __device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, int& bestImgIdx, float* s_distance, int* s_trainIdx, int* s_imgIdx)
        {
            s_distance += threadIdx.y * BLOCK_SIZE;
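reducePredVal is a shared-memory reduction that carries a payload value along with whichever element wins the predicate; here each distance travels with its train index under less<>. A simplified sketch of the assumed semantics, for a power-of-two BLOCK_SIZE (the real helper lives in the device utility headers):

// Simplified sketch of a predicate/value reduction (assumed semantics).
template <int BLOCK_SIZE, typename Pred>
__device__ void reducePredValSketch(volatile float* s_dist, float& dist,
                                    volatile int* s_idx, int& idx,
                                    int tid, Pred pred)
{
    s_dist[tid] = dist; s_idx[tid] = idx;
    __syncthreads();
    for (int s = BLOCK_SIZE / 2; s > 0; s >>= 1)
    {
        if (tid < s && pred(s_dist[tid + s], s_dist[tid]))
        {
            s_dist[tid] = s_dist[tid + s];  // winner's distance moves up...
            s_idx[tid]  = s_idx[tid + s];   // ...together with its index
        }
        __syncthreads();
    }
    dist = s_dist[0]; idx = s_idx[0];  // all threads see the winning pair
}
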
@@ -85,7 +85,7 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled Cached

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
        __device__ void loadQueryToSmem(int queryIdx, const DevMem2D_<T>& query, U* s_query)
        {
            #pragma unroll
@@ -96,9 +96,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query, volatile int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
                                           typename Dist::value_type* s_query, typename Dist::value_type* s_train,
                                           float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
        {
            for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -142,7 +142,7 @@ namespace cv { namespace gpu { namespace device
                }
            }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
        {
            extern __shared__ int smem[];
@@ -173,9 +173,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                                 const DevMem2Di& trainIdx, const DevMem2Df& distance,
                                 cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -190,8 +190,8 @@ namespace cv { namespace gpu { namespace device
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
                                            int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
        {
            extern __shared__ int smem[];
@@ -232,9 +232,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                                 const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                                 cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -252,9 +252,9 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query, volatile int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
                                     typename Dist::value_type* s_query, typename Dist::value_type* s_train,
                                     float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
        {
            for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -314,7 +314,7 @@ namespace cv { namespace gpu { namespace device

            typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
            typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);

            loopUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, 0, train, mask, s_query, s_train, myBestDistance, myBestTrainIdx, myBestTrainIdx);

            __syncthreads();
@ -331,9 +331,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
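Note: two details above are worth spelling out. The loop bound (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE is integer ceil-division, i.e. the number of BLOCK_SIZE-row tiles needed to cover every train descriptor; and s_query/s_train split one extern __shared__ allocation into two BLOCK_SIZE*BLOCK_SIZE tiles, which is why callers elsewhere in this patch request 2 * BLOCK_SIZE * BLOCK_SIZE * sizeof(int) bytes of dynamic shared memory. An equivalent helper, as a sketch (OpenCV's own divUp plays this role):

    __host__ __device__ __forceinline__ int divUp(int total, int grain)
    {
        return (total + grain - 1) / grain; // == ceil(total / grain) for positive ints
    }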
        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                           const DevMem2Di& trainIdx, const DevMem2Df& distance,
                           cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace device
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
                                      int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
        {
            extern __shared__ int smem[];
@@ -364,7 +364,7 @@ namespace cv { namespace gpu { namespace device
            typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);

            Mask m = mask;

            for (int imgIdx = 0; imgIdx < n; ++imgIdx)
            {
                const DevMem2D_<T> train = trains[imgIdx];
@@ -388,9 +388,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                           const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                           cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -408,9 +408,9 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        __device__ void loop(int queryIdx, const DevMem2D_<T>& query, volatile int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
                             typename Dist::value_type* s_query, typename Dist::value_type* s_train,
                             float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
        {
            for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t)
@@ -469,7 +469,7 @@ namespace cv { namespace gpu { namespace device

            typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
            typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);

            loop<BLOCK_SIZE, Dist>(queryIdx, query, 0, train, mask, s_query, s_train, myBestDistance, myBestTrainIdx, myBestTrainIdx);

            __syncthreads();
@@ -486,9 +486,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                   const DevMem2Di& trainIdx, const DevMem2Df& distance,
                   cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -504,7 +504,7 @@ namespace cv { namespace gpu { namespace device
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        __global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
                              int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
        {
            extern __shared__ int smem[];
@@ -542,9 +542,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                   const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                   cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -562,9 +562,9 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match dispatcher

        template <typename Dist, typename T, typename Mask>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
                             const DevMem2Di& trainIdx, const DevMem2Df& distance,
                             int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -580,11 +580,11 @@ namespace cv { namespace gpu { namespace device
                matchUnrolled<16, 256, Dist>(query, train, mask, trainIdx, distance, stream);
            }
            else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, train, mask, trainIdx, distance, stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, train, mask, trainIdx, distance, stream);
            }*/
            else
@@ -593,9 +593,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

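Note: the dispatcher maps the runtime descriptor width onto a compile-time MAX_DESC_LEN specialization; the stray }*/ shows that the 256/512/1024 branches sit inside a block comment in this revision (the opening /* is in an elided hunk), so wide descriptors fall through to the generic kernel. A host-only toy model of the pattern (names invented):

    #include <cstdio>

    template <int MAX_DESC_LEN> void matchSpecialized(int cols) { std::printf("unrolled<%d> for width %d\n", MAX_DESC_LEN, cols); }
    void matchGeneric(int cols) { std::printf("generic match for width %d\n", cols); }

    void dispatch(int cols)
    {
        if (cols <= 64)       matchSpecialized<64>(cols);
        else if (cols <= 128) matchSpecialized<128>(cols);
        else                  matchGeneric(cols); // wider specializations disabled here
    }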
        template <typename Dist, typename T, typename Mask>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
                             const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                             int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -611,11 +611,11 @@ namespace cv { namespace gpu { namespace device
                matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
            }
            else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
            }*/
            else
@@ -627,20 +627,20 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match caller

        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
                                               const DevMem2Di& trainIdx, const DevMem2Df& distance,
                                               int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
                    trainIdx, distance,
                    cc, stream);
            }
            else
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), WithOutMask(),
                    trainIdx, distance,
                    cc, stream);
            }
        }
@@ -652,20 +652,20 @@ namespace cv { namespace gpu { namespace device
        template void matchL1_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
                                               const DevMem2Di& trainIdx, const DevMem2Df& distance,
                                               int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
                    trainIdx, distance,
                    cc, stream);
            }
            else
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), WithOutMask(),
                    trainIdx, distance,
                    cc, stream);
            }
        }
@@ -677,20 +677,20 @@ namespace cv { namespace gpu { namespace device
        //template void matchL2_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
                                                    const DevMem2Di& trainIdx, const DevMem2Df& distance,
                                                    int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
                    trainIdx, distance,
                    cc, stream);
            }
            else
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), WithOutMask(),
                    trainIdx, distance,
                    cc, stream);
            }
        }
@@ -701,20 +701,20 @@ namespace cv { namespace gpu { namespace device
        //template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

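Note: each caller binds two policies at compile time: a distance functor (L1Dist<T>, L2Dist, HammingDist, from opencv2/gpu/device/vec_distance.hpp) and a mask policy (SingleMask or WithOutMask). The Dist parameter is expected to expose a value_type for the shared-memory tiles and to accumulate element-wise contributions; a hypothetical functor in that shape, purely to make the template parameter concrete (not the library's actual definition):

    struct ToyL1Dist
    {
        typedef int value_type;   // element type staged in s_query/s_train
        typedef int result_type;  // accumulated distance

        result_type mySum;

        __device__ __forceinline__ ToyL1Dist() : mySum(0) {}
        __device__ __forceinline__ void reduceIter(int val1, int val2) { mySum += ::abs(val1 - val2); } // |a - b| per element
        __device__ __forceinline__ operator result_type() const { return mySum; }
    };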
        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                                               const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                                               int cc, cudaStream_t stream)
        {
            if (masks.data)
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
                    trainIdx, imgIdx, distance,
                    cc, stream);
            }
            else
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(),
                    trainIdx, imgIdx, distance,
                    cc, stream);
            }
        }
@@ -726,20 +726,20 @@ namespace cv { namespace gpu { namespace device
        template void matchL1_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                                               const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                                               int cc, cudaStream_t stream)
        {
            if (masks.data)
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
                    trainIdx, imgIdx, distance,
                    cc, stream);
            }
            else
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(),
                    trainIdx, imgIdx, distance,
                    cc, stream);
            }
        }
@@ -751,20 +751,20 @@ namespace cv { namespace gpu { namespace device
        //template void matchL2_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                                                    const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                                                    int cc, cudaStream_t stream)
        {
            if (masks.data)
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
                    trainIdx, imgIdx, distance,
                    cc, stream);
            }
            else
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(),
                    trainIdx, imgIdx, distance,
                    cc, stream);
            }
        }

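Note: in the multi-image overloads above, trains is a DevMem2Db whose buffer holds an array of per-image DevMem2D_<T> headers, which is why the call sites cast trains.ptr() and pass trains.cols as the image count. The assumed host-side packing looks roughly like this (a sketch; d_headers, trainImages and numImages are invented names, not from the diff):

    #include <vector>

    std::vector<DevMem2Db> headers(numImages);   // one header per train image
    for (int i = 0; i < numImages; ++i)
        headers[i] = trainImages[i];             // shallow copies: ptr/step/size only
    // upload so the kernel can do: const DevMem2D_<T> train = trains[imgIdx];
    cudaMemcpy(d_headers, &headers[0], numImages * sizeof(DevMem2Db),
               cudaMemcpyHostToDevice);          // d_headers: device-side header array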
@@ -45,9 +45,9 @@
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace bf_radius_match
    {
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled
@@ -112,8 +112,8 @@ namespace cv { namespace gpu { namespace device
#endif
        }

        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
                           const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -121,17 +121,17 @@ namespace cv { namespace gpu { namespace device

            const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

            matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
                trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

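Note: the launch epilogue above is a recurring convention in these files: check the launch with cudaGetLastError(), and only on the default stream (stream == 0, the synchronous call path) block with cudaDeviceSynchronize() so failures surface immediately instead of at some later sync point. cudaSafeCall comes from internal_shared.hpp; a plausible stand-in for it (the real helper likewise reports file and line):

    #include <cstdio>
    #include <cstdlib>

    inline void checkCuda(cudaError_t err, const char* file, int line)
    {
        if (err != cudaSuccess)
        {
            fprintf(stderr, "CUDA error %s at %s:%d\n", cudaGetErrorString(err), file, line);
            exit(-1);
        }
    }
    #define cudaSafeCall(expr) checkCuda((expr), __FILE__, __LINE__)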
        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T>
        void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
                           const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                           cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -146,12 +146,12 @@ namespace cv { namespace gpu { namespace device

                if (masks != 0 && masks[i].data)
                {
                    matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]),
                        trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
                }
                else
                {
                    matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
                        trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
                }
                cudaSafeCall( cudaGetLastError() );
@@ -223,9 +223,9 @@ namespace cv { namespace gpu { namespace device
#endif
        }

        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
                   const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                   cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -233,7 +233,7 @@ namespace cv { namespace gpu { namespace device

            const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

            match<BLOCK_SIZE, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
                trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
            cudaSafeCall( cudaGetLastError() );

@@ -241,9 +241,9 @@ namespace cv { namespace gpu { namespace device
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, typename Dist, typename T>
        void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
                   const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                   cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
@@ -258,12 +258,12 @@ namespace cv { namespace gpu { namespace device

                if (masks != 0 && masks[i].data)
                {
                    match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]),
                        trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
                }
                else
                {
                    match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
                        trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
                }
                cudaSafeCall( cudaGetLastError() );
@@ -276,9 +276,9 @@ namespace cv { namespace gpu { namespace device
        ///////////////////////////////////////////////////////////////////////////////
        // Match dispatcher

        template <typename Dist, typename T, typename Mask>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
                             const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                             int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -294,11 +294,11 @@ namespace cv { namespace gpu { namespace device
                matchUnrolled<16, 256, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }*/
            else
@@ -307,9 +307,9 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <typename Dist, typename T>
        void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
                             const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                             int cc, cudaStream_t stream)
        {
            if (query.cols <= 64)
@@ -325,36 +325,36 @@ namespace cv { namespace gpu { namespace device
                matchUnrolled<16, 256, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }*/
            else
            {
                match<16, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
        }

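Note: unlike the best-match kernels, radius match can emit several hits per query row, so the callers also pass nMatches (one unsigned counter per query) and trainIdx.cols as the per-row storage capacity. Host code presumably clamps when reading back, along these lines (a sketch; useMatch and the *Host buffers are invented names):

    #include <algorithm>

    // after download: nMatchesHost[q] may exceed the row capacity when more
    // neighbours fell inside maxDistance than could be stored
    for (int q = 0; q < numQueries; ++q)
    {
        const int stored = std::min((int)nMatchesHost[q], capacity); // capacity == trainIdx.cols
        for (int j = 0; j < stored; ++j)
            useMatch(q, trainIdxHost[q][j], distanceHost[q][j]);
    }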
        ///////////////////////////////////////////////////////////////////////////////
        // Radius Match caller

        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
                                               const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                               int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
            else
            {
                matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
        }
@@ -366,20 +366,20 @@ namespace cv { namespace gpu { namespace device
        template void matchL1_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
                                               const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                               int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
            else
            {
                matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
        }
@@ -391,20 +391,20 @@ namespace cv { namespace gpu { namespace device
        //template void matchL2_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
                                                    const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                                    int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
            else
            {
                matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
        }
@@ -415,12 +415,12 @@ namespace cv { namespace gpu { namespace device
        //template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
                                               const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                               int cc, cudaStream_t stream)
        {
            matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
                trainIdx, imgIdx, distance, nMatches,
                cc, stream);
        }

@@ -431,12 +431,12 @@ namespace cv { namespace gpu { namespace device
        template void matchL1_gpu<int >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
                                               const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                               int cc, cudaStream_t stream)
        {
            matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
                trainIdx, imgIdx, distance, nMatches,
                cc, stream);
        }

@@ -447,12 +447,12 @@ namespace cv { namespace gpu { namespace device
        //template void matchL2_gpu<int >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
                                                    const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                                                    int cc, cudaStream_t stream)
        {
            matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
                trainIdx, imgIdx, distance, nMatches,
                cc, stream);
        }

@@ -43,9 +43,9 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace bilateral_filter
    {
        __constant__ float* ctable_color;
        __constant__ float* ctable_space;
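Note: ctable_color and ctable_space are pointers placed in __constant__ memory; the lookup tables themselves live in ordinary device buffers, and only the pointers are published through constant memory. Setting them up presumably follows the usual pattern (a sketch; the buffer names and tableLen are invented):

    float* table_color_dev = 0;
    cudaMalloc(&table_color_dev, tableLen * sizeof(float));             // the actual table
    cudaMemcpyToSymbol(ctable_color, &table_color_dev, sizeof(float*)); // publish the pointer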
@@ -108,7 +108,7 @@ namespace cv { namespace gpu { namespace device
            dp[3] = *(disp + (y+1) * disp_step + x + 0);
            dp[4] = *(disp + (y ) * disp_step + x + 1);

            if(::abs(dp[1] - dp[0]) >= cedge_disc || ::abs(dp[2] - dp[0]) >= cedge_disc || ::abs(dp[3] - dp[0]) >= cedge_disc || ::abs(dp[4] - dp[0]) >= cedge_disc)
            {
                const int ymin = ::max(0, y - cradius);
                const int xmin = ::max(0, x - cradius);
@@ -175,7 +175,7 @@ namespace cv { namespace gpu { namespace device
            }
        }

        template <typename T>
        void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
        {
            dim3 threads(32, 8, 1);

@@ -42,9 +42,9 @@

#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace blend
    {
        template <typename T>
        __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
@@ -62,14 +62,14 @@ namespace cv { namespace gpu { namespace device
                T p2 = img2.ptr(y)[x];
                result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
            }
        }

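Note: the kernel computes a per-pixel weighted average, (p1*w1 + p2*w2) / (w1 + w2 + 1e-5f); the 1e-5f term keeps the division finite where both weights are zero, at the cost of slightly darkening such pixels. A host-side reference for a single element:

    inline float blendRef(float p1, float p2, float w1, float w2)
    {
        return (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f); // epsilon avoids 0/0
    }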
        template <typename T>
        void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));

            blendLinearKernel<<<grid, threads, 0, stream>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
            cudaSafeCall( cudaGetLastError() );

@@ -105,12 +105,12 @@ namespace cv { namespace gpu { namespace device
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

            blendLinearKernel8UC4<<<grid, threads, 0, stream>>>(rows, cols, img1, img2, weights1, weights2, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }
    } // namespace blend
}}} // namespace cv { namespace gpu { namespace device

@@ -44,7 +44,7 @@
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/functional.hpp"

namespace cv { namespace gpu { namespace device
{
    #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200

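Note: SOLVE_PNP_RANSAC_MAX_NUM_ITERS caps how many RANSAC pose hypotheses a single pass handles; a fixed compile-time bound like this is typically used so per-hypothesis data can sit in statically sized __constant__ arrays. A sketch of that assumed layout (illustrative only, array names invented):

    // one candidate rotation (3 rows) and one translation per hypothesis
    __constant__ float3 c_rot[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3];
    __constant__ float3 c_trans[SOLVE_PNP_RANSAC_MAX_NUM_ITERS];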