add bruteForceMatcher to ocl module
This commit is contained in:
parent
23244a3565
commit
656f06fa74
@ -946,6 +946,186 @@ namespace cv
|
||||
oclMat maxPosBuffer;
|
||||
|
||||
};
|
||||
////////////////////////////////// BruteForceMatcher //////////////////////////////////
|
||||
|
||||
class CV_EXPORTS BruteForceMatcher_OCL_base
|
||||
{
|
||||
public:
|
||||
enum DistType {L1Dist = 0, L2Dist, HammingDist};
|
||||
|
||||
explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
|
||||
|
||||
// Add descriptors to train descriptor collection
|
||||
void add(const std::vector<oclMat>& descCollection);
|
||||
|
||||
// Get train descriptors collection
|
||||
const std::vector<oclMat>& getTrainDescriptors() const;
|
||||
|
||||
// Clear train descriptors collection
|
||||
void clear();
|
||||
|
||||
// Return true if there are not train descriptors in collection
|
||||
bool empty() const;
|
||||
|
||||
// Return true if the matcher supports mask in match methods
|
||||
bool isMaskSupported() const;
|
||||
|
||||
// Find one best match for each query descriptor
|
||||
void matchSingle(const oclMat& query, const oclMat& train,
|
||||
oclMat& trainIdx, oclMat& distance,
|
||||
const oclMat& mask = oclMat());
|
||||
|
||||
// Download trainIdx and distance and convert it to CPU vector with DMatch
|
||||
static void matchDownload(const oclMat& trainIdx, const oclMat& distance, std::vector<DMatch>& matches);
|
||||
// Convert trainIdx and distance to vector with DMatch
|
||||
static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);
|
||||
|
||||
// Find one best match for each query descriptor
|
||||
void match(const oclMat& query, const oclMat& train, std::vector<DMatch>& matches, const oclMat& mask = oclMat());
|
||||
|
||||
// Make gpu collection of trains and masks in suitable format for matchCollection function
|
||||
void makeGpuCollection(oclMat& trainCollection, oclMat& maskCollection, const std::vector<oclMat>& masks = std::vector<oclMat>());
|
||||
|
||||
// Find one best match from train collection for each query descriptor
|
||||
void matchCollection(const oclMat& query, const oclMat& trainCollection,
|
||||
oclMat& trainIdx, oclMat& imgIdx, oclMat& distance,
|
||||
const oclMat& masks = oclMat());
|
||||
|
||||
// Download trainIdx, imgIdx and distance and convert it to vector with DMatch
|
||||
static void matchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, std::vector<DMatch>& matches);
|
||||
// Convert trainIdx, imgIdx and distance to vector with DMatch
|
||||
static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches);
|
||||
|
||||
// Find one best match from train collection for each query descriptor.
|
||||
void match(const oclMat& query, std::vector<DMatch>& matches, const std::vector<oclMat>& masks = std::vector<oclMat>());
|
||||
|
||||
// Find k best matches for each query descriptor (in increasing order of distances)
|
||||
void knnMatchSingle(const oclMat& query, const oclMat& train,
|
||||
oclMat& trainIdx, oclMat& distance, oclMat& allDist, int k,
|
||||
const oclMat& mask = oclMat());
|
||||
|
||||
// Download trainIdx and distance and convert it to vector with DMatch
|
||||
// compactResult is used when mask is not empty. If compactResult is false matches
|
||||
// vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
// matches vector will not contain matches for fully masked out query descriptors.
|
||||
static void knnMatchDownload(const oclMat& trainIdx, const oclMat& distance,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
// Convert trainIdx and distance to vector with DMatch
|
||||
static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
|
||||
// Find k best matches for each query descriptor (in increasing order of distances).
|
||||
// compactResult is used when mask is not empty. If compactResult is false matches
|
||||
// vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
// matches vector will not contain matches for fully masked out query descriptors.
|
||||
void knnMatch(const oclMat& query, const oclMat& train,
|
||||
std::vector< std::vector<DMatch> >& matches, int k, const oclMat& mask = oclMat(),
|
||||
bool compactResult = false);
|
||||
|
||||
// Find k best matches from train collection for each query descriptor (in increasing order of distances)
|
||||
void knnMatch2Collection(const oclMat& query, const oclMat& trainCollection,
|
||||
oclMat& trainIdx, oclMat& imgIdx, oclMat& distance,
|
||||
const oclMat& maskCollection = oclMat());
|
||||
|
||||
// Download trainIdx and distance and convert it to vector with DMatch
|
||||
// compactResult is used when mask is not empty. If compactResult is false matches
|
||||
// vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
// matches vector will not contain matches for fully masked out query descriptors.
|
||||
static void knnMatch2Download(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
// Convert trainIdx and distance to vector with DMatch
|
||||
static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
|
||||
// Find k best matches for each query descriptor (in increasing order of distances).
|
||||
// compactResult is used when mask is not empty. If compactResult is false matches
|
||||
// vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
// matches vector will not contain matches for fully masked out query descriptors.
|
||||
void knnMatch(const oclMat& query, std::vector< std::vector<DMatch> >& matches, int k,
|
||||
const std::vector<oclMat>& masks = std::vector<oclMat>(), bool compactResult = false);
|
||||
|
||||
// Find best matches for each query descriptor which have distance less than maxDistance.
|
||||
// nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
|
||||
// carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,
|
||||
// because it didn't have enough memory.
|
||||
// If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
|
||||
// otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
|
||||
// Matches doesn't sorted.
|
||||
void radiusMatchSingle(const oclMat& query, const oclMat& train,
|
||||
oclMat& trainIdx, oclMat& distance, oclMat& nMatches, float maxDistance,
|
||||
const oclMat& mask = oclMat());
|
||||
|
||||
// Download trainIdx, nMatches and distance and convert it to vector with DMatch.
|
||||
// matches will be sorted in increasing order of distances.
|
||||
// compactResult is used when mask is not empty. If compactResult is false matches
|
||||
// vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
// matches vector will not contain matches for fully masked out query descriptors.
|
||||
static void radiusMatchDownload(const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
// Convert trainIdx, nMatches and distance to vector with DMatch.
|
||||
static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
|
||||
// Find best matches for each query descriptor which have distance less than maxDistance
|
||||
// in increasing order of distances).
|
||||
void radiusMatch(const oclMat& query, const oclMat& train,
|
||||
std::vector< std::vector<DMatch> >& matches, float maxDistance,
|
||||
const oclMat& mask = oclMat(), bool compactResult = false);
|
||||
|
||||
// Find best matches for each query descriptor which have distance less than maxDistance.
|
||||
// If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
|
||||
// otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
|
||||
// Matches doesn't sorted.
|
||||
void radiusMatchCollection(const oclMat& query, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, oclMat& nMatches, float maxDistance,
|
||||
const std::vector<oclMat>& masks = std::vector<oclMat>());
|
||||
|
||||
// Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
|
||||
// matches will be sorted in increasing order of distances.
|
||||
// compactResult is used when mask is not empty. If compactResult is false matches
|
||||
// vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
// matches vector will not contain matches for fully masked out query descriptors.
|
||||
static void radiusMatchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, const oclMat& nMatches,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
// Convert trainIdx, nMatches and distance to vector with DMatch.
|
||||
static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
|
||||
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
|
||||
|
||||
// Find best matches from train collection for each query descriptor which have distance less than
|
||||
// maxDistance (in increasing order of distances).
|
||||
void radiusMatch(const oclMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
|
||||
const std::vector<oclMat>& masks = std::vector<oclMat>(), bool compactResult = false);
|
||||
|
||||
DistType distType;
|
||||
|
||||
private:
|
||||
std::vector<oclMat> trainDescCollection;
|
||||
};
|
||||
|
||||
template <class Distance>
|
||||
class CV_EXPORTS BruteForceMatcher_OCL;
|
||||
|
||||
template <typename T>
|
||||
class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
|
||||
{
|
||||
public:
|
||||
explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
|
||||
explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
|
||||
};
|
||||
template <typename T>
|
||||
class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
|
||||
{
|
||||
public:
|
||||
explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
|
||||
explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
|
||||
};
|
||||
template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
|
||||
{
|
||||
public:
|
||||
explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
|
||||
explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
#include "opencv2/ocl/matrix_operations.hpp"
|
||||
|
1734
modules/ocl/src/brute_force_matcher.cpp
Normal file
1734
modules/ocl/src/brute_force_matcher.cpp
Normal file
File diff suppressed because it is too large
Load Diff
816
modules/ocl/src/kernels/brute_force_match.cl
Normal file
816
modules/ocl/src/kernels/brute_force_match.cl
Normal file
@ -0,0 +1,816 @@
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
|
||||
#define MAX_FLOAT 1e7f
|
||||
|
||||
int bit1Count(float x)
|
||||
{
|
||||
int c = 0;
|
||||
int ix = (int)x;
|
||||
for (int i = 0 ; i < 32 ; i++)
|
||||
{
|
||||
c += ix & 0x1;
|
||||
ix >>= 1;
|
||||
}
|
||||
return (float)c;
|
||||
}
|
||||
/* 2dim launch, global size: dim0 is (query rows + block_size - 1) / block_size * block_size, dim1 is block_size
|
||||
local size: dim0 is block_size, dim1 is block_size.
|
||||
*/
|
||||
__kernel void BruteForceMatch_UnrollMatch(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
int block_size,
|
||||
int max_desc_len,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step,
|
||||
int distType
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
|
||||
__local float *s_query = sharebuffer;
|
||||
__local float *s_train = sharebuffer + block_size * max_desc_len;
|
||||
|
||||
int queryIdx = groupidx * block_size + lidy;
|
||||
// load the query into local memory.
|
||||
for (int i = 0 ; i < max_desc_len / block_size; i ++)
|
||||
{
|
||||
int loadx = lidx + i * block_size;
|
||||
s_query[lidy * max_desc_len + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
}
|
||||
|
||||
float myBestDistance = MAX_FLOAT;
|
||||
int myBestTrainIdx = -1;
|
||||
|
||||
// loopUnrolledCached to find the best trainIdx and best distance.
|
||||
volatile int imgIdx = 0;
|
||||
for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
|
||||
{
|
||||
float result = 0;
|
||||
for (int i = 0 ; i < max_desc_len / block_size ; i++)
|
||||
{
|
||||
//load a block_size * block_size block into local train.
|
||||
const int loadx = lidx + i * block_size;
|
||||
s_train[lidx * block_size + lidy] = loadx < train_cols ? train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
|
||||
//synchronize to make sure each elem for reduceIteration in share memory is written already.
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
/* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
int trainIdx = t * block_size + lidx;
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/)
|
||||
{
|
||||
//bestImgIdx = imgIdx;
|
||||
myBestDistance = result;
|
||||
myBestTrainIdx = trainIdx;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
__local float *s_distance = (__local float*)(sharebuffer);
|
||||
__local int* s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
|
||||
|
||||
//find BestMatch
|
||||
s_distance += lidy * block_size;
|
||||
s_trainIdx += lidy * block_size;
|
||||
s_distance[lidx] = myBestDistance;
|
||||
s_trainIdx[lidx] = myBestTrainIdx;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//reduce -- now all reduce implement in each threads.
|
||||
for (int k = 0 ; k < block_size; k++)
|
||||
{
|
||||
if (myBestDistance > s_distance[k])
|
||||
{
|
||||
myBestDistance = s_distance[k];
|
||||
myBestTrainIdx = s_trainIdx[k];
|
||||
}
|
||||
}
|
||||
|
||||
if (queryIdx < query_rows && lidx == 0)
|
||||
{
|
||||
bestTrainIdx[queryIdx] = myBestTrainIdx;
|
||||
bestDistance[queryIdx] = myBestDistance;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BruteForceMatch_Match(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
int block_size,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step,
|
||||
int distType
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
|
||||
const int queryIdx = groupidx * block_size + lidy;
|
||||
|
||||
float myBestDistance = MAX_FLOAT;
|
||||
int myBestTrainIdx = -1;
|
||||
|
||||
__local float *s_query = sharebuffer;
|
||||
__local float *s_train = sharebuffer + block_size * block_size;
|
||||
|
||||
// loop
|
||||
for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
|
||||
{
|
||||
//Dist dist;
|
||||
float result = 0;
|
||||
for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; i++)
|
||||
{
|
||||
const int loadx = lidx + i * block_size;
|
||||
//load query and train into local memory
|
||||
s_query[lidy * block_size + lidx] = 0;
|
||||
s_train[lidx * block_size + lidy] = 0;
|
||||
|
||||
if (loadx < query_cols)
|
||||
{
|
||||
s_query[lidy * block_size + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
|
||||
s_train[lidx * block_size + lidy] = train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
/* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[(uint)j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
const int trainIdx = t * block_size + lidx;
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/)
|
||||
{
|
||||
//myBestImgidx = imgIdx;
|
||||
myBestDistance = result;
|
||||
myBestTrainIdx = trainIdx;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
__local float *s_distance = (__local float *)sharebuffer;
|
||||
__local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
|
||||
|
||||
//findBestMatch
|
||||
s_distance += lidy * block_size;
|
||||
s_trainIdx += lidy * block_size;
|
||||
s_distance[lidx] = myBestDistance;
|
||||
s_trainIdx[lidx] = myBestTrainIdx;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//reduce -- now all reduce implement in each threads.
|
||||
for (int k = 0 ; k < block_size; k++)
|
||||
{
|
||||
if (myBestDistance > s_distance[k])
|
||||
{
|
||||
myBestDistance = s_distance[k];
|
||||
myBestTrainIdx = s_trainIdx[k];
|
||||
}
|
||||
}
|
||||
|
||||
if (queryIdx < query_rows && lidx == 0)
|
||||
{
|
||||
bestTrainIdx[queryIdx] = myBestTrainIdx;
|
||||
bestDistance[queryIdx] = myBestDistance;
|
||||
}
|
||||
}
|
||||
|
||||
//radius_unrollmatch
|
||||
__kernel void BruteForceMatch_RadiusUnrollMatch(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
float maxDistance,
|
||||
__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__global int *nMatches,
|
||||
__local float *sharebuffer,
|
||||
int block_size,
|
||||
int max_desc_len,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int bestTrainIdx_cols,
|
||||
int step,
|
||||
int ostep,
|
||||
int distType
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
const int groupidy = get_group_id(1);
|
||||
|
||||
const int queryIdx = groupidy * block_size + lidy;
|
||||
const int trainIdx = groupidx * block_size + lidx;
|
||||
|
||||
__local float *s_query = sharebuffer;
|
||||
__local float *s_train = sharebuffer + block_size * block_size;
|
||||
|
||||
float result = 0;
|
||||
for (int i = 0 ; i < max_desc_len / block_size ; ++i)
|
||||
{
|
||||
//load a block_size * block_size block into local train.
|
||||
const int loadx = lidx + i * block_size;
|
||||
|
||||
s_query[lidy * block_size + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
s_train[lidx * block_size + lidy] = loadx < query_cols ? train[min(groupidx * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
|
||||
//synchronize to make sure each elem for reduceIteration in share memory is written already.
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
/* there are three types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows && result < maxDistance/* && mask(queryIdx, trainIdx)*/)
|
||||
{
|
||||
unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
|
||||
|
||||
if(ind < bestTrainIdx_cols)
|
||||
{
|
||||
//bestImgIdx = imgIdx;
|
||||
bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
|
||||
bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//radius_match
|
||||
__kernel void BruteForceMatch_RadiusMatch(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
float maxDistance,
|
||||
__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__global int *nMatches,
|
||||
__local float *sharebuffer,
|
||||
int block_size,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int bestTrainIdx_cols,
|
||||
int step,
|
||||
int ostep,
|
||||
int distType
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
const int groupidy = get_group_id(1);
|
||||
|
||||
const int queryIdx = groupidy * block_size + lidy;
|
||||
const int trainIdx = groupidx * block_size + lidx;
|
||||
|
||||
__local float *s_query = sharebuffer;
|
||||
__local float *s_train = sharebuffer + block_size * block_size;
|
||||
|
||||
float result = 0;
|
||||
for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; ++i)
|
||||
{
|
||||
//load a block_size * block_size block into local train.
|
||||
const int loadx = lidx + i * block_size;
|
||||
|
||||
s_query[lidy * block_size + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
s_train[lidx * block_size + lidy] = loadx < query_cols ? train[min(groupidx * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
|
||||
//synchronize to make sure each elem for reduceIteration in share memory is written already.
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
/* there are three types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; ++j)
|
||||
{
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows && result < maxDistance/* && mask(queryIdx, trainIdx)*/)
|
||||
{
|
||||
unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
|
||||
|
||||
if(ind < bestTrainIdx_cols)
|
||||
{
|
||||
//bestImgIdx = imgIdx;
|
||||
bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
|
||||
bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel void BruteForceMatch_knnUnrollMatch(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
__global int2 *bestTrainIdx,
|
||||
__global float2 *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
int block_size,
|
||||
int max_desc_len,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step,
|
||||
int distType
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
|
||||
const int queryIdx = groupidx * block_size + lidy;
|
||||
local float *s_query = sharebuffer;
|
||||
local float *s_train = sharebuffer + block_size * max_desc_len;
|
||||
|
||||
// load the query into local memory.
|
||||
for (int i = 0 ; i < max_desc_len / block_size; i ++)
|
||||
{
|
||||
int loadx = lidx + i * block_size;
|
||||
s_query[lidy * max_desc_len + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
}
|
||||
|
||||
float myBestDistance1 = MAX_FLOAT;
|
||||
float myBestDistance2 = MAX_FLOAT;
|
||||
int myBestTrainIdx1 = -1;
|
||||
int myBestTrainIdx2 = -1;
|
||||
|
||||
//loopUnrolledCached
|
||||
volatile int imgIdx = 0;
|
||||
for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
|
||||
{
|
||||
float result = 0;
|
||||
for (int i = 0 ; i < max_desc_len / block_size ; i++)
|
||||
{
|
||||
const int loadX = lidx + i * block_size;
|
||||
//load a block_size * block_size block into local train.
|
||||
const int loadx = lidx + i * block_size;
|
||||
s_train[lidx * block_size + lidy] = loadx < train_cols ? train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
|
||||
//synchronize to make sure each elem for reduceIteration in share memory is written already.
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
/* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * max_desc_len + i * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
const int trainIdx = t * block_size + lidx;
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows)
|
||||
{
|
||||
if (result < myBestDistance1)
|
||||
{
|
||||
myBestDistance2 = myBestDistance1;
|
||||
myBestTrainIdx2 = myBestTrainIdx1;
|
||||
myBestDistance1 = result;
|
||||
myBestTrainIdx1 = trainIdx;
|
||||
}
|
||||
else if (result < myBestDistance2)
|
||||
{
|
||||
myBestDistance2 = result;
|
||||
myBestTrainIdx2 = trainIdx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
local float *s_distance = (local float *)sharebuffer;
|
||||
local int *s_trainIdx = (local int *)(sharebuffer + block_size * block_size);
|
||||
|
||||
// find BestMatch
|
||||
s_distance += lidy * block_size;
|
||||
s_trainIdx += lidy * block_size;
|
||||
|
||||
s_distance[lidx] = myBestDistance1;
|
||||
s_trainIdx[lidx] = myBestTrainIdx1;
|
||||
|
||||
float bestDistance1 = MAX_FLOAT;
|
||||
float bestDistance2 = MAX_FLOAT;
|
||||
int bestTrainIdx1 = -1;
|
||||
int bestTrainIdx2 = -1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lidx == 0)
|
||||
{
|
||||
for (int i = 0 ; i < block_size ; i++)
|
||||
{
|
||||
float val = s_distance[i];
|
||||
if (val < bestDistance1)
|
||||
{
|
||||
bestDistance2 = bestDistance1;
|
||||
bestTrainIdx2 = bestTrainIdx1;
|
||||
|
||||
bestDistance1 = val;
|
||||
bestTrainIdx1 = s_trainIdx[i];
|
||||
}
|
||||
else if (val < bestDistance2)
|
||||
{
|
||||
bestDistance2 = val;
|
||||
bestTrainIdx2 = s_trainIdx[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
s_distance[lidx] = myBestDistance2;
|
||||
s_trainIdx[lidx] = myBestTrainIdx2;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lidx == 0)
|
||||
{
|
||||
for (int i = 0 ; i < block_size ; i++)
|
||||
{
|
||||
float val = s_distance[i];
|
||||
|
||||
if (val < bestDistance2)
|
||||
{
|
||||
bestDistance2 = val;
|
||||
bestTrainIdx2 = s_trainIdx[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
myBestDistance1 = bestDistance1;
|
||||
myBestDistance2 = bestDistance2;
|
||||
|
||||
myBestTrainIdx1 = bestTrainIdx1;
|
||||
myBestTrainIdx2 = bestTrainIdx2;
|
||||
|
||||
if (queryIdx < query_rows && lidx == 0)
|
||||
{
|
||||
bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
|
||||
bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BruteForceMatch_knnMatch(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
__global int2 *bestTrainIdx,
|
||||
__global float2 *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
int block_size,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step,
|
||||
int distType
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
|
||||
const int queryIdx = groupidx * block_size + lidy;
|
||||
local float *s_query = sharebuffer;
|
||||
local float *s_train = sharebuffer + block_size * block_size;
|
||||
|
||||
float myBestDistance1 = MAX_FLOAT;
|
||||
float myBestDistance2 = MAX_FLOAT;
|
||||
int myBestTrainIdx1 = -1;
|
||||
int myBestTrainIdx2 = -1;
|
||||
|
||||
//loop
|
||||
for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
|
||||
{
|
||||
float result = 0.0f;
|
||||
for (int i = 0 ; i < (query_cols + block_size -1) / block_size ; i++)
|
||||
{
|
||||
const int loadx = lidx + i * block_size;
|
||||
//load query and train into local memory
|
||||
s_query[lidy * block_size + lidx] = 0;
|
||||
s_train[lidx * block_size + lidy] = 0;
|
||||
|
||||
if (loadx < query_cols)
|
||||
{
|
||||
s_query[lidy * block_size + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
|
||||
s_train[lidx * block_size + lidy] = train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
/* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
|
||||
sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
|
||||
|
||||
switch(distType)
|
||||
{
|
||||
case 0:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
|
||||
result += qr * qr;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int j = 0 ; j < block_size ; j++)
|
||||
{
|
||||
//result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
|
||||
result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[(uint)j * block_size + lidx]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
const int trainIdx = t * block_size + lidx;
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/)
|
||||
{
|
||||
if (result < myBestDistance1)
|
||||
{
|
||||
myBestDistance2 = myBestDistance1;
|
||||
myBestTrainIdx2 = myBestTrainIdx1;
|
||||
myBestDistance1 = result;
|
||||
myBestTrainIdx1 = trainIdx;
|
||||
}
|
||||
else if (result < myBestDistance2)
|
||||
{
|
||||
myBestDistance2 = result;
|
||||
myBestTrainIdx2 = trainIdx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
__local float *s_distance = (__local float *)sharebuffer;
|
||||
__local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
|
||||
|
||||
//findBestMatch
|
||||
s_distance += lidy * block_size;
|
||||
s_trainIdx += lidy * block_size;
|
||||
|
||||
s_distance[lidx] = myBestDistance1;
|
||||
s_trainIdx[lidx] = myBestTrainIdx1;
|
||||
|
||||
float bestDistance1 = MAX_FLOAT;
|
||||
float bestDistance2 = MAX_FLOAT;
|
||||
int bestTrainIdx1 = -1;
|
||||
int bestTrainIdx2 = -1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lidx == 0)
|
||||
{
|
||||
for (int i = 0 ; i < block_size ; i++)
|
||||
{
|
||||
float val = s_distance[i];
|
||||
if (val < bestDistance1)
|
||||
{
|
||||
bestDistance2 = bestDistance1;
|
||||
bestTrainIdx2 = bestTrainIdx1;
|
||||
|
||||
bestDistance1 = val;
|
||||
bestTrainIdx1 = s_trainIdx[i];
|
||||
}
|
||||
else if (val < bestDistance2)
|
||||
{
|
||||
bestDistance2 = val;
|
||||
bestTrainIdx2 = s_trainIdx[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
s_distance[lidx] = myBestDistance2;
|
||||
s_trainIdx[lidx] = myBestTrainIdx2;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lidx == 0)
|
||||
{
|
||||
for (int i = 0 ; i < block_size ; i++)
|
||||
{
|
||||
float val = s_distance[i];
|
||||
|
||||
if (val < bestDistance2)
|
||||
{
|
||||
bestDistance2 = val;
|
||||
bestTrainIdx2 = s_trainIdx[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
myBestDistance1 = bestDistance1;
|
||||
myBestDistance2 = bestDistance2;
|
||||
|
||||
myBestTrainIdx1 = bestTrainIdx1;
|
||||
myBestTrainIdx2 = bestTrainIdx2;
|
||||
|
||||
if (queryIdx < query_rows && lidx == 0)
|
||||
{
|
||||
bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
|
||||
bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
|
||||
}
|
||||
}
|
||||
|
||||
kernel void BruteForceMatch_calcDistanceUnrolled(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
__global float *allDist,
|
||||
__local float *sharebuffer,
|
||||
int block_size,
|
||||
int max_desc_len,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step,
|
||||
int distType)
|
||||
{
|
||||
/* Todo */
|
||||
}
|
||||
|
||||
kernel void BruteForceMatch_calcDistance(
|
||||
__global float *query,
|
||||
__global float *train,
|
||||
__global float *mask,
|
||||
__global float *allDist,
|
||||
__local float *sharebuffer,
|
||||
int block_size,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step,
|
||||
int distType)
|
||||
{
|
||||
/* Todo */
|
||||
}
|
||||
|
||||
kernel void BruteForceMatch_findBestMatch(
|
||||
__global float *allDist,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
int k,
|
||||
int block_size
|
||||
)
|
||||
{
|
||||
/* Todo */
|
||||
}
|
219
modules/ocl/test/test_brute_force_matcher.cpp
Normal file
219
modules/ocl/test/test_brute_force_matcher.cpp
Normal file
@ -0,0 +1,219 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BruteForceMatcher
|
||||
|
||||
CV_ENUM(DistType, cv::ocl::BruteForceMatcher_OCL_base::L1Dist, cv::ocl::BruteForceMatcher_OCL_base::L2Dist, cv::ocl::BruteForceMatcher_OCL_base::HammingDist)
|
||||
IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
|
||||
|
||||
PARAM_TEST_CASE(BruteForceMatcher/*, NormCode*/, DistType, DescriptorSize)
|
||||
{
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
cv::ocl::BruteForceMatcher_OCL_base::DistType distType;
|
||||
int normCode;
|
||||
int dim;
|
||||
|
||||
int queryDescCount;
|
||||
int countFactor;
|
||||
|
||||
cv::Mat query, train;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
//normCode = GET_PARAM(0);
|
||||
distType = (cv::ocl::BruteForceMatcher_OCL_base::DistType)(int)GET_PARAM(0);
|
||||
dim = GET_PARAM(1);
|
||||
|
||||
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
|
||||
//CV_Assert(devnums > 0);
|
||||
|
||||
queryDescCount = 300; // must be even number because we split train data in some cases in two
|
||||
countFactor = 4; // do not change it
|
||||
|
||||
cv::RNG& rng = cvtest::TS::ptr()->get_rng();
|
||||
|
||||
cv::Mat queryBuf, trainBuf;
|
||||
|
||||
// Generate query descriptors randomly.
|
||||
// Descriptor vector elements are integer values.
|
||||
queryBuf.create(queryDescCount, dim, CV_32SC1);
|
||||
rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
|
||||
queryBuf.convertTo(queryBuf, CV_32FC1);
|
||||
|
||||
// Generate train decriptors as follows:
|
||||
// copy each query descriptor to train set countFactor times
|
||||
// and perturb some one element of the copied descriptors in
|
||||
// in ascending order. General boundaries of the perturbation
|
||||
// are (0.f, 1.f).
|
||||
trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1);
|
||||
float step = 1.f / countFactor;
|
||||
for (int qIdx = 0; qIdx < queryDescCount; qIdx++)
|
||||
{
|
||||
cv::Mat queryDescriptor = queryBuf.row(qIdx);
|
||||
for (int c = 0; c < countFactor; c++)
|
||||
{
|
||||
int tIdx = qIdx * countFactor + c;
|
||||
cv::Mat trainDescriptor = trainBuf.row(tIdx);
|
||||
queryDescriptor.copyTo(trainDescriptor);
|
||||
int elem = rng(dim);
|
||||
float diff = rng.uniform(step * c, step * (c + 1));
|
||||
trainDescriptor.at<float>(0, elem) += diff;
|
||||
}
|
||||
}
|
||||
|
||||
queryBuf.convertTo(query, CV_32F);
|
||||
trainBuf.convertTo(train, CV_32F);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(BruteForceMatcher, Match_Single)
|
||||
{
|
||||
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
|
||||
|
||||
std::vector<cv::DMatch> matches;
|
||||
matcher.match(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
{
|
||||
cv::DMatch match = matches[i];
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
|
||||
badCount++;
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
|
||||
TEST_P(BruteForceMatcher, KnnMatch_2_Single)
|
||||
{
|
||||
const int knn = 2;
|
||||
|
||||
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.knnMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, knn);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
{
|
||||
if ((int)matches[i].size() != knn)
|
||||
badCount++;
|
||||
else
|
||||
{
|
||||
int localBadCount = 0;
|
||||
for (int k = 0; k < knn; k++)
|
||||
{
|
||||
cv::DMatch match = matches[i][k];
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
|
||||
localBadCount++;
|
||||
}
|
||||
badCount += localBadCount > 0 ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
|
||||
TEST_P(BruteForceMatcher, RadiusMatch_Single)
|
||||
{
|
||||
float radius;
|
||||
if(distType == cv::ocl::BruteForceMatcher_OCL_base::L2Dist)
|
||||
radius = 1.f / countFactor /countFactor;
|
||||
else
|
||||
radius = 1.f / countFactor;
|
||||
|
||||
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
|
||||
|
||||
// assume support atomic.
|
||||
//if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
||||
//{
|
||||
// try
|
||||
// {
|
||||
// std::vector< std::vector<cv::DMatch> > matches;
|
||||
// matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
|
||||
// }
|
||||
// catch (const cv::Exception& e)
|
||||
// {
|
||||
// ASSERT_EQ(CV_StsNotImplemented, e.code);
|
||||
// }
|
||||
//}
|
||||
//else
|
||||
{
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, radius);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
{
|
||||
if ((int)matches[i].size() != 1)
|
||||
{
|
||||
badCount++;
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::DMatch match = matches[i][0];
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0))
|
||||
badCount++;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
|
||||
//ALL_DEVICES,
|
||||
testing::Values(DistType(cv::ocl::BruteForceMatcher_OCL_base::L1Dist), DistType(cv::ocl::BruteForceMatcher_OCL_base::L2Dist)),
|
||||
testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304))));
|
||||
|
||||
} // namespace
|
||||
|
Loading…
x
Reference in New Issue
Block a user