Merge commit '43aec5ad' into merge-2.4

Conflicts:
	cmake/OpenCVConfig.cmake
	cmake/OpenCVLegacyOptions.cmake
	modules/contrib/src/retina.cpp
	modules/gpu/doc/camera_calibration_and_3d_reconstruction.rst
	modules/gpu/doc/video.rst
	modules/gpu/src/speckle_filtering.cpp
	modules/python/src2/cv2.cv.hpp
	modules/python/test/test2.py
	samples/python/watershed.py
This commit is contained in:
Roman Donchenko
2013-08-27 13:26:44 +04:00
448 changed files with 984 additions and 1307 deletions

View File

@@ -186,4 +186,4 @@ Only basic flags are supported in oclMat(i.e. depth number of channels)
All the 3-channel matrix(i.e. RGB image) are represented by 4-channel matrix in oclMat. It means 3-channel image have 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and oclMat.
For example: If a oclMat has 3 channels, channels() returns 3 and oclchannels() returns 4
For example: If a oclMat has 3 channels, channels() returns 3 and oclchannels() returns 4

View File

@@ -499,4 +499,4 @@ Returns block descriptors computed for the whole image.
* **DESCR_FORMAT_COL_BY_COL** - Column-major order.
The function is mainly used to learn the classifier.
The function is mainly used to learn the classifier.

View File

@@ -315,4 +315,4 @@ Performs linear blending of two images.
:param weights2: Weights for second image. Must have tha same size as ``img2`` . Supports only ``CV_32F`` type.
:param result: Destination image.
:param result: Destination image.

View File

@@ -67,4 +67,4 @@ Returns the squared sum of matrix elements for each channel
:param m: The Source image of all depth
Counts the squared sum of matrix elements for each channel.
Counts the squared sum of matrix elements for each channel.

View File

@@ -500,13 +500,13 @@ Returns void
* **SORT_SELECTION** selection sort, currently cannot sort duplicate keys
* **SORT_MERGE** merge sort
* **SORT_RADIX** radix sort, only support signed int/float keys(``CV_32S``/``CV_32F``)
Returns the sorted result of all the elements in values based on equivalent keys.
The element unit in the values to be sorted is determined from the data type,
The element unit in the values to be sorted is determined from the data type,
i.e., a ``CV_32FC2`` input ``{a1a2, b1b2}`` will be considered as two elements, regardless its matrix dimension.
Both keys and values will be sorted inplace.
Both keys and values will be sorted inplace.
Keys needs to be a **single** channel `oclMat`.

View File

@@ -55,4 +55,4 @@ Returns the pointer to the opencl command queue
.. ocv:function:: void* ocl::getoclCommandQueue()
Thefunction are used to get opencl command queue so that opencv can interactive with other opencl program.
Thefunction are used to get opencl command queue so that opencv can interactive with other opencl program.

View File

@@ -131,7 +131,7 @@ namespace cv
{
openCLFree(tex_);
}
operator cl_mem()
operator cl_mem()
{
return tex_;
}

View File

@@ -1194,4 +1194,4 @@ PERFTEST(AddWeighted)
}
}
}
}

View File

@@ -119,4 +119,4 @@ PERFTEST(blend)
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.f);
}
}
}
}

View File

@@ -164,4 +164,4 @@ PERFTEST(BruteForceMatcher)
else
TestSystem::instance().setAccurate(0, diff);
}
}
}

View File

@@ -48,14 +48,14 @@
///////////// StereoMatchBM ////////////////////////
PERFTEST(StereoMatchBM)
{
Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE);
Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE);
Mat disp,dst;
ocl::oclMat d_left, d_right,d_disp;
int n_disp= 128;
int winSize =19;
Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE);
Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE);
Mat disp,dst;
ocl::oclMat d_left, d_right,d_disp;
int n_disp= 128;
int winSize =19;
SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg ";
SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg ";
Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
bm->compute(left_image, right_image, dst);
@@ -64,38 +64,29 @@ PERFTEST(StereoMatchBM)
bm->compute(left_image, right_image, dst);
CPU_OFF;
d_left.upload(left_image);
d_right.upload(right_image);
d_left.upload(left_image);
d_right.upload(right_image);
ocl::StereoBM_OCL d_bm(0, n_disp, winSize);
ocl::StereoBM_OCL d_bm(0, n_disp, winSize);
WARMUP_ON;
d_bm(d_left, d_right, d_disp);
WARMUP_OFF;
WARMUP_ON;
d_bm(d_left, d_right, d_disp);
WARMUP_OFF;
cv::Mat ocl_mat;
d_disp.download(ocl_mat);
ocl_mat.convertTo(ocl_mat, dst.type());
GPU_ON;
d_bm(d_left, d_right, d_disp);
GPU_OFF;
GPU_ON;
d_bm(d_left, d_right, d_disp);
GPU_OFF;
GPU_FULL_ON;
d_left.upload(left_image);
d_right.upload(right_image);
d_bm(d_left, d_right, d_disp);
d_disp.download(disp);
GPU_FULL_OFF;
GPU_FULL_ON;
d_left.upload(left_image);
d_right.upload(right_image);
d_bm(d_left, d_right, d_disp);
d_disp.download(disp);
GPU_FULL_OFF;
TestSystem::instance().setAccurate(-1, 0.);
}

View File

@@ -82,4 +82,4 @@ PERFTEST(Canny)
GPU_FULL_OFF;
TestSystem::instance().ExceptedMatSimilar(edges, ocl_edges, 2e-2);
}
}

View File

@@ -88,4 +88,4 @@ PERFTEST(dft)
}
}
}
}

View File

@@ -284,7 +284,7 @@ PERFTEST(GaussianBlur)
Mat src, dst, ocl_dst;
int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
const int ksize = 7;
const int ksize = 7;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
@@ -374,4 +374,4 @@ PERFTEST(filter2D)
}
}
}

View File

@@ -87,4 +87,4 @@ PERFTEST(gemm)
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, src1.cols * src1.rows * 1e-4);
}
}
}

View File

@@ -77,7 +77,7 @@ PERFTEST(HOG)
WARMUP_ON;
ocl_hog.detectMultiScale(d_src, d_found_locations);
WARMUP_OFF;
if(d_found_locations.size() == found_locations.size())
TestSystem::instance().setAccurate(1, 0);
else
@@ -91,4 +91,4 @@ PERFTEST(HOG)
d_src.upload(src);
ocl_hog.detectMultiScale(d_src, found_locations);
GPU_FULL_OFF;
}
}

View File

@@ -183,4 +183,4 @@ PERFTEST(setTo)
}
}
}
}

View File

@@ -72,7 +72,7 @@ PERFTEST(norm)
WARMUP_OFF;
d_src1.download(ocl_src1);
TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);
TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);
GPU_ON;
ocl::norm(d_src1, d_src2, NORM_INF);
@@ -84,4 +84,4 @@ PERFTEST(norm)
ocl::norm(d_src1, d_src2, NORM_INF);
GPU_FULL_OFF;
}
}
}

View File

@@ -129,4 +129,4 @@ PERFTEST(pyrUp)
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, (src.depth() == CV_32F ? 1e-4f : 1.0));
}
}
}
}

View File

@@ -133,7 +133,7 @@ PERFTEST(Split)
WARMUP_ON;
ocl::split(d_src, d_dst);
WARMUP_OFF;
WARMUP_OFF;
GPU_ON;
ocl::split(d_src, d_dst);

View File

@@ -89,4 +89,4 @@ void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &
openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
}
}
}

View File

@@ -81,4 +81,4 @@ void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
}
}

View File

@@ -235,4 +235,3 @@ void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)
}
texture = bindTexture(img);
}

View File

@@ -435,4 +435,3 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
return best_compactness;
}

View File

@@ -67,7 +67,7 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
@@ -110,7 +110,7 @@ __kernel void arithm_s_add_C1_D2 (__global ushort *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
@@ -145,7 +145,7 @@ __kernel void arithm_s_add_C1_D3 (__global short *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
@@ -250,7 +250,7 @@ __kernel void arithm_s_add_C2_D0 (__global uchar *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif

View File

@@ -69,7 +69,7 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
@@ -122,7 +122,7 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st
if (x < cols && y < rows)
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
@@ -160,7 +160,7 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste
if (x < cols && y < rows)
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
@@ -284,7 +284,7 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste
if (x < cols && y < rows)
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif

View File

@@ -377,4 +377,4 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
}
}
*/
*/

View File

@@ -300,4 +300,4 @@ __kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offs
*((__global float *)((__global char *)dst + dst_index)) = tmp;
}
}
}

View File

@@ -231,7 +231,7 @@ __kernel void boxFilter_C1_D0(__global const uchar * restrict src, __global ucha
{
tmp_sum += (data[i]);
}
int index = dst_startY * dst_step + dst_startX + (col-anX)*4;
temp[0][col] = tmp_sum + (data[0]);

View File

@@ -207,7 +207,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
- sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
+ sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
bool passThres = classsum >= nodethreshold;
#if STUMP_BASED
@@ -304,4 +304,3 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
newnode[counter].alpha[1] = t1.alpha[1];
newnode[counter].alpha[2] = t1.alpha[2];
}

View File

@@ -71,15 +71,15 @@ void reduce(volatile __local int* smem, int val, int tid)
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128)
{
{
smem[tid] = val += smem[tid + 128];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64)
{
{
smem[tid] = val += smem[tid + 64];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
@@ -125,15 +125,15 @@ void reduce(__local volatile int* smem, int val, int tid)
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128)
{
{
smem[tid] = val += smem[tid + 128];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64)
{
{
smem[tid] = val += smem[tid + 64];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)

View File

@@ -49,12 +49,12 @@
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
{
return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
}
inline float ELEM_FLT2(image2d_t _eig, float2 pt)
inline float ELEM_FLT2(image2d_t _eig, float2 pt)
{
return read_imagef(_eig, sampler, pt).x;
}
@@ -132,7 +132,7 @@ __kernel
const int pairDistance = 1 << (stage - passOfStage);
const int blockWidth = 2 * pairDistance;
const int leftId = min( (threadId % pairDistance)
const int leftId = min( (threadId % pairDistance)
+ (threadId / pairDistance) * blockWidth, count );
const int rightId = min( leftId + pairDistance, count );
@@ -147,7 +147,7 @@ __kernel
float2 greater = compareResult ? leftPt:rightPt;
float2 lesser = compareResult ? rightPt:leftPt;
corners[leftId] = sortOrder ? lesser : greater;
corners[rightId] = sortOrder ? greater : lesser;
}
@@ -195,20 +195,20 @@ __kernel
{
pt2 = scratch[j];
val2 = ELEM_FLT2(eig, pt2);
if(val2 > val1)
if(val2 > val1)
pos++;//calculate the rank of this element in this work group
else
else
{
if(val1 > val2)
continue;
else
else
{
// val1 and val2 are same
same++;
}
}
}
for (int j=0; j< same; j++)
for (int j=0; j< same; j++)
corners[pos + j] = pt1;
}
__kernel
@@ -240,15 +240,15 @@ __kernel
for(int k=0; k<wg; k++)
{
pt2 = corners[j*wg + k];
val2 = ELEM_FLT2(eig, pt2);
val2 = ELEM_FLT2(eig, pt2);
if(val1 > val2)
break;
else
{
//Increment only if the value is not the same.
//Increment only if the value is not the same.
if( val2 > val1 )
pos++;
else
else
same++;
}
}
@@ -257,20 +257,19 @@ __kernel
for(int k=0; k<remainder; k++)
{
pt2 = corners[(numOfGroups-1)*wg + k];
val2 = ELEM_FLT2(eig, pt2);
val2 = ELEM_FLT2(eig, pt2);
if(val1 > val2)
break;
else
{
//Don't increment if the value is the same.
//Don't increment if the value is the same.
//Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false
if(val2 > val1)
pos++;
else
else
same++;
}
}
for (int j=0; j< same; j++)
}
for (int j=0; j< same; j++)
corners[pos + j] = pt1;
}

View File

@@ -490,4 +490,4 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
}

View File

@@ -43,7 +43,7 @@
//
//M*/
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#ifndef N // number of radices
#define N 4
@@ -71,14 +71,14 @@ __inline uint convertKey(uint converted_key)
converted_key ^= mask;
#elif defined(K_INT)
const uint SIGN_MASK = 1u << ((sizeof(int) * 8) - 1);
converted_key ^= SIGN_MASK;
converted_key ^= SIGN_MASK;
#else
#endif
return converted_key;
}
//FIXME(pengx17):
//FIXME(pengx17):
// exclusive scan, need to be optimized as this is too naive...
kernel
void naiveScanAddition(
@@ -108,7 +108,7 @@ kernel
{
const int RADIX_T = N;
const int RADICES_T = (1 << RADIX_T);
const int NUM_OF_ELEMENTS_PER_WORK_ITEM_T = RADICES_T;
const int NUM_OF_ELEMENTS_PER_WORK_ITEM_T = RADICES_T;
const int MASK_T = (1 << RADIX_T) - 1;
int localBuckets[16] = {0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0};

View File

@@ -62,7 +62,7 @@
#endif
/////////////////////// Bitonic sort ////////////////////////////
// ported from
// ported from
// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_by_key_kernels.cl
__kernel
void bitonicSort
@@ -82,7 +82,7 @@ __kernel
const int pairDistance = 1 << (stage - passOfStage);
const int blockWidth = 2 * pairDistance;
int leftId = min( (threadId % pairDistance)
int leftId = min( (threadId % pairDistance)
+ (threadId / pairDistance) * blockWidth, count );
int rightId = min( leftId + pairDistance, count );
@@ -90,7 +90,7 @@ __kernel
int temp;
const V_T lval = vals[leftId];
const V_T rval = vals[rightId];
const V_T rval = vals[rightId];
const K_T lkey = keys[leftId];
const K_T rkey = keys[rightId];
@@ -142,7 +142,7 @@ __kernel
int offset = groupID * wg;
int same = 0;
vals += offset;
keys += offset;
n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
@@ -163,13 +163,13 @@ __kernel
for (int j=0;j<n;++j)
{
key2 = scratch[j];
if(my_comp(key2, key1))
if(my_comp(key2, key1))
pos++;//calculate the rank of this element in this work group
else
else
{
if(my_comp(key1, key2))
continue;
else
else
{
// key1 and key2 are same
same++;
@@ -209,15 +209,15 @@ __kernel
{
for(int k=0; k<wg; k++)
{
key2 = keys[j*wg + k];
key2 = keys[j*wg + k];
if(my_comp(key1, key2))
break;
else
{
//Increment only if the value is not the same.
//Increment only if the value is not the same.
if(my_comp(key2, key1))
pos++;
else
else
same++;
}
}
@@ -225,18 +225,18 @@ __kernel
for(int k=0; k<remainder; k++)
{
key2 = keys[(numOfGroups-1)*wg + k];
key2 = keys[(numOfGroups-1)*wg + k];
if(my_comp(key1, key2))
break;
else
{
//Don't increment if the value is the same.
//Don't increment if the value is the same.
if(my_comp(key2, key1))
pos++;
else
else
same++;
}
}
}
for (int j=0; j< same; j++)
{
vals[pos + j] = val1;

View File

@@ -318,15 +318,15 @@ __kernel void classify_hists_180_kernel(
volatile __local float* smem = products;
#ifdef CPU
if (tid < 13) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
#else
if (tid < 13)
{
@@ -345,9 +345,9 @@ __kernel void classify_hists_180_kernel(
#endif
if (tid == 0){
product = product + smem[tid + 1];
product = product + smem[tid + 1];
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
}
}
}
//---------------------------------------------------------------------
@@ -388,18 +388,18 @@ __kernel void classify_hists_252_kernel(
if (tid < 64) products[tid] = product = product + products[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE);
volatile __local float* smem = products;
volatile __local float* smem = products;
#ifdef CPU
if(tid<32) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<32) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
#else
if (tid < 32)
{
@@ -415,9 +415,9 @@ __kernel void classify_hists_252_kernel(
}
#endif
if (tid == 0){
product = product + smem[tid + 1];
product = product + smem[tid + 1];
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
}
}
}
//---------------------------------------------------------------------
@@ -458,18 +458,18 @@ __kernel void classify_hists_kernel(
if (tid < 64) products[tid] = product = product + products[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE);
volatile __local float* smem = products;
volatile __local float* smem = products;
#ifdef CPU
if(tid<32) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<32) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
#else
if (tid < 32)
{
@@ -485,9 +485,9 @@ __kernel void classify_hists_kernel(
}
#endif
if (tid == 0){
smem[tid] = product = product + smem[tid + 1];
smem[tid] = product = product + smem[tid + 1];
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
}
}
}
//----------------------------------------------------------------------------

View File

@@ -190,7 +190,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
{
int idx1 = y_tex * img_step + x_tex;
int idx2 = min(y_tex + ((radius << 1) + 1), cheight - 1) * img_step + x_tex;
barrier(CLK_LOCAL_MEM_FENCE);
StepDown(idx1, idx2, left, right, d, col_ssd);

View File

@@ -129,7 +129,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void get_first_k_initial_local_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
__kernel void get_first_k_initial_local_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
__global short *ctemp,int h, int w, int nr_plane,
int cmsg_step1, int cdisp_step1, int cndisp)
{
@@ -187,7 +187,7 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
}
}
__kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, __global float *selected_disp_pyr,
__kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, __global float *selected_disp_pyr,
__global float *ctemp,int h, int w, int nr_plane,
int cmsg_step1, int cdisp_step1, int cndisp)
{
@@ -257,20 +257,20 @@ float compute_3(__global uchar* left, __global uchar* right,
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
}
float compute_1(__global uchar* left, __global uchar* right,
float compute_1(__global uchar* left, __global uchar* right,
float cdata_weight, float cmax_data_term)
{
return fmin(cdata_weight * abs((int)*left - (int)*right), cdata_weight * cmax_data_term);
}
short round_short(float v){
return convert_short_sat_rte(v);
return convert_short_sat_rte(v);
}
///////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////init_data_cost///////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////
__kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int channels,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
int cth, int cimg_step, int cndisp)
{
int x = get_global_id(0);
@@ -312,9 +312,9 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
}
}
}
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int channels,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
int cth, int cimg_step, int cndisp)
{
int x = get_global_id(0);
@@ -361,13 +361,13 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
//////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
int cdisp_step1, int cmsg_step1)
{
int x_out = get_group_id(0);
int y_out = get_group_id(1) % h;
//int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
int tid = get_local_id(0);
@@ -411,39 +411,39 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 256)
if (winsz >= 256)
{
if (tid < 128)
dline[tid] += dline[tid + 128];
if (tid < 128)
dline[tid] += dline[tid + 128];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 128)
if (winsz >= 128)
{
if (tid < 64)
dline[tid] += dline[tid + 64];
if (tid < 64)
dline[tid] += dline[tid + 64];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
if (tid < 32)
if (winsz >= 64)
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
if (tid < 16)
if (winsz >= 32)
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -452,7 +452,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
if (tid < 8)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -461,7 +461,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
if (tid < 4)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -470,7 +470,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
if (tid < 2)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -479,7 +479,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
if (tid < 1)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -500,7 +500,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
{
int x_out = get_group_id(0);
int y_out = get_group_id(1) % h;
int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
int tid = get_local_id(0);
@@ -545,74 +545,74 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 256)
if (tid < 128)
dline[tid] += dline[tid + 128];
if (winsz >= 256)
if (tid < 128)
dline[tid] += dline[tid + 128];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 128)
if (tid < 64)
dline[tid] += dline[tid + 64];
if (winsz >= 128)
if (tid < 64)
dline[tid] += dline[tid + 64];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
if (tid < 32)
vdline[tid] += vdline[tid + 32];
if (winsz >= 64)
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
if (tid < 16)
vdline[tid] += vdline[tid + 16];
if (winsz >= 32)
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
if (winsz >= 16)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
if (winsz >= 8)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
if (winsz >= 4)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
if (winsz >= 2)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
@@ -626,10 +626,10 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
///////////////////////////////////////////////////////////////
////////////////////// compute data cost //////////////////////
///////////////////////////////////////////////////////////////
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
float cmax_data_term, int cimg_step, int cth)
{
@@ -676,10 +676,10 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
}
}
}
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
float cmax_data_term, int cimg_step, int cth)
{
@@ -728,11 +728,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__global uchar *cleft, __global uchar *cright,__local float *smem,
int level, int rows, int cols, int h, int nr_plane,
int level, int rows, int cols, int h, int nr_plane,
int channels, int winsz,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2,
float cdata_weight, float cmax_data_term, int cimg_step,int cth)
{
@@ -788,9 +788,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
if (winsz >= 64)
{
if (tid < 32)
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
}
@@ -799,9 +799,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
if (winsz >= 32)
{
if (tid < 16)
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
}
@@ -810,9 +810,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
if (winsz >= 16)
{
if (tid < 8)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
}
@@ -821,9 +821,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
if (winsz >= 8)
{
if (tid < 4)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
}
@@ -832,9 +832,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
if (winsz >= 4)
{
if (tid < 2)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
}
@@ -843,9 +843,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
if (winsz >= 2)
{
if (tid < 1)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
}
@@ -859,11 +859,11 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
}
}
__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright, __local float *smem,
int level, int rows, int cols, int h, int nr_plane,
int level, int rows, int cols, int h, int nr_plane,
int channels, int winsz,
int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight,
int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight,
float cmax_data_term, int cimg_step, int cth)
{
@@ -918,21 +918,21 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
if (winsz >= 64)
{
if (tid < 32)
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
if (winsz >= 32)
{
if (tid < 16)
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
}
@@ -941,9 +941,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
if (winsz >= 16)
{
if (tid < 8)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
}
@@ -952,9 +952,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
if (winsz >= 8)
{
if (tid < 4)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
}
@@ -963,9 +963,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
if (winsz >= 4)
{
if (tid < 2)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
}
@@ -974,9 +974,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
if (winsz >= 2)
{
if (tid < 1)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
}
@@ -993,11 +993,11 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
///////////////////////////////////////////////////////////////
//////////////////////// init message /////////////////////////
///////////////////////////////////////////////////////////////
void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
__global short *r_new, __global const short *u_cur, __global const short *d_cur,
__global const short *l_cur, __global const short *r_cur,
__global short *data_cost_selected, __global short *disparity_selected_new,
__global short *data_cost_new, __global const short* data_cost_cur,
void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
__global short *r_new, __global const short *u_cur, __global const short *d_cur,
__global const short *l_cur, __global const short *r_cur,
__global short *data_cost_selected, __global short *disparity_selected_new,
__global short *data_cost_new, __global const short* data_cost_cur,
__global const short *disparity_selected_cur,
int nr_plane, int nr_plane2,
int cdisp_step1, int cdisp_step2)
@@ -1027,11 +1027,11 @@ void get_first_k_element_increase_0(__global short* u_new, __global short *d_new
data_cost_new[id * cdisp_step1] = SHRT_MAX;
}
}
void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
__global float *r_new, __global const float *u_cur, __global const float *d_cur,
void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
__global float *r_new, __global const float *u_cur, __global const float *d_cur,
__global const float *l_cur, __global const float *r_cur,
__global float *data_cost_selected, __global float *disparity_selected_new,
__global float *data_cost_new, __global const float *data_cost_cur,
__global float *data_cost_selected, __global float *disparity_selected_new,
__global float *data_cost_new, __global const float *data_cost_cur,
__global const float *disparity_selected_cur,
int nr_plane, int nr_plane2,
int cdisp_step1, int cdisp_step2)
@@ -1057,13 +1057,13 @@ void get_first_k_element_increase_1(__global float *u_new, __global float *d_new
u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
data_cost_new[id * cdisp_step1] = FLT_MAX;
}
}
__kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_,
__global short *r_new_, __global short *u_cur_, __global const short *d_cur_,
__global short *r_new_, __global short *u_cur_, __global const short *d_cur_,
__global const short *l_cur_, __global const short *r_cur_, __global short *ctemp,
__global short *selected_disp_pyr_new, __global const short *selected_disp_pyr_cur,
__global short *data_cost_selected_, __global const short *data_cost_,
@@ -1113,7 +1113,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
}
}
__kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_,
__global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
__global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
__global const float *l_cur_, __global const float *r_cur_, __global float *ctemp,
__global float *selected_disp_pyr_new, __global const float *selected_disp_pyr_cur,
__global float *data_cost_selected_, __global const float *data_cost_,
@@ -1176,28 +1176,28 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
id = j;
}
}
data_cost_selected[i * cdisp_step1] = data_cost[id * cdisp_step1];
data_cost_selected[i * cdisp_step1] = data_cost[id * cdisp_step1];
disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
data_cost_new[id * cdisp_step1] = FLT_MAX;
}
}
}
}
///////////////////////////////////////////////////////////////
//////////////////// calc all iterations /////////////////////
///////////////////////////////////////////////////////////////
void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
__global const short *msg2, __global const short *msg3,
__global const short *dst_disp, __global const short *src_disp,
__global const short *dst_disp, __global const short *src_disp,
int nr_plane, __global short *temp,
float cmax_disc_term, int cdisp_step1, float cdisc_single_jump)
{
short minimum = SHRT_MAX;
for(int d = 0; d < nr_plane; d++)
for(int d = 0; d < nr_plane; d++)
{
int idx = d * cdisp_step1;
short val = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
@@ -1215,7 +1215,7 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
short src_disp_reg = src_disp[d * cdisp_step1];
for(int d2 = 0; d2 < nr_plane; d2++)
cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
cdisc_single_jump * abs(dst_disp[d2 * cdisp_step1] - src_disp_reg)));
temp[d * cdisp_step1] = convert_short_sat_rte(cost_min);
@@ -1226,14 +1226,14 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
for(int d = 0; d < nr_plane; d++)
msg_dst[d * cdisp_step1] = convert_short_sat_rte(temp[d * cdisp_step1] - sum);
}
void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
__global const float *msg2, __global const float *msg3,
__global const float *dst_disp, __global const float *src_disp,
__global const float *dst_disp, __global const float *src_disp,
int nr_plane, __global float *temp,
float cmax_disc_term, int cdisp_step1, float cdisc_single_jump)
{
float minimum = FLT_MAX;
for(int d = 0; d < nr_plane; d++)
for(int d = 0; d < nr_plane; d++)
{
int idx = d * cdisp_step1;
float val = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
@@ -1251,7 +1251,7 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
float src_disp_reg = src_disp[d * cdisp_step1];
for(int d2 = 0; d2 < nr_plane; d2++)
cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
cdisc_single_jump * fabs(dst_disp[d2 * cdisp_step1] - src_disp_reg)));
temp[d * cdisp_step1] = cost_min;
@@ -1262,9 +1262,9 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
for(int d = 0; d < nr_plane; d++)
msg_dst[d * cdisp_step1] = temp[d * cdisp_step1] - sum;
}
__kernel void compute_message_0(__global short *u_, __global short *d_, __global short *l_, __global short *r_,
__global const short *data_cost_selected, __global const short *selected_disp_pyr_cur,
__global short *ctemp, int h, int w, int nr_plane, int i,
__kernel void compute_message_0(__global short *u_, __global short *d_, __global short *l_, __global short *r_,
__global const short *data_cost_selected, __global const short *selected_disp_pyr_cur,
__global short *ctemp, int h, int w, int nr_plane, int i,
float cmax_disc_term, int cdisp_step1, int cmsg_step1, float cdisc_single_jump)
{
int y = get_global_id(1);
@@ -1283,7 +1283,7 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global
__global short *temp = ctemp + y * cmsg_step1 + x;
message_per_pixel_0(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
message_per_pixel_0(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
cmax_disc_term, cdisp_step1, cdisc_single_jump);
message_per_pixel_0(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp,
cmax_disc_term, cdisp_step1, cdisc_single_jump);
@@ -1293,9 +1293,9 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global
cmax_disc_term, cdisp_step1, cdisc_single_jump);
}
}
__kernel void compute_message_1(__global float *u_, __global float *d_, __global float *l_, __global float *r_,
__global const float *data_cost_selected, __global const float *selected_disp_pyr_cur,
__global float *ctemp, int h, int w, int nr_plane, int i,
__kernel void compute_message_1(__global float *u_, __global float *d_, __global float *l_, __global float *r_,
__global const float *data_cost_selected, __global const float *selected_disp_pyr_cur,
__global float *ctemp, int h, int w, int nr_plane, int i,
float cmax_disc_term, int cdisp_step1, int cmsg_step1, float cdisc_single_jump)
{
int y = get_global_id(1);
@@ -1313,7 +1313,7 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
__global const float *disp = selected_disp_pyr_cur + y * cmsg_step1 + x;
__global float *temp = ctemp + y * cmsg_step1 + x;
message_per_pixel_1(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
message_per_pixel_1(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
cmax_disc_term, cdisp_step1, cdisc_single_jump);
message_per_pixel_1(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp,
cmax_disc_term, cdisp_step1, cdisc_single_jump);
@@ -1327,10 +1327,10 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
///////////////////////////////////////////////////////////////
/////////////////////////// output ////////////////////////////
///////////////////////////////////////////////////////////////
__kernel void compute_disp_0(__global const short *u_, __global const short *d_, __global const short *l_,
__global const short *r_, __global const short * data_cost_selected,
__kernel void compute_disp_0(__global const short *u_, __global const short *d_, __global const short *l_,
__global const short *r_, __global const short * data_cost_selected,
__global const short *disp_selected_pyr,
__global short* disp,
__global short* disp,
int res_step, int cols, int rows, int nr_plane,
int cmsg_step1, int cdisp_step1)
{
@@ -1364,10 +1364,10 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
disp[res_step * y + x] = best;
}
}
__kernel void compute_disp_1(__global const float *u_, __global const float *d_, __global const float *l_,
__global const float *r_, __global const float *data_cost_selected,
__kernel void compute_disp_1(__global const float *u_, __global const float *d_, __global const float *l_,
__global const float *r_, __global const float *data_cost_selected,
__global const float *disp_selected_pyr,
__global short *disp,
__global short *disp,
int res_step, int cols, int rows, int nr_plane,
int cmsg_step1, int cdisp_step1)
{

View File

@@ -43,7 +43,7 @@
//
//M*/
__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
__global float* dx, __global float* dy, int dx_step)
{
int x = get_global_id(0);
@@ -51,9 +51,9 @@ __global float* dx, __global float* dy, int dx_step)
if((x < src_col)&&(y < src_row))
{
int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
int src_x2 = (x - 1) > 0 ? (x -1) : 0;
int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
int src_x2 = (x - 1) > 0 ? (x -1) : 0;
//if(src[y * src_step + src_x1] == src[y * src_step+ src_x2])
//{
// printf("y = %d\n", y);
@@ -61,8 +61,8 @@ __global float* dx, __global float* dy, int dx_step)
// printf("src_x2 = %d\n", src_x2);
//}
dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]);
int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
int src_y2 = (y - 1) > 0 ? (y - 1) : 0;
dy[y * dx_step+ x] = 0.5f * (src[src_y1 * src_step + x] - src[src_y2 * src_step+ x]);
}
@@ -89,20 +89,20 @@ float bicubicCoeff(float x_)
}
__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,
__global const float* u1, int u1_step,
image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,
__global const float* u1, int u1_step,
__global const float* u2,
__global float* I1w,
__global float* I1wx, /*int I1wx_step,*/
__global float* I1wy, /*int I1wy_step,*/
__global float* grad, /*int grad_step,*/
__global float* rho,
int I1w_step,
int u2_step,
int u1_offset_x,
int u1_offset_y,
int u2_offset_x,
int u2_offset_y)
__global float* I1wx, /*int I1wx_step,*/
__global float* I1wy, /*int I1wy_step,*/
__global float* grad, /*int grad_step,*/
__global float* rho,
int I1w_step,
int u2_step,
int u1_offset_x,
int u1_offset_y,
int u2_offset_x,
int u2_offset_y)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
@@ -136,7 +136,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
//sum += w * tex2D(tex_I1 , cx, cy);
int2 cood = (int2)(cx, cy);
int2 cood = (int2)(cx, cy);
sum += w * read_imagef(tex_I1, sampleri, cood).x;
//sumx += w * tex2D(tex_I1x, cx, cy);
sumx += w * read_imagef(tex_I1x, sampleri, cood).x;
@@ -181,18 +181,18 @@ float readImage(__global const float *image, const int x, const int y, const
}
__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row,
__global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,
__global const float* u1, int u1_step,
__global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,
__global const float* u1, int u1_step,
__global const float* u2,
__global float* I1w,
__global float* I1wx, /*int I1wx_step,*/
__global float* I1wy, /*int I1wy_step,*/
__global float* grad, /*int grad_step,*/
__global float* rho,
int I1w_step,
int u2_step,
int I1_step,
int I1x_step)
__global float* I1wx, /*int I1wx_step,*/
__global float* I1wy, /*int I1wy_step,*/
__global float* grad, /*int grad_step,*/
__global float* rho,
int I1w_step,
int u2_step,
int I1_step,
int I1x_step)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
@@ -224,7 +224,7 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
{
const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
int2 cood = (int2)(cx, cy);
int2 cood = (int2)(cx, cy);
sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step);
sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step);
sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step);
@@ -256,18 +256,18 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
}
__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step,
__global const float* u2,
__global float* p11, int p11_step,
__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step,
__global const float* u2,
__global float* p11, int p11_step,
__global float* p12,
__global float* p21,
__global float* p22,
__global float* p22,
const float taut,
int u2_step,
int u1_offset_x,
int u1_offset_y,
int u2_offset_x,
int u2_offset_y)
int u2_step,
int u1_offset_x,
int u1_offset_y,
int u2_offset_x,
int u2_offset_y)
{
//const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -277,16 +277,16 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
if(x < u1_col && y < u1_row)
{
int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
const float g1 = hypot(u1x, u1y);
@@ -329,19 +329,19 @@ float divergence(__global const float* v1, __global const float* v2, int y, int
__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step,
__global const float* I1wy, /*int I1wy_step,*/
__global const float* grad, /*int grad_step,*/
__global const float* grad, /*int grad_step,*/
__global const float* rho_c, /*int rho_c_step,*/
__global const float* p11, /*int p11_step,*/
__global const float* p12, /*int p12_step,*/
__global const float* p21, /*int p21_step,*/
__global const float* p22, /*int p22_step,*/
__global float* u1, int u1_step,
__global float* u2,
__global float* u1, int u1_step,
__global float* u2,
__global float* error, const float l_t, const float theta, int u2_step,
int u1_offset_x,
int u1_offset_y,
int u2_offset_x,
int u2_offset_y)
int u1_offset_x,
int u1_offset_y,
int u2_offset_x,
int u2_offset_y)
{
//const int x = blockIdx.x * blockDim.x + threadIdx.x;

View File

@@ -109,4 +109,3 @@ void cv::ocl::pyrDown(const oclMat &src, oclMat &dst)
pyrdown_run(src, dst);
}

View File

@@ -85,4 +85,4 @@ namespace cv
openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
}
}
}

View File

@@ -160,7 +160,7 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreater
namespace radix_sort
{
//FIXME(pengx17):
//FIXME(pengx17):
// exclusive scan, need to be optimized as this is too naive...
//void naive_scan_addition(oclMat& input, oclMat& output)
//{
@@ -247,8 +247,8 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t origVecSize, bool isGre
}
}
ocl::copyMakeBorder(
keys(Rect(0,0,origVecSize,1)), buffer_keys,
0, 0, 0, vecSize - origVecSize,
keys(Rect(0,0,origVecSize,1)), buffer_keys,
0, 0, 0, vecSize - origVecSize,
BORDER_CONSTANT, padding_value);
vals(Rect(0,0,origVecSize,1)).copyTo(roi_buffer_vals);
newBuffer = true;
@@ -274,7 +274,7 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t origVecSize, bool isGre
genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
//additional build option for radix sort
sprintf(build_opt_buf + strlen(build_opt_buf), " -D K_%s", isKeyFloat?"FLT":"INT");
sprintf(build_opt_buf + strlen(build_opt_buf), " -D K_%s", isKeyFloat?"FLT":"INT");
String kernelnames[2] = {String("histogramRadixN"), String("permuteRadixN")};

View File

@@ -178,7 +178,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
}
void Near1(double threshold = 0.)
{
{
EXPECT_MAT_NEAR(dst1, Mat(gdst1_whole), threshold);
}

View File

@@ -116,4 +116,4 @@ INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Blend, Combine(
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
));
#endif
#endif

View File

@@ -86,4 +86,4 @@ TEST_P(Canny, Accuracy)
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
#endif
#endif

View File

@@ -57,8 +57,8 @@ using namespace testing;
using namespace std;
PARAM_TEST_CASE(FilterTestBase,
MatType,
PARAM_TEST_CASE(FilterTestBase,
MatType,
cv::Size, // kernel size
cv::Size, // dx,dy
int // border type, or iteration
@@ -367,7 +367,7 @@ INSTANTIATE_TEST_CASE_P(Filter, Laplacian, Combine(
Values(0))); //not use
INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
Values(Size(0, 0)), //not use
Values(Size(0, 0)), //not use
Values(1)));
@@ -383,7 +383,7 @@ INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(
INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
Values(Size(0, 0)), //not use
Values(Size(0, 1), Size(1, 0)),
Values(Size(0, 1), Size(1, 0)),
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
@@ -395,7 +395,7 @@ INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
Values(CV_8UC1, CV_32FC1, CV_32FC4),
Values(CV_8UC1, CV_32FC1, CV_32FC4),
Values(Size(3, 3), Size(15, 15), Size(25, 25)),
Values(Size(0, 0)), //not use
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REFLECT101, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT)));

View File

@@ -448,7 +448,7 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo
{
cv::Mat cpu_cldst;
cldst.download(cpu_cldst);
EXPECT_MAT_NEAR(dst, cpu_cldst, threshold);
EXPECT_MAT_NEAR(dst, cpu_cldst, threshold);
}
};
////////////////////////////////equalizeHist//////////////////////////////////////////

View File

@@ -87,7 +87,7 @@ PARAM_TEST_CASE(Kmeans, int, int, int)
for(int j = 0; j < nchannel; j++)
center_row_header.at<float>(0, i*nchannel+j) = 50000.0;
for(int j = 0; (j < max_neighbour) ||
for(int j = 0; (j < max_neighbour) ||
(i == K-1 && j < max_neighbour + MHEIGHT%K); j ++)
{
Mat cur_row_header = src.row(row_idx + 1 + j);
@@ -121,15 +121,15 @@ TEST_P(Kmeans, Mat){
ocl::kmeans(d_src, K, d_labels,
TermCriteria( TermCriteria::EPS + TermCriteria::MAX_ITER, 100, 0),
1, flags, d_centers);
Mat dd_labels(d_labels);
Mat dd_centers(d_centers);
if(flags & KMEANS_USE_INITIAL_LABELS)
{
EXPECT_MAT_NEAR(labels, dd_labels, 0);
EXPECT_MAT_NEAR(centers, dd_centers, 1e-3);
}
else
}
else
{
int row_idx = 0;
for(int i = 0; i < K; i++)
@@ -157,6 +157,6 @@ TEST_P(Kmeans, Mat){
INSTANTIATE_TEST_CASE_P(OCL_ML, Kmeans, Combine(
Values(3, 5, 8),
Values(CV_32FC1, CV_32FC2, CV_32FC4),
Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/)));
Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/)));
#endif

View File

@@ -85,7 +85,7 @@ TEST_P(GoodFeaturesToTrack, Accuracy)
ASSERT_FALSE(d_pts.empty());
std::vector<cv::Point2f> pts(d_pts.cols);
detector.downloadPoints(d_pts, pts);
std::vector<cv::Point2f> pts_gold;
@@ -125,7 +125,7 @@ TEST_P(GoodFeaturesToTrack, EmptyCorners)
ASSERT_TRUE(corners.empty());
}
INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
testing::Values(MinDistance(0.0), MinDistance(3.0)));
//////////////////////////////////////////////////////////////////////////
@@ -342,4 +342,3 @@ INSTANTIATE_TEST_CASE_P(OCL_Video, Farneback, testing::Combine(
testing::Values(UseInitFlow(false), UseInitFlow(true))));
#endif // HAVE_OPENCL

View File

@@ -79,7 +79,7 @@ TEST_P(PyrDown, Mat)
Size size(MWIDTH, MHEIGHT);
Mat src = randomMat(size, CV_MAKETYPE(type, channels));
oclMat gsrc(src);
pyrDown(src, dst_cpu);
pyrDown(gsrc, gdst);

View File

@@ -59,7 +59,7 @@ IMPLEMENT_PARAM_CLASS(InputSize, int)
IMPLEMENT_PARAM_CLASS(SortMethod, int)
template<class T>
template<class T>
struct KV_CVTYPE{ static int toType() {return 0;} };
template<> struct KV_CVTYPE<int> { static int toType() {return CV_32SC1;} };
@@ -101,7 +101,7 @@ void kvquicksort(Mat& keys, Mat& vals, bool isGreater = false)
{
vector<pair<key_type, val_type> > kvres;
toKVPair(keys.begin<key_type>(), vals.begin<val_type>(), keys.cols, kvres);
if(isGreater)
{
std::sort(kvres.begin(), kvres.end(), kvgreater<key_type, val_type>);
@@ -180,7 +180,7 @@ bool checkUnstableSorterResult(const Mat& gkeys_, const Mat& gvals_,
{
++ iden_count;
}
// sort dv and gv
int num_of_val = (iden_count + 1) * cn_val;
std::sort(gvptr + i * cn_val, gvptr + i * cn_val + num_of_val);

View File

@@ -225,7 +225,7 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
cpu_result.setTo(0);
for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
{
{
cv::Mat cpu_result_roi(cpu_result, *r);
cpu_result_roi.setTo(1);
cpu_result.copyTo(cpu_result);
@@ -251,4 +251,3 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
}
return final_test_result;
}