Merge commit '43aec5ad' into merge-2.4
Conflicts: cmake/OpenCVConfig.cmake cmake/OpenCVLegacyOptions.cmake modules/contrib/src/retina.cpp modules/gpu/doc/camera_calibration_and_3d_reconstruction.rst modules/gpu/doc/video.rst modules/gpu/src/speckle_filtering.cpp modules/python/src2/cv2.cv.hpp modules/python/test/test2.py samples/python/watershed.py
This commit is contained in:
@@ -186,4 +186,4 @@ Only basic flags are supported in oclMat(i.e. depth number of channels)
|
||||
|
||||
All the 3-channel matrix(i.e. RGB image) are represented by 4-channel matrix in oclMat. It means 3-channel image have 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and oclMat.
|
||||
|
||||
For example: If a oclMat has 3 channels, channels() returns 3 and oclchannels() returns 4
|
||||
For example: If a oclMat has 3 channels, channels() returns 3 and oclchannels() returns 4
|
||||
|
||||
@@ -499,4 +499,4 @@ Returns block descriptors computed for the whole image.
|
||||
|
||||
* **DESCR_FORMAT_COL_BY_COL** - Column-major order.
|
||||
|
||||
The function is mainly used to learn the classifier.
|
||||
The function is mainly used to learn the classifier.
|
||||
|
||||
@@ -315,4 +315,4 @@ Performs linear blending of two images.
|
||||
|
||||
:param weights2: Weights for second image. Must have tha same size as ``img2`` . Supports only ``CV_32F`` type.
|
||||
|
||||
:param result: Destination image.
|
||||
:param result: Destination image.
|
||||
|
||||
@@ -67,4 +67,4 @@ Returns the squared sum of matrix elements for each channel
|
||||
|
||||
:param m: The Source image of all depth
|
||||
|
||||
Counts the squared sum of matrix elements for each channel.
|
||||
Counts the squared sum of matrix elements for each channel.
|
||||
|
||||
@@ -500,13 +500,13 @@ Returns void
|
||||
* **SORT_SELECTION** selection sort, currently cannot sort duplicate keys
|
||||
* **SORT_MERGE** merge sort
|
||||
* **SORT_RADIX** radix sort, only support signed int/float keys(``CV_32S``/``CV_32F``)
|
||||
|
||||
|
||||
Returns the sorted result of all the elements in values based on equivalent keys.
|
||||
|
||||
The element unit in the values to be sorted is determined from the data type,
|
||||
The element unit in the values to be sorted is determined from the data type,
|
||||
i.e., a ``CV_32FC2`` input ``{a1a2, b1b2}`` will be considered as two elements, regardless its matrix dimension.
|
||||
|
||||
Both keys and values will be sorted inplace.
|
||||
Both keys and values will be sorted inplace.
|
||||
|
||||
Keys needs to be a **single** channel `oclMat`.
|
||||
|
||||
|
||||
@@ -55,4 +55,4 @@ Returns the pointer to the opencl command queue
|
||||
|
||||
.. ocv:function:: void* ocl::getoclCommandQueue()
|
||||
|
||||
Thefunction are used to get opencl command queue so that opencv can interactive with other opencl program.
|
||||
Thefunction are used to get opencl command queue so that opencv can interactive with other opencl program.
|
||||
|
||||
@@ -131,7 +131,7 @@ namespace cv
|
||||
{
|
||||
openCLFree(tex_);
|
||||
}
|
||||
operator cl_mem()
|
||||
operator cl_mem()
|
||||
{
|
||||
return tex_;
|
||||
}
|
||||
|
||||
@@ -1194,4 +1194,4 @@ PERFTEST(AddWeighted)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,4 +119,4 @@ PERFTEST(blend)
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,4 +164,4 @@ PERFTEST(BruteForceMatcher)
|
||||
else
|
||||
TestSystem::instance().setAccurate(0, diff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,14 +48,14 @@
|
||||
///////////// StereoMatchBM ////////////////////////
|
||||
PERFTEST(StereoMatchBM)
|
||||
{
|
||||
Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE);
|
||||
Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE);
|
||||
Mat disp,dst;
|
||||
ocl::oclMat d_left, d_right,d_disp;
|
||||
int n_disp= 128;
|
||||
int winSize =19;
|
||||
Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE);
|
||||
Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE);
|
||||
Mat disp,dst;
|
||||
ocl::oclMat d_left, d_right,d_disp;
|
||||
int n_disp= 128;
|
||||
int winSize =19;
|
||||
|
||||
SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg ";
|
||||
SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg ";
|
||||
|
||||
Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
|
||||
bm->compute(left_image, right_image, dst);
|
||||
@@ -64,38 +64,29 @@ PERFTEST(StereoMatchBM)
|
||||
bm->compute(left_image, right_image, dst);
|
||||
CPU_OFF;
|
||||
|
||||
d_left.upload(left_image);
|
||||
d_right.upload(right_image);
|
||||
d_left.upload(left_image);
|
||||
d_right.upload(right_image);
|
||||
|
||||
ocl::StereoBM_OCL d_bm(0, n_disp, winSize);
|
||||
ocl::StereoBM_OCL d_bm(0, n_disp, winSize);
|
||||
|
||||
WARMUP_ON;
|
||||
d_bm(d_left, d_right, d_disp);
|
||||
WARMUP_OFF;
|
||||
WARMUP_ON;
|
||||
d_bm(d_left, d_right, d_disp);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat;
|
||||
d_disp.download(ocl_mat);
|
||||
ocl_mat.convertTo(ocl_mat, dst.type());
|
||||
|
||||
GPU_ON;
|
||||
d_bm(d_left, d_right, d_disp);
|
||||
GPU_OFF;
|
||||
GPU_ON;
|
||||
d_bm(d_left, d_right, d_disp);
|
||||
GPU_OFF;
|
||||
|
||||
GPU_FULL_ON;
|
||||
d_left.upload(left_image);
|
||||
d_right.upload(right_image);
|
||||
d_bm(d_left, d_right, d_disp);
|
||||
d_disp.download(disp);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
GPU_FULL_ON;
|
||||
d_left.upload(left_image);
|
||||
d_right.upload(right_image);
|
||||
d_bm(d_left, d_right, d_disp);
|
||||
d_disp.download(disp);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(-1, 0.);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -82,4 +82,4 @@ PERFTEST(Canny)
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExceptedMatSimilar(edges, ocl_edges, 2e-2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,4 +88,4 @@ PERFTEST(dft)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -284,7 +284,7 @@ PERFTEST(GaussianBlur)
|
||||
Mat src, dst, ocl_dst;
|
||||
int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
|
||||
std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
|
||||
const int ksize = 7;
|
||||
const int ksize = 7;
|
||||
|
||||
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
|
||||
{
|
||||
@@ -374,4 +374,4 @@ PERFTEST(filter2D)
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,4 +87,4 @@ PERFTEST(gemm)
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, src1.cols * src1.rows * 1e-4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,7 +77,7 @@ PERFTEST(HOG)
|
||||
WARMUP_ON;
|
||||
ocl_hog.detectMultiScale(d_src, d_found_locations);
|
||||
WARMUP_OFF;
|
||||
|
||||
|
||||
if(d_found_locations.size() == found_locations.size())
|
||||
TestSystem::instance().setAccurate(1, 0);
|
||||
else
|
||||
@@ -91,4 +91,4 @@ PERFTEST(HOG)
|
||||
d_src.upload(src);
|
||||
ocl_hog.detectMultiScale(d_src, found_locations);
|
||||
GPU_FULL_OFF;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -183,4 +183,4 @@ PERFTEST(setTo)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@ PERFTEST(norm)
|
||||
WARMUP_OFF;
|
||||
|
||||
d_src1.download(ocl_src1);
|
||||
TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);
|
||||
TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);
|
||||
|
||||
GPU_ON;
|
||||
ocl::norm(d_src1, d_src2, NORM_INF);
|
||||
@@ -84,4 +84,4 @@ PERFTEST(norm)
|
||||
ocl::norm(d_src1, d_src2, NORM_INF);
|
||||
GPU_FULL_OFF;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,4 +129,4 @@ PERFTEST(pyrUp)
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, (src.depth() == CV_32F ? 1e-4f : 1.0));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -133,7 +133,7 @@ PERFTEST(Split)
|
||||
|
||||
WARMUP_ON;
|
||||
ocl::split(d_src, d_dst);
|
||||
WARMUP_OFF;
|
||||
WARMUP_OFF;
|
||||
|
||||
GPU_ON;
|
||||
ocl::split(d_src, d_dst);
|
||||
|
||||
@@ -89,4 +89,4 @@ void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &
|
||||
|
||||
openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,4 +81,4 @@ void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
|
||||
|
||||
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -235,4 +235,3 @@ void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)
|
||||
}
|
||||
texture = bindTexture(img);
|
||||
}
|
||||
|
||||
|
||||
@@ -435,4 +435,3 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
|
||||
|
||||
return best_compactness;
|
||||
}
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
@@ -110,7 +110,7 @@ __kernel void arithm_s_add_C1_D2 (__global ushort *src1, int src1_step, int sr
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
@@ -145,7 +145,7 @@ __kernel void arithm_s_add_C1_D3 (__global short *src1, int src1_step, int src
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
@@ -250,7 +250,7 @@ __kernel void arithm_s_add_C2_D0 (__global uchar *src1, int src1_step, int src
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
|
||||
@@ -69,7 +69,7 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
@@ -122,7 +122,7 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
@@ -160,7 +160,7 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
@@ -284,7 +284,7 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
|
||||
@@ -377,4 +377,4 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
*/
|
||||
|
||||
@@ -300,4 +300,4 @@ __kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offs
|
||||
|
||||
*((__global float *)((__global char *)dst + dst_index)) = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,7 +231,7 @@ __kernel void boxFilter_C1_D0(__global const uchar * restrict src, __global ucha
|
||||
{
|
||||
tmp_sum += (data[i]);
|
||||
}
|
||||
|
||||
|
||||
int index = dst_startY * dst_step + dst_startX + (col-anX)*4;
|
||||
|
||||
temp[0][col] = tmp_sum + (data[0]);
|
||||
|
||||
@@ -207,7 +207,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
|
||||
- sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
|
||||
+ sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
|
||||
|
||||
|
||||
bool passThres = classsum >= nodethreshold;
|
||||
|
||||
#if STUMP_BASED
|
||||
@@ -304,4 +304,3 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
|
||||
newnode[counter].alpha[1] = t1.alpha[1];
|
||||
newnode[counter].alpha[2] = t1.alpha[2];
|
||||
}
|
||||
|
||||
|
||||
@@ -71,15 +71,15 @@ void reduce(volatile __local int* smem, int val, int tid)
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 128)
|
||||
{
|
||||
{
|
||||
smem[tid] = val += smem[tid + 128];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 64)
|
||||
{
|
||||
{
|
||||
smem[tid] = val += smem[tid + 64];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 32)
|
||||
@@ -125,15 +125,15 @@ void reduce(__local volatile int* smem, int val, int tid)
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 128)
|
||||
{
|
||||
{
|
||||
smem[tid] = val += smem[tid + 128];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 64)
|
||||
{
|
||||
{
|
||||
smem[tid] = val += smem[tid + 64];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 32)
|
||||
|
||||
@@ -49,12 +49,12 @@
|
||||
|
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
|
||||
|
||||
inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
|
||||
inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
|
||||
{
|
||||
return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
|
||||
}
|
||||
|
||||
inline float ELEM_FLT2(image2d_t _eig, float2 pt)
|
||||
inline float ELEM_FLT2(image2d_t _eig, float2 pt)
|
||||
{
|
||||
return read_imagef(_eig, sampler, pt).x;
|
||||
}
|
||||
@@ -132,7 +132,7 @@ __kernel
|
||||
const int pairDistance = 1 << (stage - passOfStage);
|
||||
const int blockWidth = 2 * pairDistance;
|
||||
|
||||
const int leftId = min( (threadId % pairDistance)
|
||||
const int leftId = min( (threadId % pairDistance)
|
||||
+ (threadId / pairDistance) * blockWidth, count );
|
||||
|
||||
const int rightId = min( leftId + pairDistance, count );
|
||||
@@ -147,7 +147,7 @@ __kernel
|
||||
|
||||
float2 greater = compareResult ? leftPt:rightPt;
|
||||
float2 lesser = compareResult ? rightPt:leftPt;
|
||||
|
||||
|
||||
corners[leftId] = sortOrder ? lesser : greater;
|
||||
corners[rightId] = sortOrder ? greater : lesser;
|
||||
}
|
||||
@@ -195,20 +195,20 @@ __kernel
|
||||
{
|
||||
pt2 = scratch[j];
|
||||
val2 = ELEM_FLT2(eig, pt2);
|
||||
if(val2 > val1)
|
||||
if(val2 > val1)
|
||||
pos++;//calculate the rank of this element in this work group
|
||||
else
|
||||
else
|
||||
{
|
||||
if(val1 > val2)
|
||||
continue;
|
||||
else
|
||||
else
|
||||
{
|
||||
// val1 and val2 are same
|
||||
same++;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int j=0; j< same; j++)
|
||||
for (int j=0; j< same; j++)
|
||||
corners[pos + j] = pt1;
|
||||
}
|
||||
__kernel
|
||||
@@ -240,15 +240,15 @@ __kernel
|
||||
for(int k=0; k<wg; k++)
|
||||
{
|
||||
pt2 = corners[j*wg + k];
|
||||
val2 = ELEM_FLT2(eig, pt2);
|
||||
val2 = ELEM_FLT2(eig, pt2);
|
||||
if(val1 > val2)
|
||||
break;
|
||||
else
|
||||
{
|
||||
//Increment only if the value is not the same.
|
||||
//Increment only if the value is not the same.
|
||||
if( val2 > val1 )
|
||||
pos++;
|
||||
else
|
||||
else
|
||||
same++;
|
||||
}
|
||||
}
|
||||
@@ -257,20 +257,19 @@ __kernel
|
||||
for(int k=0; k<remainder; k++)
|
||||
{
|
||||
pt2 = corners[(numOfGroups-1)*wg + k];
|
||||
val2 = ELEM_FLT2(eig, pt2);
|
||||
val2 = ELEM_FLT2(eig, pt2);
|
||||
if(val1 > val2)
|
||||
break;
|
||||
else
|
||||
{
|
||||
//Don't increment if the value is the same.
|
||||
//Don't increment if the value is the same.
|
||||
//Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false
|
||||
if(val2 > val1)
|
||||
pos++;
|
||||
else
|
||||
else
|
||||
same++;
|
||||
}
|
||||
}
|
||||
for (int j=0; j< same; j++)
|
||||
}
|
||||
for (int j=0; j< same; j++)
|
||||
corners[pos + j] = pt1;
|
||||
}
|
||||
|
||||
|
||||
@@ -490,4 +490,4 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
||||
|
||||
#ifndef N // number of radices
|
||||
#define N 4
|
||||
@@ -71,14 +71,14 @@ __inline uint convertKey(uint converted_key)
|
||||
converted_key ^= mask;
|
||||
#elif defined(K_INT)
|
||||
const uint SIGN_MASK = 1u << ((sizeof(int) * 8) - 1);
|
||||
converted_key ^= SIGN_MASK;
|
||||
converted_key ^= SIGN_MASK;
|
||||
#else
|
||||
|
||||
#endif
|
||||
return converted_key;
|
||||
}
|
||||
|
||||
//FIXME(pengx17):
|
||||
//FIXME(pengx17):
|
||||
// exclusive scan, need to be optimized as this is too naive...
|
||||
kernel
|
||||
void naiveScanAddition(
|
||||
@@ -108,7 +108,7 @@ kernel
|
||||
{
|
||||
const int RADIX_T = N;
|
||||
const int RADICES_T = (1 << RADIX_T);
|
||||
const int NUM_OF_ELEMENTS_PER_WORK_ITEM_T = RADICES_T;
|
||||
const int NUM_OF_ELEMENTS_PER_WORK_ITEM_T = RADICES_T;
|
||||
const int MASK_T = (1 << RADIX_T) - 1;
|
||||
int localBuckets[16] = {0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0};
|
||||
|
||||
@@ -62,7 +62,7 @@
|
||||
#endif
|
||||
|
||||
/////////////////////// Bitonic sort ////////////////////////////
|
||||
// ported from
|
||||
// ported from
|
||||
// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_by_key_kernels.cl
|
||||
__kernel
|
||||
void bitonicSort
|
||||
@@ -82,7 +82,7 @@ __kernel
|
||||
const int pairDistance = 1 << (stage - passOfStage);
|
||||
const int blockWidth = 2 * pairDistance;
|
||||
|
||||
int leftId = min( (threadId % pairDistance)
|
||||
int leftId = min( (threadId % pairDistance)
|
||||
+ (threadId / pairDistance) * blockWidth, count );
|
||||
|
||||
int rightId = min( leftId + pairDistance, count );
|
||||
@@ -90,7 +90,7 @@ __kernel
|
||||
int temp;
|
||||
|
||||
const V_T lval = vals[leftId];
|
||||
const V_T rval = vals[rightId];
|
||||
const V_T rval = vals[rightId];
|
||||
|
||||
const K_T lkey = keys[leftId];
|
||||
const K_T rkey = keys[rightId];
|
||||
@@ -142,7 +142,7 @@ __kernel
|
||||
|
||||
int offset = groupID * wg;
|
||||
int same = 0;
|
||||
|
||||
|
||||
vals += offset;
|
||||
keys += offset;
|
||||
n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
|
||||
@@ -163,13 +163,13 @@ __kernel
|
||||
for (int j=0;j<n;++j)
|
||||
{
|
||||
key2 = scratch[j];
|
||||
if(my_comp(key2, key1))
|
||||
if(my_comp(key2, key1))
|
||||
pos++;//calculate the rank of this element in this work group
|
||||
else
|
||||
else
|
||||
{
|
||||
if(my_comp(key1, key2))
|
||||
continue;
|
||||
else
|
||||
else
|
||||
{
|
||||
// key1 and key2 are same
|
||||
same++;
|
||||
@@ -209,15 +209,15 @@ __kernel
|
||||
{
|
||||
for(int k=0; k<wg; k++)
|
||||
{
|
||||
key2 = keys[j*wg + k];
|
||||
key2 = keys[j*wg + k];
|
||||
if(my_comp(key1, key2))
|
||||
break;
|
||||
else
|
||||
{
|
||||
//Increment only if the value is not the same.
|
||||
//Increment only if the value is not the same.
|
||||
if(my_comp(key2, key1))
|
||||
pos++;
|
||||
else
|
||||
else
|
||||
same++;
|
||||
}
|
||||
}
|
||||
@@ -225,18 +225,18 @@ __kernel
|
||||
|
||||
for(int k=0; k<remainder; k++)
|
||||
{
|
||||
key2 = keys[(numOfGroups-1)*wg + k];
|
||||
key2 = keys[(numOfGroups-1)*wg + k];
|
||||
if(my_comp(key1, key2))
|
||||
break;
|
||||
else
|
||||
{
|
||||
//Don't increment if the value is the same.
|
||||
//Don't increment if the value is the same.
|
||||
if(my_comp(key2, key1))
|
||||
pos++;
|
||||
else
|
||||
else
|
||||
same++;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int j=0; j< same; j++)
|
||||
{
|
||||
vals[pos + j] = val1;
|
||||
|
||||
@@ -318,15 +318,15 @@ __kernel void classify_hists_180_kernel(
|
||||
volatile __local float* smem = products;
|
||||
#ifdef CPU
|
||||
if (tid < 13) smem[tid] = product = product + smem[tid + 32];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16) smem[tid] = product = product + smem[tid + 16];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<8) smem[tid] = product = product + smem[tid + 8];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<4) smem[tid] = product = product + smem[tid + 4];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<2) smem[tid] = product = product + smem[tid + 2];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<8) smem[tid] = product = product + smem[tid + 8];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<4) smem[tid] = product = product + smem[tid + 4];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<2) smem[tid] = product = product + smem[tid + 2];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#else
|
||||
if (tid < 13)
|
||||
{
|
||||
@@ -345,9 +345,9 @@ __kernel void classify_hists_180_kernel(
|
||||
#endif
|
||||
|
||||
if (tid == 0){
|
||||
product = product + smem[tid + 1];
|
||||
product = product + smem[tid + 1];
|
||||
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
@@ -388,18 +388,18 @@ __kernel void classify_hists_252_kernel(
|
||||
if (tid < 64) products[tid] = product = product + products[tid + 64];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
volatile __local float* smem = products;
|
||||
volatile __local float* smem = products;
|
||||
#ifdef CPU
|
||||
if(tid<32) smem[tid] = product = product + smem[tid + 32];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<16) smem[tid] = product = product + smem[tid + 16];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<8) smem[tid] = product = product + smem[tid + 8];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<4) smem[tid] = product = product + smem[tid + 4];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<2) smem[tid] = product = product + smem[tid + 2];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<32) smem[tid] = product = product + smem[tid + 32];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<16) smem[tid] = product = product + smem[tid + 16];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<8) smem[tid] = product = product + smem[tid + 8];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<4) smem[tid] = product = product + smem[tid + 4];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<2) smem[tid] = product = product + smem[tid + 2];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#else
|
||||
if (tid < 32)
|
||||
{
|
||||
@@ -415,9 +415,9 @@ __kernel void classify_hists_252_kernel(
|
||||
}
|
||||
#endif
|
||||
if (tid == 0){
|
||||
product = product + smem[tid + 1];
|
||||
product = product + smem[tid + 1];
|
||||
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
@@ -458,18 +458,18 @@ __kernel void classify_hists_kernel(
|
||||
if (tid < 64) products[tid] = product = product + products[tid + 64];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
volatile __local float* smem = products;
|
||||
volatile __local float* smem = products;
|
||||
#ifdef CPU
|
||||
if(tid<32) smem[tid] = product = product + smem[tid + 32];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<16) smem[tid] = product = product + smem[tid + 16];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<8) smem[tid] = product = product + smem[tid + 8];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<4) smem[tid] = product = product + smem[tid + 4];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<2) smem[tid] = product = product + smem[tid + 2];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<32) smem[tid] = product = product + smem[tid + 32];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<16) smem[tid] = product = product + smem[tid + 16];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<8) smem[tid] = product = product + smem[tid + 8];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<4) smem[tid] = product = product + smem[tid + 4];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(tid<2) smem[tid] = product = product + smem[tid + 2];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#else
|
||||
if (tid < 32)
|
||||
{
|
||||
@@ -485,9 +485,9 @@ __kernel void classify_hists_kernel(
|
||||
}
|
||||
#endif
|
||||
if (tid == 0){
|
||||
smem[tid] = product = product + smem[tid + 1];
|
||||
smem[tid] = product = product + smem[tid + 1];
|
||||
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@@ -190,7 +190,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
|
||||
{
|
||||
int idx1 = y_tex * img_step + x_tex;
|
||||
int idx2 = min(y_tex + ((radius << 1) + 1), cheight - 1) * img_step + x_tex;
|
||||
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
StepDown(idx1, idx2, left, right, d, col_ssd);
|
||||
|
||||
@@ -129,7 +129,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void get_first_k_initial_local_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
|
||||
__kernel void get_first_k_initial_local_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
|
||||
__global short *ctemp,int h, int w, int nr_plane,
|
||||
int cmsg_step1, int cdisp_step1, int cndisp)
|
||||
{
|
||||
@@ -187,7 +187,7 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, __global float *selected_disp_pyr,
|
||||
__kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, __global float *selected_disp_pyr,
|
||||
__global float *ctemp,int h, int w, int nr_plane,
|
||||
int cmsg_step1, int cdisp_step1, int cndisp)
|
||||
{
|
||||
@@ -257,20 +257,20 @@ float compute_3(__global uchar* left, __global uchar* right,
|
||||
|
||||
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
|
||||
}
|
||||
float compute_1(__global uchar* left, __global uchar* right,
|
||||
float compute_1(__global uchar* left, __global uchar* right,
|
||||
float cdata_weight, float cmax_data_term)
|
||||
{
|
||||
return fmin(cdata_weight * abs((int)*left - (int)*right), cdata_weight * cmax_data_term);
|
||||
}
|
||||
short round_short(float v){
|
||||
return convert_short_sat_rte(v);
|
||||
return convert_short_sat_rte(v);
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////init_data_cost///////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
|
||||
__kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
|
||||
int h, int w, int level, int channels,
|
||||
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
|
||||
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
|
||||
int cth, int cimg_step, int cndisp)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
@@ -312,9 +312,9 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
|
||||
}
|
||||
}
|
||||
}
|
||||
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
|
||||
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
|
||||
int h, int w, int level, int channels,
|
||||
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
|
||||
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
|
||||
int cth, int cimg_step, int cndisp)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
@@ -361,13 +361,13 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
|
||||
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
|
||||
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
|
||||
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
|
||||
int cdisp_step1, int cmsg_step1)
|
||||
{
|
||||
int x_out = get_group_id(0);
|
||||
int y_out = get_group_id(1) % h;
|
||||
//int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
|
||||
int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
|
||||
int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
|
||||
|
||||
int tid = get_local_id(0);
|
||||
|
||||
@@ -411,39 +411,39 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local float* dline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 256)
|
||||
if (winsz >= 256)
|
||||
{
|
||||
if (tid < 128)
|
||||
dline[tid] += dline[tid + 128];
|
||||
if (tid < 128)
|
||||
dline[tid] += dline[tid + 128];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local float* dline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 128)
|
||||
if (winsz >= 128)
|
||||
{
|
||||
if (tid < 64)
|
||||
dline[tid] += dline[tid + 64];
|
||||
if (tid < 64)
|
||||
dline[tid] += dline[tid + 64];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 64)
|
||||
if (tid < 32)
|
||||
if (winsz >= 64)
|
||||
if (tid < 32)
|
||||
vdline[tid] += vdline[tid + 32];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 32)
|
||||
if (tid < 16)
|
||||
if (winsz >= 32)
|
||||
if (tid < 16)
|
||||
vdline[tid] += vdline[tid + 16];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -452,7 +452,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 16)
|
||||
if (tid < 8)
|
||||
if (tid < 8)
|
||||
vdline[tid] += vdline[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -461,7 +461,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 8)
|
||||
if (tid < 4)
|
||||
if (tid < 4)
|
||||
vdline[tid] += vdline[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -470,7 +470,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 4)
|
||||
if (tid < 2)
|
||||
if (tid < 2)
|
||||
vdline[tid] += vdline[tid + 2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -479,7 +479,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 2)
|
||||
if (tid < 1)
|
||||
if (tid < 1)
|
||||
vdline[tid] += vdline[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@@ -500,7 +500,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
|
||||
{
|
||||
int x_out = get_group_id(0);
|
||||
int y_out = get_group_id(1) % h;
|
||||
int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
|
||||
int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
|
||||
|
||||
int tid = get_local_id(0);
|
||||
|
||||
@@ -545,74 +545,74 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local float* dline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 256)
|
||||
if (tid < 128)
|
||||
dline[tid] += dline[tid + 128];
|
||||
if (winsz >= 256)
|
||||
if (tid < 128)
|
||||
dline[tid] += dline[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local float* dline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 128)
|
||||
if (tid < 64)
|
||||
dline[tid] += dline[tid + 64];
|
||||
if (winsz >= 128)
|
||||
if (tid < 64)
|
||||
dline[tid] += dline[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 64)
|
||||
if (tid < 32)
|
||||
vdline[tid] += vdline[tid + 32];
|
||||
if (winsz >= 64)
|
||||
if (tid < 32)
|
||||
vdline[tid] += vdline[tid + 32];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 32)
|
||||
if (tid < 16)
|
||||
vdline[tid] += vdline[tid + 16];
|
||||
if (winsz >= 32)
|
||||
if (tid < 16)
|
||||
vdline[tid] += vdline[tid + 16];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 16)
|
||||
if (tid < 8)
|
||||
vdline[tid] += vdline[tid + 8];
|
||||
if (winsz >= 16)
|
||||
if (tid < 8)
|
||||
vdline[tid] += vdline[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 8)
|
||||
if (tid < 4)
|
||||
vdline[tid] += vdline[tid + 4];
|
||||
if (winsz >= 8)
|
||||
if (tid < 4)
|
||||
vdline[tid] += vdline[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 4)
|
||||
if (tid < 2)
|
||||
vdline[tid] += vdline[tid + 2];
|
||||
if (winsz >= 4)
|
||||
if (tid < 2)
|
||||
vdline[tid] += vdline[tid + 2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 2)
|
||||
if (tid < 1)
|
||||
vdline[tid] += vdline[tid + 1];
|
||||
if (winsz >= 2)
|
||||
if (tid < 1)
|
||||
vdline[tid] += vdline[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(d < cndisp)
|
||||
{
|
||||
@@ -626,10 +626,10 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
|
||||
///////////////////////////////////////////////////////////////
|
||||
////////////////////// compute data cost //////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
|
||||
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
|
||||
__global uchar *cleft, __global uchar *cright,
|
||||
int h, int w, int level, int nr_plane, int channels,
|
||||
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
|
||||
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
|
||||
float cmax_data_term, int cimg_step, int cth)
|
||||
{
|
||||
|
||||
@@ -676,10 +676,10 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
|
||||
}
|
||||
}
|
||||
}
|
||||
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
|
||||
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
|
||||
__global uchar *cleft, __global uchar *cright,
|
||||
int h, int w, int level, int nr_plane, int channels,
|
||||
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
|
||||
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
|
||||
float cmax_data_term, int cimg_step, int cth)
|
||||
{
|
||||
|
||||
@@ -728,11 +728,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
|
||||
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
|
||||
__global uchar *cleft, __global uchar *cright,__local float *smem,
|
||||
int level, int rows, int cols, int h, int nr_plane,
|
||||
int level, int rows, int cols, int h, int nr_plane,
|
||||
int channels, int winsz,
|
||||
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2,
|
||||
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2,
|
||||
float cdata_weight, float cmax_data_term, int cimg_step,int cth)
|
||||
|
||||
{
|
||||
@@ -788,9 +788,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 64)
|
||||
if (winsz >= 64)
|
||||
{
|
||||
if (tid < 32)
|
||||
if (tid < 32)
|
||||
vdline[tid] += vdline[tid + 32];
|
||||
}
|
||||
}
|
||||
@@ -799,9 +799,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 32)
|
||||
if (winsz >= 32)
|
||||
{
|
||||
if (tid < 16)
|
||||
if (tid < 16)
|
||||
vdline[tid] += vdline[tid + 16];
|
||||
}
|
||||
}
|
||||
@@ -810,9 +810,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 16)
|
||||
if (winsz >= 16)
|
||||
{
|
||||
if (tid < 8)
|
||||
if (tid < 8)
|
||||
vdline[tid] += vdline[tid + 8];
|
||||
}
|
||||
}
|
||||
@@ -821,9 +821,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 8)
|
||||
if (winsz >= 8)
|
||||
{
|
||||
if (tid < 4)
|
||||
if (tid < 4)
|
||||
vdline[tid] += vdline[tid + 4];
|
||||
}
|
||||
}
|
||||
@@ -832,9 +832,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 4)
|
||||
if (winsz >= 4)
|
||||
{
|
||||
if (tid < 2)
|
||||
if (tid < 2)
|
||||
vdline[tid] += vdline[tid + 2];
|
||||
}
|
||||
}
|
||||
@@ -843,9 +843,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 2)
|
||||
if (winsz >= 2)
|
||||
{
|
||||
if (tid < 1)
|
||||
if (tid < 1)
|
||||
vdline[tid] += vdline[tid + 1];
|
||||
}
|
||||
}
|
||||
@@ -859,11 +859,11 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_,
|
||||
__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_,
|
||||
__global uchar *cleft, __global uchar *cright, __local float *smem,
|
||||
int level, int rows, int cols, int h, int nr_plane,
|
||||
int level, int rows, int cols, int h, int nr_plane,
|
||||
int channels, int winsz,
|
||||
int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight,
|
||||
int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight,
|
||||
float cmax_data_term, int cimg_step, int cth)
|
||||
|
||||
{
|
||||
@@ -918,21 +918,21 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 64)
|
||||
if (winsz >= 64)
|
||||
{
|
||||
if (tid < 32)
|
||||
if (tid < 32)
|
||||
vdline[tid] += vdline[tid + 32];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
|
||||
if(d < nr_plane)
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 32)
|
||||
if (winsz >= 32)
|
||||
{
|
||||
if (tid < 16)
|
||||
if (tid < 16)
|
||||
vdline[tid] += vdline[tid + 16];
|
||||
}
|
||||
}
|
||||
@@ -941,9 +941,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 16)
|
||||
if (winsz >= 16)
|
||||
{
|
||||
if (tid < 8)
|
||||
if (tid < 8)
|
||||
vdline[tid] += vdline[tid + 8];
|
||||
}
|
||||
}
|
||||
@@ -952,9 +952,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 8)
|
||||
if (winsz >= 8)
|
||||
{
|
||||
if (tid < 4)
|
||||
if (tid < 4)
|
||||
vdline[tid] += vdline[tid + 4];
|
||||
}
|
||||
}
|
||||
@@ -963,9 +963,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 4)
|
||||
if (winsz >= 4)
|
||||
{
|
||||
if (tid < 2)
|
||||
if (tid < 2)
|
||||
vdline[tid] += vdline[tid + 2];
|
||||
}
|
||||
}
|
||||
@@ -974,9 +974,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
|
||||
if(d < nr_plane)
|
||||
{
|
||||
__local volatile float* vdline = smem + winsz * get_local_id(2);
|
||||
if (winsz >= 2)
|
||||
if (winsz >= 2)
|
||||
{
|
||||
if (tid < 1)
|
||||
if (tid < 1)
|
||||
vdline[tid] += vdline[tid + 1];
|
||||
}
|
||||
}
|
||||
@@ -993,11 +993,11 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
|
||||
///////////////////////////////////////////////////////////////
|
||||
//////////////////////// init message /////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
|
||||
__global short *r_new, __global const short *u_cur, __global const short *d_cur,
|
||||
__global const short *l_cur, __global const short *r_cur,
|
||||
__global short *data_cost_selected, __global short *disparity_selected_new,
|
||||
__global short *data_cost_new, __global const short* data_cost_cur,
|
||||
void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
|
||||
__global short *r_new, __global const short *u_cur, __global const short *d_cur,
|
||||
__global const short *l_cur, __global const short *r_cur,
|
||||
__global short *data_cost_selected, __global short *disparity_selected_new,
|
||||
__global short *data_cost_new, __global const short* data_cost_cur,
|
||||
__global const short *disparity_selected_cur,
|
||||
int nr_plane, int nr_plane2,
|
||||
int cdisp_step1, int cdisp_step2)
|
||||
@@ -1027,11 +1027,11 @@ void get_first_k_element_increase_0(__global short* u_new, __global short *d_new
|
||||
data_cost_new[id * cdisp_step1] = SHRT_MAX;
|
||||
}
|
||||
}
|
||||
void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
|
||||
__global float *r_new, __global const float *u_cur, __global const float *d_cur,
|
||||
void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
|
||||
__global float *r_new, __global const float *u_cur, __global const float *d_cur,
|
||||
__global const float *l_cur, __global const float *r_cur,
|
||||
__global float *data_cost_selected, __global float *disparity_selected_new,
|
||||
__global float *data_cost_new, __global const float *data_cost_cur,
|
||||
__global float *data_cost_selected, __global float *disparity_selected_new,
|
||||
__global float *data_cost_new, __global const float *data_cost_cur,
|
||||
__global const float *disparity_selected_cur,
|
||||
int nr_plane, int nr_plane2,
|
||||
int cdisp_step1, int cdisp_step2)
|
||||
@@ -1057,13 +1057,13 @@ void get_first_k_element_increase_1(__global float *u_new, __global float *d_new
|
||||
u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
|
||||
d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
|
||||
l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
|
||||
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
|
||||
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
|
||||
data_cost_new[id * cdisp_step1] = FLT_MAX;
|
||||
|
||||
}
|
||||
}
|
||||
__kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_,
|
||||
__global short *r_new_, __global short *u_cur_, __global const short *d_cur_,
|
||||
__global short *r_new_, __global short *u_cur_, __global const short *d_cur_,
|
||||
__global const short *l_cur_, __global const short *r_cur_, __global short *ctemp,
|
||||
__global short *selected_disp_pyr_new, __global const short *selected_disp_pyr_cur,
|
||||
__global short *data_cost_selected_, __global const short *data_cost_,
|
||||
@@ -1113,7 +1113,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
|
||||
}
|
||||
}
|
||||
__kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_,
|
||||
__global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
|
||||
__global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
|
||||
__global const float *l_cur_, __global const float *r_cur_, __global float *ctemp,
|
||||
__global float *selected_disp_pyr_new, __global const float *selected_disp_pyr_cur,
|
||||
__global float *data_cost_selected_, __global const float *data_cost_,
|
||||
@@ -1176,28 +1176,28 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
|
||||
id = j;
|
||||
}
|
||||
}
|
||||
data_cost_selected[i * cdisp_step1] = data_cost[id * cdisp_step1];
|
||||
data_cost_selected[i * cdisp_step1] = data_cost[id * cdisp_step1];
|
||||
disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
|
||||
u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
|
||||
d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
|
||||
l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
|
||||
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
|
||||
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
|
||||
data_cost_new[id * cdisp_step1] = FLT_MAX;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
//////////////////// calc all iterations /////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
|
||||
void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
|
||||
__global const short *msg2, __global const short *msg3,
|
||||
__global const short *dst_disp, __global const short *src_disp,
|
||||
__global const short *dst_disp, __global const short *src_disp,
|
||||
int nr_plane, __global short *temp,
|
||||
float cmax_disc_term, int cdisp_step1, float cdisc_single_jump)
|
||||
{
|
||||
short minimum = SHRT_MAX;
|
||||
for(int d = 0; d < nr_plane; d++)
|
||||
for(int d = 0; d < nr_plane; d++)
|
||||
{
|
||||
int idx = d * cdisp_step1;
|
||||
short val = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
|
||||
@@ -1215,7 +1215,7 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
|
||||
short src_disp_reg = src_disp[d * cdisp_step1];
|
||||
|
||||
for(int d2 = 0; d2 < nr_plane; d2++)
|
||||
cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
|
||||
cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
|
||||
cdisc_single_jump * abs(dst_disp[d2 * cdisp_step1] - src_disp_reg)));
|
||||
|
||||
temp[d * cdisp_step1] = convert_short_sat_rte(cost_min);
|
||||
@@ -1226,14 +1226,14 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
|
||||
for(int d = 0; d < nr_plane; d++)
|
||||
msg_dst[d * cdisp_step1] = convert_short_sat_rte(temp[d * cdisp_step1] - sum);
|
||||
}
|
||||
void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
|
||||
void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
|
||||
__global const float *msg2, __global const float *msg3,
|
||||
__global const float *dst_disp, __global const float *src_disp,
|
||||
__global const float *dst_disp, __global const float *src_disp,
|
||||
int nr_plane, __global float *temp,
|
||||
float cmax_disc_term, int cdisp_step1, float cdisc_single_jump)
|
||||
{
|
||||
float minimum = FLT_MAX;
|
||||
for(int d = 0; d < nr_plane; d++)
|
||||
for(int d = 0; d < nr_plane; d++)
|
||||
{
|
||||
int idx = d * cdisp_step1;
|
||||
float val = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
|
||||
@@ -1251,7 +1251,7 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
|
||||
float src_disp_reg = src_disp[d * cdisp_step1];
|
||||
|
||||
for(int d2 = 0; d2 < nr_plane; d2++)
|
||||
cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
|
||||
cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
|
||||
cdisc_single_jump * fabs(dst_disp[d2 * cdisp_step1] - src_disp_reg)));
|
||||
|
||||
temp[d * cdisp_step1] = cost_min;
|
||||
@@ -1262,9 +1262,9 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
|
||||
for(int d = 0; d < nr_plane; d++)
|
||||
msg_dst[d * cdisp_step1] = temp[d * cdisp_step1] - sum;
|
||||
}
|
||||
__kernel void compute_message_0(__global short *u_, __global short *d_, __global short *l_, __global short *r_,
|
||||
__global const short *data_cost_selected, __global const short *selected_disp_pyr_cur,
|
||||
__global short *ctemp, int h, int w, int nr_plane, int i,
|
||||
__kernel void compute_message_0(__global short *u_, __global short *d_, __global short *l_, __global short *r_,
|
||||
__global const short *data_cost_selected, __global const short *selected_disp_pyr_cur,
|
||||
__global short *ctemp, int h, int w, int nr_plane, int i,
|
||||
float cmax_disc_term, int cdisp_step1, int cmsg_step1, float cdisc_single_jump)
|
||||
{
|
||||
int y = get_global_id(1);
|
||||
@@ -1283,7 +1283,7 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global
|
||||
|
||||
__global short *temp = ctemp + y * cmsg_step1 + x;
|
||||
|
||||
message_per_pixel_0(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
|
||||
message_per_pixel_0(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
|
||||
cmax_disc_term, cdisp_step1, cdisc_single_jump);
|
||||
message_per_pixel_0(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp,
|
||||
cmax_disc_term, cdisp_step1, cdisc_single_jump);
|
||||
@@ -1293,9 +1293,9 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global
|
||||
cmax_disc_term, cdisp_step1, cdisc_single_jump);
|
||||
}
|
||||
}
|
||||
__kernel void compute_message_1(__global float *u_, __global float *d_, __global float *l_, __global float *r_,
|
||||
__global const float *data_cost_selected, __global const float *selected_disp_pyr_cur,
|
||||
__global float *ctemp, int h, int w, int nr_plane, int i,
|
||||
__kernel void compute_message_1(__global float *u_, __global float *d_, __global float *l_, __global float *r_,
|
||||
__global const float *data_cost_selected, __global const float *selected_disp_pyr_cur,
|
||||
__global float *ctemp, int h, int w, int nr_plane, int i,
|
||||
float cmax_disc_term, int cdisp_step1, int cmsg_step1, float cdisc_single_jump)
|
||||
{
|
||||
int y = get_global_id(1);
|
||||
@@ -1313,7 +1313,7 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
|
||||
__global const float *disp = selected_disp_pyr_cur + y * cmsg_step1 + x;
|
||||
__global float *temp = ctemp + y * cmsg_step1 + x;
|
||||
|
||||
message_per_pixel_1(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
|
||||
message_per_pixel_1(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
|
||||
cmax_disc_term, cdisp_step1, cdisc_single_jump);
|
||||
message_per_pixel_1(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp,
|
||||
cmax_disc_term, cdisp_step1, cdisc_single_jump);
|
||||
@@ -1327,10 +1327,10 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////////////// output ////////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
__kernel void compute_disp_0(__global const short *u_, __global const short *d_, __global const short *l_,
|
||||
__global const short *r_, __global const short * data_cost_selected,
|
||||
__kernel void compute_disp_0(__global const short *u_, __global const short *d_, __global const short *l_,
|
||||
__global const short *r_, __global const short * data_cost_selected,
|
||||
__global const short *disp_selected_pyr,
|
||||
__global short* disp,
|
||||
__global short* disp,
|
||||
int res_step, int cols, int rows, int nr_plane,
|
||||
int cmsg_step1, int cdisp_step1)
|
||||
{
|
||||
@@ -1364,10 +1364,10 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
|
||||
disp[res_step * y + x] = best;
|
||||
}
|
||||
}
|
||||
__kernel void compute_disp_1(__global const float *u_, __global const float *d_, __global const float *l_,
|
||||
__global const float *r_, __global const float *data_cost_selected,
|
||||
__kernel void compute_disp_1(__global const float *u_, __global const float *d_, __global const float *l_,
|
||||
__global const float *r_, __global const float *data_cost_selected,
|
||||
__global const float *disp_selected_pyr,
|
||||
__global short *disp,
|
||||
__global short *disp,
|
||||
int res_step, int cols, int rows, int nr_plane,
|
||||
int cmsg_step1, int cdisp_step1)
|
||||
{
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
|
||||
__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
|
||||
__global float* dx, __global float* dy, int dx_step)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
@@ -51,9 +51,9 @@ __global float* dx, __global float* dy, int dx_step)
|
||||
|
||||
if((x < src_col)&&(y < src_row))
|
||||
{
|
||||
int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
|
||||
int src_x2 = (x - 1) > 0 ? (x -1) : 0;
|
||||
|
||||
int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
|
||||
int src_x2 = (x - 1) > 0 ? (x -1) : 0;
|
||||
|
||||
//if(src[y * src_step + src_x1] == src[y * src_step+ src_x2])
|
||||
//{
|
||||
// printf("y = %d\n", y);
|
||||
@@ -61,8 +61,8 @@ __global float* dx, __global float* dy, int dx_step)
|
||||
// printf("src_x2 = %d\n", src_x2);
|
||||
//}
|
||||
dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]);
|
||||
|
||||
int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
|
||||
|
||||
int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
|
||||
int src_y2 = (y - 1) > 0 ? (y - 1) : 0;
|
||||
dy[y * dx_step+ x] = 0.5f * (src[src_y1 * src_step + x] - src[src_y2 * src_step+ x]);
|
||||
}
|
||||
@@ -89,20 +89,20 @@ float bicubicCoeff(float x_)
|
||||
}
|
||||
|
||||
__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
|
||||
image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,
|
||||
__global const float* u1, int u1_step,
|
||||
image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,
|
||||
__global const float* u1, int u1_step,
|
||||
__global const float* u2,
|
||||
__global float* I1w,
|
||||
__global float* I1wx, /*int I1wx_step,*/
|
||||
__global float* I1wy, /*int I1wy_step,*/
|
||||
__global float* grad, /*int grad_step,*/
|
||||
__global float* rho,
|
||||
int I1w_step,
|
||||
int u2_step,
|
||||
int u1_offset_x,
|
||||
int u1_offset_y,
|
||||
int u2_offset_x,
|
||||
int u2_offset_y)
|
||||
__global float* I1wx, /*int I1wx_step,*/
|
||||
__global float* I1wy, /*int I1wy_step,*/
|
||||
__global float* grad, /*int grad_step,*/
|
||||
__global float* rho,
|
||||
int I1w_step,
|
||||
int u2_step,
|
||||
int u1_offset_x,
|
||||
int u1_offset_y,
|
||||
int u2_offset_x,
|
||||
int u2_offset_y)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
@@ -136,7 +136,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
|
||||
const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
|
||||
|
||||
//sum += w * tex2D(tex_I1 , cx, cy);
|
||||
int2 cood = (int2)(cx, cy);
|
||||
int2 cood = (int2)(cx, cy);
|
||||
sum += w * read_imagef(tex_I1, sampleri, cood).x;
|
||||
//sumx += w * tex2D(tex_I1x, cx, cy);
|
||||
sumx += w * read_imagef(tex_I1x, sampleri, cood).x;
|
||||
@@ -181,18 +181,18 @@ float readImage(__global const float *image, const int x, const int y, const
|
||||
}
|
||||
|
||||
__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row,
|
||||
__global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,
|
||||
__global const float* u1, int u1_step,
|
||||
__global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,
|
||||
__global const float* u1, int u1_step,
|
||||
__global const float* u2,
|
||||
__global float* I1w,
|
||||
__global float* I1wx, /*int I1wx_step,*/
|
||||
__global float* I1wy, /*int I1wy_step,*/
|
||||
__global float* grad, /*int grad_step,*/
|
||||
__global float* rho,
|
||||
int I1w_step,
|
||||
int u2_step,
|
||||
int I1_step,
|
||||
int I1x_step)
|
||||
__global float* I1wx, /*int I1wx_step,*/
|
||||
__global float* I1wy, /*int I1wy_step,*/
|
||||
__global float* grad, /*int grad_step,*/
|
||||
__global float* rho,
|
||||
int I1w_step,
|
||||
int u2_step,
|
||||
int I1_step,
|
||||
int I1x_step)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
@@ -224,7 +224,7 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
|
||||
{
|
||||
const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
|
||||
|
||||
int2 cood = (int2)(cx, cy);
|
||||
int2 cood = (int2)(cx, cy);
|
||||
sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step);
|
||||
sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step);
|
||||
sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step);
|
||||
@@ -256,18 +256,18 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step,
|
||||
__global const float* u2,
|
||||
__global float* p11, int p11_step,
|
||||
__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step,
|
||||
__global const float* u2,
|
||||
__global float* p11, int p11_step,
|
||||
__global float* p12,
|
||||
__global float* p21,
|
||||
__global float* p22,
|
||||
__global float* p22,
|
||||
const float taut,
|
||||
int u2_step,
|
||||
int u1_offset_x,
|
||||
int u1_offset_y,
|
||||
int u2_offset_x,
|
||||
int u2_offset_y)
|
||||
int u2_step,
|
||||
int u1_offset_x,
|
||||
int u1_offset_y,
|
||||
int u2_offset_x,
|
||||
int u2_offset_y)
|
||||
{
|
||||
|
||||
//const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
@@ -277,16 +277,16 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
|
||||
|
||||
if(x < u1_col && y < u1_row)
|
||||
{
|
||||
int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
|
||||
int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
|
||||
const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
|
||||
|
||||
int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
|
||||
|
||||
int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
|
||||
const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
|
||||
|
||||
int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
|
||||
int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
|
||||
const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
|
||||
|
||||
int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
|
||||
int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
|
||||
const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
|
||||
|
||||
const float g1 = hypot(u1x, u1y);
|
||||
@@ -329,19 +329,19 @@ float divergence(__global const float* v1, __global const float* v2, int y, int
|
||||
|
||||
__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step,
|
||||
__global const float* I1wy, /*int I1wy_step,*/
|
||||
__global const float* grad, /*int grad_step,*/
|
||||
__global const float* grad, /*int grad_step,*/
|
||||
__global const float* rho_c, /*int rho_c_step,*/
|
||||
__global const float* p11, /*int p11_step,*/
|
||||
__global const float* p12, /*int p12_step,*/
|
||||
__global const float* p21, /*int p21_step,*/
|
||||
__global const float* p22, /*int p22_step,*/
|
||||
__global float* u1, int u1_step,
|
||||
__global float* u2,
|
||||
__global float* u1, int u1_step,
|
||||
__global float* u2,
|
||||
__global float* error, const float l_t, const float theta, int u2_step,
|
||||
int u1_offset_x,
|
||||
int u1_offset_y,
|
||||
int u2_offset_x,
|
||||
int u2_offset_y)
|
||||
int u1_offset_x,
|
||||
int u1_offset_y,
|
||||
int u2_offset_x,
|
||||
int u2_offset_y)
|
||||
{
|
||||
|
||||
//const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
@@ -109,4 +109,3 @@ void cv::ocl::pyrDown(const oclMat &src, oclMat &dst)
|
||||
|
||||
pyrdown_run(src, dst);
|
||||
}
|
||||
|
||||
|
||||
@@ -85,4 +85,4 @@ namespace cv
|
||||
openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,7 +160,7 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreater
|
||||
|
||||
namespace radix_sort
|
||||
{
|
||||
//FIXME(pengx17):
|
||||
//FIXME(pengx17):
|
||||
// exclusive scan, need to be optimized as this is too naive...
|
||||
//void naive_scan_addition(oclMat& input, oclMat& output)
|
||||
//{
|
||||
@@ -247,8 +247,8 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t origVecSize, bool isGre
|
||||
}
|
||||
}
|
||||
ocl::copyMakeBorder(
|
||||
keys(Rect(0,0,origVecSize,1)), buffer_keys,
|
||||
0, 0, 0, vecSize - origVecSize,
|
||||
keys(Rect(0,0,origVecSize,1)), buffer_keys,
|
||||
0, 0, 0, vecSize - origVecSize,
|
||||
BORDER_CONSTANT, padding_value);
|
||||
vals(Rect(0,0,origVecSize,1)).copyTo(roi_buffer_vals);
|
||||
newBuffer = true;
|
||||
@@ -274,7 +274,7 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t origVecSize, bool isGre
|
||||
genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);
|
||||
|
||||
//additional build option for radix sort
|
||||
sprintf(build_opt_buf + strlen(build_opt_buf), " -D K_%s", isKeyFloat?"FLT":"INT");
|
||||
sprintf(build_opt_buf + strlen(build_opt_buf), " -D K_%s", isKeyFloat?"FLT":"INT");
|
||||
|
||||
String kernelnames[2] = {String("histogramRadixN"), String("permuteRadixN")};
|
||||
|
||||
|
||||
@@ -178,7 +178,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
|
||||
}
|
||||
|
||||
void Near1(double threshold = 0.)
|
||||
{
|
||||
{
|
||||
EXPECT_MAT_NEAR(dst1, Mat(gdst1_whole), threshold);
|
||||
}
|
||||
|
||||
|
||||
@@ -116,4 +116,4 @@ INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Blend, Combine(
|
||||
DIFFERENT_SIZES,
|
||||
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
|
||||
));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -86,4 +86,4 @@ TEST_P(Canny, Accuracy)
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
|
||||
testing::Values(AppertureSize(3), AppertureSize(5)),
|
||||
testing::Values(L2gradient(false), L2gradient(true))));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -57,8 +57,8 @@ using namespace testing;
|
||||
using namespace std;
|
||||
|
||||
|
||||
PARAM_TEST_CASE(FilterTestBase,
|
||||
MatType,
|
||||
PARAM_TEST_CASE(FilterTestBase,
|
||||
MatType,
|
||||
cv::Size, // kernel size
|
||||
cv::Size, // dx,dy
|
||||
int // border type, or iteration
|
||||
@@ -367,7 +367,7 @@ INSTANTIATE_TEST_CASE_P(Filter, Laplacian, Combine(
|
||||
Values(0))); //not use
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(
|
||||
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||
Values(Size(0, 0)), //not use
|
||||
Values(Size(0, 0)), //not use
|
||||
Values(1)));
|
||||
@@ -383,7 +383,7 @@ INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(
|
||||
INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||
Values(Size(0, 0)), //not use
|
||||
Values(Size(0, 1), Size(1, 0)),
|
||||
Values(Size(0, 1), Size(1, 0)),
|
||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
|
||||
@@ -395,7 +395,7 @@ INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
|
||||
Values(CV_8UC1, CV_32FC1, CV_32FC4),
|
||||
Values(CV_8UC1, CV_32FC1, CV_32FC4),
|
||||
Values(Size(3, 3), Size(15, 15), Size(25, 25)),
|
||||
Values(Size(0, 0)), //not use
|
||||
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REFLECT101, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT)));
|
||||
|
||||
@@ -448,7 +448,7 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo
|
||||
{
|
||||
cv::Mat cpu_cldst;
|
||||
cldst.download(cpu_cldst);
|
||||
EXPECT_MAT_NEAR(dst, cpu_cldst, threshold);
|
||||
EXPECT_MAT_NEAR(dst, cpu_cldst, threshold);
|
||||
}
|
||||
};
|
||||
////////////////////////////////equalizeHist//////////////////////////////////////////
|
||||
|
||||
@@ -87,7 +87,7 @@ PARAM_TEST_CASE(Kmeans, int, int, int)
|
||||
for(int j = 0; j < nchannel; j++)
|
||||
center_row_header.at<float>(0, i*nchannel+j) = 50000.0;
|
||||
|
||||
for(int j = 0; (j < max_neighbour) ||
|
||||
for(int j = 0; (j < max_neighbour) ||
|
||||
(i == K-1 && j < max_neighbour + MHEIGHT%K); j ++)
|
||||
{
|
||||
Mat cur_row_header = src.row(row_idx + 1 + j);
|
||||
@@ -121,15 +121,15 @@ TEST_P(Kmeans, Mat){
|
||||
ocl::kmeans(d_src, K, d_labels,
|
||||
TermCriteria( TermCriteria::EPS + TermCriteria::MAX_ITER, 100, 0),
|
||||
1, flags, d_centers);
|
||||
|
||||
|
||||
Mat dd_labels(d_labels);
|
||||
Mat dd_centers(d_centers);
|
||||
if(flags & KMEANS_USE_INITIAL_LABELS)
|
||||
{
|
||||
EXPECT_MAT_NEAR(labels, dd_labels, 0);
|
||||
EXPECT_MAT_NEAR(centers, dd_centers, 1e-3);
|
||||
}
|
||||
else
|
||||
}
|
||||
else
|
||||
{
|
||||
int row_idx = 0;
|
||||
for(int i = 0; i < K; i++)
|
||||
@@ -157,6 +157,6 @@ TEST_P(Kmeans, Mat){
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ML, Kmeans, Combine(
|
||||
Values(3, 5, 8),
|
||||
Values(CV_32FC1, CV_32FC2, CV_32FC4),
|
||||
Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/)));
|
||||
Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/)));
|
||||
|
||||
#endif
|
||||
|
||||
@@ -85,7 +85,7 @@ TEST_P(GoodFeaturesToTrack, Accuracy)
|
||||
ASSERT_FALSE(d_pts.empty());
|
||||
|
||||
std::vector<cv::Point2f> pts(d_pts.cols);
|
||||
|
||||
|
||||
detector.downloadPoints(d_pts, pts);
|
||||
|
||||
std::vector<cv::Point2f> pts_gold;
|
||||
@@ -125,7 +125,7 @@ TEST_P(GoodFeaturesToTrack, EmptyCorners)
|
||||
ASSERT_TRUE(corners.empty());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
|
||||
INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
|
||||
testing::Values(MinDistance(0.0), MinDistance(3.0)));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -342,4 +342,3 @@ INSTANTIATE_TEST_CASE_P(OCL_Video, Farneback, testing::Combine(
|
||||
testing::Values(UseInitFlow(false), UseInitFlow(true))));
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
||||
|
||||
@@ -79,7 +79,7 @@ TEST_P(PyrDown, Mat)
|
||||
Size size(MWIDTH, MHEIGHT);
|
||||
Mat src = randomMat(size, CV_MAKETYPE(type, channels));
|
||||
oclMat gsrc(src);
|
||||
|
||||
|
||||
pyrDown(src, dst_cpu);
|
||||
pyrDown(gsrc, gdst);
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ IMPLEMENT_PARAM_CLASS(InputSize, int)
|
||||
IMPLEMENT_PARAM_CLASS(SortMethod, int)
|
||||
|
||||
|
||||
template<class T>
|
||||
template<class T>
|
||||
struct KV_CVTYPE{ static int toType() {return 0;} };
|
||||
|
||||
template<> struct KV_CVTYPE<int> { static int toType() {return CV_32SC1;} };
|
||||
@@ -101,7 +101,7 @@ void kvquicksort(Mat& keys, Mat& vals, bool isGreater = false)
|
||||
{
|
||||
vector<pair<key_type, val_type> > kvres;
|
||||
toKVPair(keys.begin<key_type>(), vals.begin<val_type>(), keys.cols, kvres);
|
||||
|
||||
|
||||
if(isGreater)
|
||||
{
|
||||
std::sort(kvres.begin(), kvres.end(), kvgreater<key_type, val_type>);
|
||||
@@ -180,7 +180,7 @@ bool checkUnstableSorterResult(const Mat& gkeys_, const Mat& gvals_,
|
||||
{
|
||||
++ iden_count;
|
||||
}
|
||||
|
||||
|
||||
// sort dv and gv
|
||||
int num_of_val = (iden_count + 1) * cn_val;
|
||||
std::sort(gvptr + i * cn_val, gvptr + i * cn_val + num_of_val);
|
||||
|
||||
@@ -225,7 +225,7 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
|
||||
cpu_result.setTo(0);
|
||||
|
||||
for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
|
||||
{
|
||||
{
|
||||
cv::Mat cpu_result_roi(cpu_result, *r);
|
||||
cpu_result_roi.setTo(1);
|
||||
cpu_result.copyTo(cpu_result);
|
||||
@@ -251,4 +251,3 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
|
||||
}
|
||||
return final_test_result;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user