Merge branch '2.4' of https://github.com/Itseez/opencv into 2.4_oclgfft
This commit is contained in:
@@ -116,6 +116,7 @@ If you use ``cvtColor`` with 8-bit images, the conversion will have some informa
|
||||
The function can do the following transformations:
|
||||
|
||||
*
|
||||
RGB :math:`\leftrightarrow` GRAY ( ``CV_BGR2GRAY, CV_RGB2GRAY, CV_GRAY2BGR, CV_GRAY2RGB`` )
|
||||
Transformations within RGB space like adding/removing the alpha channel, reversing the channel order, conversion to/from 16-bit RGB color (R5:G6:B5 or R5:G5:B5), as well as conversion to/from grayscale using:
|
||||
|
||||
.. math::
|
||||
@@ -765,7 +766,7 @@ Runs the GrabCut algorithm.
|
||||
|
||||
* **GC_PR_BGD** defines a possible background pixel.
|
||||
|
||||
* **GC_PR_BGD** defines a possible foreground pixel.
|
||||
* **GC_PR_FGD** defines a possible foreground pixel.
|
||||
|
||||
:param rect: ROI containing a segmented object. The pixels outside of the ROI are marked as "obvious background". The parameter is only used when ``mode==GC_INIT_WITH_RECT`` .
|
||||
|
||||
|
@@ -80,6 +80,14 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
|
||||
mMaxHeight = MAX_UNSPECIFIED;
|
||||
styledAttrs.recycle();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the camera index.
|
||||
* @param cameraIndex the camera index to store in {@code mCameraIndex}
|
||||
*/
|
||||
public void setCameraIndex(int cameraIndex) {
|
||||
this.mCameraIndex = cameraIndex;
|
||||
}
|
||||
|
||||
public interface CvCameraViewListener {
|
||||
/**
|
||||
|
@@ -49,7 +49,7 @@
|
||||
#include "opencv2/ocl/ocl.hpp"
|
||||
|
||||
#if defined __APPLE__
|
||||
#include <OpenCL/OpenCL.h>
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
@@ -356,8 +356,7 @@ static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
|
||||
char compile_option[128];
|
||||
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s",
|
||||
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
|
||||
rectKernel?"-D RECTKERNEL":"",
|
||||
s);
|
||||
s, rectKernel?"-D RECTKERNEL":"");
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||
|
@@ -1578,7 +1578,9 @@ static void openCLExecuteKernel_hog(Context *clCxt , const char **source, string
|
||||
size_t globalThreads[3], size_t localThreads[3],
|
||||
vector< pair<size_t, const void *> > &args)
|
||||
{
|
||||
size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>();
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, source, kernelName);
|
||||
size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
if (wave_size <= 16)
|
||||
{
|
||||
char build_options[64];
|
||||
|
@@ -43,9 +43,28 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402
|
||||
#define GCC_DIAG_STR(s) #s
|
||||
#define GCC_DIAG_JOINSTR(x,y) GCC_DIAG_STR(x ## y)
|
||||
# define GCC_DIAG_DO_PRAGMA(x) _Pragma (#x)
|
||||
# define GCC_DIAG_PRAGMA(x) GCC_DIAG_DO_PRAGMA(GCC diagnostic x)
|
||||
# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406
|
||||
# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(push) \
|
||||
GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
|
||||
# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(pop)
|
||||
# else
|
||||
# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
|
||||
# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(warning GCC_DIAG_JOINSTR(-W,x))
|
||||
# endif
|
||||
#else
|
||||
# define GCC_DIAG_OFF(x)
|
||||
# define GCC_DIAG_ON(x)
|
||||
#endif
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace cv
|
||||
@@ -121,6 +140,9 @@ namespace cv
|
||||
build_options, finish_mode);
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
GCC_DIAG_OFF(deprecated-declarations)
|
||||
#endif
|
||||
cl_mem bindTexture(const oclMat &mat)
|
||||
{
|
||||
cl_mem texture;
|
||||
@@ -180,10 +202,6 @@ namespace cv
|
||||
else
|
||||
#endif
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
texture = clCreateImage2D(
|
||||
(cl_context)mat.clCxt->oclContext(),
|
||||
CL_MEM_READ_WRITE,
|
||||
@@ -193,9 +211,6 @@ namespace cv
|
||||
0,
|
||||
NULL,
|
||||
&err);
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
size_t origin[] = { 0, 0, 0 };
|
||||
size_t region[] = { mat.cols, mat.rows, 1 };
|
||||
@@ -225,11 +240,14 @@ namespace cv
|
||||
openCLSafeCall(err);
|
||||
return texture;
|
||||
}
|
||||
#ifdef __GNUC__
|
||||
GCC_DIAG_ON(deprecated-declarations)
|
||||
#endif
|
||||
|
||||
Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
|
||||
{
|
||||
return Ptr<TextureCL>(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type()));
|
||||
}
|
||||
|
||||
void releaseTexture(cl_mem& texture)
|
||||
{
|
||||
openCLFree(texture);
|
||||
|
@@ -127,7 +127,7 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
#define dst_align ((dst_offset / 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
|
||||
@@ -165,7 +165,7 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
#define dst_align ((dst_offset / 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
|
||||
@@ -335,7 +335,7 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step,
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@@ -375,7 +375,7 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@@ -507,7 +507,7 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
|
@@ -126,7 +126,7 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
|
||||
@@ -164,7 +164,7 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
|
||||
@@ -288,7 +288,7 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
|
||||
|
@@ -277,9 +277,15 @@ __kernel void arithm_mul_D6 (__global double *src1, int src1_step, int src1_offs
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#define SCALAR_TYPE double
|
||||
#else
|
||||
#define SCALAR_TYPE float
|
||||
#endif
|
||||
|
||||
__kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offset,
|
||||
__global float *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, int dst_step1, float scalar)
|
||||
int rows, int cols, int dst_step1, SCALAR_TYPE scalar)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
@@ -120,7 +120,7 @@ __kernel void morph_C1_D0(__global const uchar * restrict src,
|
||||
int gidy = get_global_id(1);
|
||||
int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
|
||||
|
||||
if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3)==0)
|
||||
if(gidx+3<cols && gidy<rows && ((dst_offset_in_pixel&3)==0))
|
||||
{
|
||||
*(__global uchar4*)&dst[out_addr] = res;
|
||||
}
|
||||
|
@@ -143,7 +143,7 @@ __kernel void threshold_C1_D5(__global const float * restrict src, __global floa
|
||||
int4 dpos = (int4)(dstart, dstart+1, dstart+2, dstart+3);
|
||||
float4 dVal = *(__global float4*)(dst+dst_offset+gy*dst_step+dstart);
|
||||
int4 con = dpos >= 0 && dpos < dst_cols;
|
||||
ddata = convert_float4(con) != 0 ? ddata : dVal;
|
||||
ddata = convert_float4(con) != (float4)(0) ? ddata : dVal;
|
||||
if(dstart < dst_cols)
|
||||
{
|
||||
*(__global float4*)(dst+dst_offset+gy*dst_step+dstart) = ddata;
|
||||
|
@@ -46,145 +46,10 @@
|
||||
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
__kernel void calcSharrDeriv_vertical_C1_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols * cn)
|
||||
{
|
||||
const uchar src_val0 = (src + (y > 0 ? y-1 : rows > 1 ? 1 : 0) * srcStep)[x];
|
||||
const uchar src_val1 = (src + y * srcStep)[x];
|
||||
const uchar src_val2 = (src + (y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0) * srcStep)[x];
|
||||
|
||||
((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10;
|
||||
((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0;
|
||||
}
|
||||
}
|
||||
|
||||
// Vertical pass of the derivative computation: each work item handles one
// (x, y) position with y < rows and x < cols * cn. It reads the source value
// from the row above, the current row and the row below, then writes
//   dx_buf: 3 * (above + below) + 10 * current   (3-10-3 smoothing)
//   dy_buf: below - above                        (vertical difference)
__kernel void calcSharrDeriv_vertical_C4_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols * cn)
|
||||
{
|
||||
// At the top border (y == 0) row 1 is used as the "above" row; at the
// bottom border (y == rows - 1) row rows - 2 is used as the "below" row.
// NOTE(review): unlike calcSharrDeriv_vertical_C1_D0 there is no rows > 1
// guard, so for rows == 1 these expressions select rows 1 and -1 --
// confirm that a single-row image can never reach this kernel.
const uchar src_val0 = (src + (y > 0 ? y - 1 : 1) * srcStep)[x];
|
||||
const uchar src_val1 = (src + y * srcStep)[x];
|
||||
const uchar src_val2 = (src + (y < rows - 1 ? y + 1 : rows - 2) * srcStep)[x];
|
||||
|
||||
// The output row start is formed as a byte offset of y * step / 2 from the
// buffer base. NOTE(review): the unit of dx_bufStep / dy_bufStep is not
// visible here -- verify against the host-side caller.
((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10;
|
||||
((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void calcSharrDeriv_horizontal_C1_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
const int colsn = cols * cn;
|
||||
|
||||
if (y < rows && x < colsn)
|
||||
{
|
||||
__global const short* dx_buf_row = dx_buf + y * dx_bufStep;
|
||||
__global const short* dy_buf_row = dy_buf + y * dy_bufStep;
|
||||
|
||||
const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn;
|
||||
const int xl = x - cn >= 0 ? x - cn : cn + x;
|
||||
|
||||
((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl];
|
||||
((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void calcSharrDeriv_horizontal_C4_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
const int colsn = cols * cn;
|
||||
|
||||
if (y < rows && x < colsn)
|
||||
{
|
||||
__global const short* dx_buf_row = dx_buf + y * dx_bufStep;
|
||||
__global const short* dy_buf_row = dy_buf + y * dy_bufStep;
|
||||
|
||||
const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn;
|
||||
const int xl = x - cn >= 0 ? x - cn : cn + x;
|
||||
|
||||
((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl];
|
||||
((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10;
|
||||
}
|
||||
}
|
||||
|
||||
#define W_BITS 14
|
||||
#define W_BITS1 14
|
||||
|
||||
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
int linearFilter_uchar(__global const uchar* src, int srcStep, int cn, float2 pt, int x, int y)
|
||||
{
|
||||
int2 ipt;
|
||||
ipt.x = convert_int_sat_rtn(pt.x);
|
||||
ipt.y = convert_int_sat_rtn(pt.y);
|
||||
|
||||
float a = pt.x - ipt.x;
|
||||
float b = pt.y - ipt.y;
|
||||
|
||||
int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS));
|
||||
int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS));
|
||||
int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS));
|
||||
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
||||
|
||||
__global const uchar* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn;
|
||||
__global const uchar* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn;
|
||||
|
||||
return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1 - 5);
|
||||
}
|
||||
|
||||
int linearFilter_short(__global const short* src, int srcStep, int cn, float2 pt, int x, int y)
|
||||
{
|
||||
int2 ipt;
|
||||
ipt.x = convert_int_sat_rtn(pt.x);
|
||||
ipt.y = convert_int_sat_rtn(pt.y);
|
||||
|
||||
float a = pt.x - ipt.x;
|
||||
float b = pt.y - ipt.y;
|
||||
|
||||
int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS));
|
||||
int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS));
|
||||
int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS));
|
||||
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
||||
|
||||
__global const short* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn;
|
||||
__global const short* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn;
|
||||
|
||||
return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1);
|
||||
}
|
||||
|
||||
// Float variant of the bilinear sampling helpers. Splits pt into an integer
// base position and fractional offsets (a, b), derives four weights scaled by
// (1 << W_BITS), and combines the four neighbouring samples taken from two
// consecutive rows at columns x and x + cn.
// NOTE(review): see the comment on the return statement -- as written the
// function does not return the weighted sum.
float linearFilter_float(__global const float* src, int srcStep, int cn, float2 pt, float x, float y)
|
||||
{
|
||||
int2 ipt;
|
||||
// Saturating float -> int conversion using the "rtn" rounding mode.
ipt.x = convert_int_sat_rtn(pt.x);
|
||||
ipt.y = convert_int_sat_rtn(pt.y);
|
||||
|
||||
// Offsets of pt from the integer base position.
float a = pt.x - ipt.x;
|
||||
float b = pt.y - ipt.y;
|
||||
|
||||
// Bilinear weights scaled by (1 << W_BITS); iw11 is defined as the remainder
// so that the four weights add up to (1 << W_BITS).
float iw00 = ((1.0f - a) * (1.0f - b) * (1 << W_BITS));
|
||||
float iw01 = (a * (1.0f - b) * (1 << W_BITS));
|
||||
float iw10 = ((1.0f - a) * b * (1 << W_BITS));
|
||||
float iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
||||
|
||||
// Row pointers for the two rows being sampled; srcStep is divided by 4 here,
// presumably converting a byte stride into float elements -- TODO confirm.
__global const float* src_row = src + (int)(ipt.y + y) * srcStep / 4 + ipt.x * cn;
|
||||
__global const float* src_row1 = src + (int)(ipt.y + y + 1) * srcStep / 4 + ipt.x * cn;
|
||||
|
||||
// NOTE(review): the top-level comma below is the comma operator. The
// weighted sum on its left is evaluated and discarded, and the function
// returns the value of W_BITS1 - 5 instead. The intended scaling of the sum
// is not visible here -- confirm before relying on this helper.
return src_row[(int)x] * iw00 + src_row[(int)x + cn] * iw01 + src_row1[(int)x] * iw10 + src_row1[(int)x + cn] * iw11, W_BITS1 - 5;
|
||||
}
|
||||
|
||||
#define BUFFER 64
|
||||
|
||||
#ifndef WAVE_SIZE
|
||||
#define WAVE_SIZE 1
|
||||
#endif
|
||||
#ifdef CPU
|
||||
void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
|
||||
{
|
||||
@@ -193,71 +58,51 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local
|
||||
smem3[tid] = val3;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
smem2[tid] = val2 += smem2[tid + 128];
|
||||
smem3[tid] = val3 += smem3[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
smem2[tid] = val2 += smem2[tid + 64];
|
||||
smem3[tid] = val3 += smem3[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 32];
|
||||
smem2[tid] = val2 += smem2[tid + 32];
|
||||
smem3[tid] = val3 += smem3[tid + 32];
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
smem3[tid] += smem3[tid + 32];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 16];
|
||||
smem2[tid] = val2 += smem2[tid + 16];
|
||||
smem3[tid] = val3 += smem3[tid + 16];
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
smem3[tid] += smem3[tid + 16];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 8)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 8];
|
||||
smem2[tid] = val2 += smem2[tid + 8];
|
||||
smem3[tid] = val3 += smem3[tid + 8];
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
smem3[tid] += smem3[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 4)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 4];
|
||||
smem2[tid] = val2 += smem2[tid + 4];
|
||||
smem3[tid] = val3 += smem3[tid + 4];
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
smem3[tid] += smem3[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 2)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 2];
|
||||
smem2[tid] = val2 += smem2[tid + 2];
|
||||
smem3[tid] = val3 += smem3[tid + 2];
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem2[tid] += smem2[tid + 2];
|
||||
smem3[tid] += smem3[tid + 2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 1)
|
||||
{
|
||||
smem1[BUFFER] = val1 += smem1[tid + 1];
|
||||
smem2[BUFFER] = val2 += smem2[tid + 1];
|
||||
smem3[BUFFER] = val3 += smem3[tid + 1];
|
||||
smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
|
||||
smem2[BUFFER] = smem2[tid] + smem2[tid + 1];
|
||||
smem3[BUFFER] = smem3[tid] + smem3[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
@@ -268,63 +113,45 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l
|
||||
smem2[tid] = val2;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 128]);
|
||||
smem2[tid] = (val2 += smem2[tid + 128]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 64]);
|
||||
smem2[tid] = (val2 += smem2[tid + 64]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 32]);
|
||||
smem2[tid] = (val2 += smem2[tid + 32]);
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 16]);
|
||||
smem2[tid] = (val2 += smem2[tid + 16]);
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 8)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 8]);
|
||||
smem2[tid] = (val2 += smem2[tid + 8]);
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 4)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 4]);
|
||||
smem2[tid] = (val2 += smem2[tid + 4]);
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 2)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 2]);
|
||||
smem2[tid] = (val2 += smem2[tid + 2]);
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem2[tid] += smem2[tid + 2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 1)
|
||||
{
|
||||
smem1[BUFFER] = (val1 += smem1[tid + 1]);
|
||||
smem2[BUFFER] = (val2 += smem2[tid + 1]);
|
||||
smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
|
||||
smem2[BUFFER] = smem2[tid] + smem2[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
@@ -334,205 +161,146 @@ void reduce1(float val1, volatile __local float* smem1, int tid)
|
||||
smem1[tid] = val1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 128]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 64]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 32]);
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 16]);
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 8)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 8]);
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 4)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 4]);
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 2)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 2]);
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 1)
|
||||
{
|
||||
smem1[BUFFER] = (val1 += smem1[tid + 1]);
|
||||
smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
#else
|
||||
void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
|
||||
void reduce3(float val1, float val2, float val3,
|
||||
__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
smem3[tid] = val3;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
smem2[tid] = val2 += smem2[tid + 128];
|
||||
smem3[tid] = val3 += smem3[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
smem2[tid] = val2 += smem2[tid + 64];
|
||||
smem3[tid] = val3 += smem3[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
volatile __local float* vmem1 = smem1;
|
||||
volatile __local float* vmem2 = smem2;
|
||||
volatile __local float* vmem3 = smem3;
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
smem3[tid] += smem3[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
smem3[tid] += smem3[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 8) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
smem3[tid] += smem3[tid + 8];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32];
|
||||
vmem2[tid] = val2 += vmem2[tid + 32];
|
||||
vmem3[tid] = val3 += vmem3[tid + 32];
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
smem3[tid] += smem3[tid + 4];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 16];
|
||||
vmem2[tid] = val2 += vmem2[tid + 16];
|
||||
vmem3[tid] = val3 += vmem3[tid + 16];
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem2[tid] += smem2[tid + 2];
|
||||
smem3[tid] += smem3[tid + 2];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 8];
|
||||
vmem2[tid] = val2 += vmem2[tid + 8];
|
||||
vmem3[tid] = val3 += vmem3[tid + 8];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 4];
|
||||
vmem2[tid] = val2 += vmem2[tid + 4];
|
||||
vmem3[tid] = val3 += vmem3[tid + 4];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 2];
|
||||
vmem2[tid] = val2 += vmem2[tid + 2];
|
||||
vmem3[tid] = val3 += vmem3[tid + 2];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 1];
|
||||
vmem2[tid] = val2 += vmem2[tid + 1];
|
||||
vmem3[tid] = val3 += vmem3[tid + 1];
|
||||
smem1[tid] += smem1[tid + 1];
|
||||
smem2[tid] += smem2[tid + 1];
|
||||
smem3[tid] += smem3[tid + 1];
|
||||
}
|
||||
}
|
||||
|
||||
void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
|
||||
void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
smem2[tid] = val2 += smem2[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
smem2[tid] = val2 += smem2[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
volatile __local float* vmem1 = smem1;
|
||||
volatile __local float* vmem2 = smem2;
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 8) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32];
|
||||
vmem2[tid] = val2 += vmem2[tid + 32];
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 16];
|
||||
vmem2[tid] = val2 += vmem2[tid + 16];
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem2[tid] += smem2[tid + 2];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 8];
|
||||
vmem2[tid] = val2 += vmem2[tid + 8];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 4];
|
||||
vmem2[tid] = val2 += vmem2[tid + 4];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 2];
|
||||
vmem2[tid] = val2 += vmem2[tid + 2];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 1];
|
||||
vmem2[tid] = val2 += vmem2[tid + 1];
|
||||
smem1[tid] += smem1[tid + 1];
|
||||
smem2[tid] += smem2[tid + 1];
|
||||
}
|
||||
}
|
||||
|
||||
void reduce1(float val1, __local float* smem1, int tid)
|
||||
void reduce1(float val1, __local volatile float* smem1, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
volatile __local float* vmem1 = smem1;
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32];
|
||||
vmem1[tid] = val1 += vmem1[tid + 16];
|
||||
vmem1[tid] = val1 += vmem1[tid + 8];
|
||||
vmem1[tid] = val1 += vmem1[tid + 4];
|
||||
vmem1[tid] = val1 += vmem1[tid + 2];
|
||||
vmem1[tid] = val1 += vmem1[tid + 1];
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 8) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem1[tid] += smem1[tid + 1];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#define SCALE (1.0f / (1 << 20))
|
||||
#define THRESHOLD 0.01f
|
||||
#define DIMENSION 21
|
||||
|
||||
// Image read mode
|
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
|
||||
|
@@ -78,6 +78,7 @@
|
||||
|
||||
#if defined (HAVE_OPENCL)
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#include "opencv2/ocl/private/util.hpp"
|
||||
#include "safe_call.hpp"
|
||||
|
||||
|
@@ -15,8 +15,8 @@
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
// Nathan, liujun@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
@@ -56,31 +56,16 @@ namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *pyrlk;
|
||||
extern const char *pyrlk_no_image;
|
||||
extern const char *arithm_mul;
|
||||
}
|
||||
}
|
||||
|
||||
struct dim3
|
||||
{
|
||||
unsigned int x, y, z;
|
||||
};
|
||||
|
||||
struct float2
|
||||
{
|
||||
float x, y;
|
||||
};
|
||||
|
||||
struct int2
|
||||
{
|
||||
int x, y;
|
||||
};
|
||||
|
||||
namespace
|
||||
{
|
||||
void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
|
||||
static void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
|
||||
{
|
||||
winSize.width *= cn;
|
||||
|
||||
@@ -100,45 +85,6 @@ void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDe
|
||||
|
||||
block.z = patch.z = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void multiply_cus(const oclMat &src1, oclMat &dst, float scalar)
|
||||
{
|
||||
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
|
||||
{
|
||||
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
||||
return;
|
||||
}
|
||||
|
||||
CV_Assert(src1.cols == dst.cols &&
|
||||
src1.rows == dst.rows);
|
||||
|
||||
CV_Assert(src1.type() == dst.type());
|
||||
CV_Assert(src1.depth() != CV_8S);
|
||||
|
||||
Context *clCxt = src1.clCxt;
|
||||
|
||||
size_t localThreads[3] = { 16, 16, 1 };
|
||||
size_t globalThreads[3] = { src1.cols,
|
||||
src1.rows,
|
||||
1
|
||||
};
|
||||
|
||||
int dst_step1 = dst.cols * dst.elemSize();
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
||||
args.push_back( make_pair( sizeof(float), (float *)&scalar ));
|
||||
|
||||
openCLExecuteKernel(clCxt, &arithm_mul, "arithm_muls", globalThreads, localThreads, args, -1, src1.depth());
|
||||
}
|
||||
|
||||
static void lkSparse_run(oclMat &I, oclMat &J,
|
||||
const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
|
||||
@@ -151,15 +97,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
|
||||
size_t localThreads[3] = { 8, isImageSupported ? 8 : 32, 1 };
|
||||
size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 8 : 32, 1};
|
||||
int cn = I.oclchannels();
|
||||
char calcErr;
|
||||
if (level == 0)
|
||||
{
|
||||
calcErr = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
calcErr = 0;
|
||||
}
|
||||
char calcErr = level==0?1:0;
|
||||
|
||||
vector<pair<size_t , const void *> > args;
|
||||
|
||||
@@ -198,7 +136,17 @@ static void lkSparse_run(oclMat &I, oclMat &J,
|
||||
{
|
||||
if(isImageSupported)
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
|
||||
stringstream idxStr;
|
||||
idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth();
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str());
|
||||
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
|
||||
static char opt[16] = {0};
|
||||
sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
|
||||
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads,
|
||||
args, I.oclchannels(), I.depth(), opt);
|
||||
releaseTexture(ITex);
|
||||
releaseTexture(JTex);
|
||||
}
|
||||
@@ -241,8 +189,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
|
||||
|
||||
oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
|
||||
oclMat temp2 = nextPts.reshape(1);
|
||||
multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f);
|
||||
//::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
|
||||
multiply(1.0f/(1<<maxLevel)/2.0f, temp1, temp2);
|
||||
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
|
||||
status.setTo(Scalar::all(1));
|
||||
@@ -257,7 +204,6 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
|
||||
|
||||
// build the image pyramids.
|
||||
|
||||
prevPyr_.resize(maxLevel + 1);
|
||||
nextPyr_.resize(maxLevel + 1);
|
||||
|
||||
@@ -274,7 +220,6 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
|
||||
}
|
||||
|
||||
// dI/dx ~ Ix, dI/dy ~ Iy
|
||||
|
||||
for (int level = maxLevel; level >= 0; level--)
|
||||
{
|
||||
lkSparse_run(prevPyr_[level], nextPyr_[level],
|
||||
|
@@ -47,7 +47,7 @@
|
||||
#define __OPENCV_OPENCL_SAFE_CALL_HPP__
|
||||
|
||||
#if defined __APPLE__
|
||||
#include <OpenCL/OpenCL.h>
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
@@ -472,4 +472,8 @@ void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x,
|
||||
args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
|
||||
|
||||
openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
|
||||
|
||||
releaseTexture(I1_tex);
|
||||
releaseTexture(I1x_tex);
|
||||
releaseTexture(I1y_tex);
|
||||
}
|
Reference in New Issue
Block a user