From a92721b5695d3ddc26d1bc362ba77ae7f372158a Mon Sep 17 00:00:00 2001 From: pengxiao Date: Tue, 10 Sep 2013 14:33:23 +0800 Subject: [PATCH 1/2] Fix retina ocl on NV. Previously we incorrectly assumed data was aligned to 16 bytes; it is actually aligned to 32 bytes. --- .../bioinspired/src/opencl/retina_kernel.cl | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/modules/bioinspired/src/opencl/retina_kernel.cl b/modules/bioinspired/src/opencl/retina_kernel.cl index 1eac50324..515dfdea5 100644 --- a/modules/bioinspired/src/opencl/retina_kernel.cl +++ b/modules/bioinspired/src/opencl/retina_kernel.cl @@ -43,6 +43,9 @@ // //M*/ +//data (which is float) is aligned to 32 bytes +#define WIDTH_MULTIPLE (32 >> 2) + ///////////////////////////////////////////////////////// //******************************************************* // basicretinafilter @@ -116,22 +119,18 @@ kernel void horizontalAnticausalFilter( float4 result_v4 = (float4)(0), out_v4; float result = 0; - // we assume elements_per_row is multple of 4 - for(int i = 0; i < 4; ++ i, -- optr) + // we assume elements_per_row is a multiple of WIDTH_MULTIPLE + for(int i = 0; i < WIDTH_MULTIPLE; ++ i, -- optr) { - if(i < elements_per_row - cols) - { - *optr = result; - } - else + if(i >= elements_per_row - cols) { result = *optr + _a * result; - *optr = result; } + *optr = result; } result_v4.x = result; optr -= 3; - for(int i = 1; i < elements_per_row / 4; ++i, optr -= 4) + for(int i = WIDTH_MULTIPLE / 4; i < elements_per_row / 4; ++i, optr -= 4) { // shift left, `offset` is type `size_t` so it cannot be negative out_v4 = vload4(0, optr); @@ -223,23 +222,19 @@ kernel void horizontalAnticausalFilter_Irregular( float4 buf_v4, out_v4, res_v4 = (float4)(0); float result = 0; - // we assume elements_per_row is multple of 4 - for(int i = 0; i < 4; ++ i, -- optr, -- bptr) + // we assume elements_per_row is a multiple of WIDTH_MULTIPLE + for(int i = 0; i < WIDTH_MULTIPLE; ++ i, -- optr, -- bptr) { - if(i < 
elements_per_row - cols) - { - *optr = result; - } - else + if(i >= elements_per_row - cols) { result = *optr + *bptr * result; - *optr = result; } + *optr = result; } res_v4.x = result; optr -= 3; bptr -= 3; - for(int i = 0; i < elements_per_row / 4 - 1; ++i, optr -= 4, bptr -= 4) + for(int i = WIDTH_MULTIPLE / 4; i < elements_per_row / 4; ++i, optr -= 4, bptr -= 4) { buf_v4 = vload4(0, bptr); out_v4 = vload4(0, optr); From 8767c474632ff409d3c9dcbf614ab9a5f3ba5886 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Tue, 10 Sep 2013 15:13:53 +0800 Subject: [PATCH 2/2] Fix whitespaces. --- modules/bioinspired/src/opencl/retina_kernel.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/bioinspired/src/opencl/retina_kernel.cl b/modules/bioinspired/src/opencl/retina_kernel.cl index 515dfdea5..169be4d27 100644 --- a/modules/bioinspired/src/opencl/retina_kernel.cl +++ b/modules/bioinspired/src/opencl/retina_kernel.cl @@ -126,7 +126,7 @@ kernel void horizontalAnticausalFilter( { result = *optr + _a * result; } - *optr = result; + *optr = result; } result_v4.x = result; optr -= 3; @@ -229,7 +229,7 @@ kernel void horizontalAnticausalFilter_Irregular( { result = *optr + *bptr * result; } - *optr = result; + *optr = result; } res_v4.x = result; optr -= 3;