diff --git a/modules/contrib/src/basicretinafilter.cpp b/modules/contrib/src/basicretinafilter.cpp index a4270aa74..c9e6a92e4 100644 --- a/modules/contrib/src/basicretinafilter.cpp +++ b/modules/contrib/src/basicretinafilter.cpp @@ -345,8 +345,8 @@ void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const //float tempMeanValue=meanLuminance+_meanInputValue*_tau; updateCompressionParameter(meanLuminance); } -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_localAdaptation(localLuminance, inputFrame, outputFrame, _localLuminanceFactor, _localLuminanceAddon, _maxInputValue), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(0,_filterOutput.getNBpixels()), Parallel_localAdaptation(localLuminance, inputFrame, outputFrame, _localLuminanceFactor, _localLuminanceAddon, _maxInputValue)); #else //std::cout<<meanLuminance<<std::endl; const float *localLuminancePTR=localLuminance; @@ -466,8 +466,8 @@ void BasicRetinaFilter::_horizontalCausalFilter(float *outputFrame, unsigned int // horizontal causal filter which adds the input inside void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalCausalFilter_addInput(inputFrame, outputFrame, IDrowStart, _filterOutput.getNBcolumns(), _a, _tau), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDrowStart,IDrowEnd), Parallel_horizontalCausalFilter_addInput(inputFrame, outputFrame, IDrowStart, _filterOutput.getNBcolumns(), _a, _tau)); #else for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow) { @@ -487,8 +487,8 @@ void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned 
int IDrowEnd) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter(outputFrame, IDrowEnd, _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter(outputFrame, IDrowEnd, _filterOutput.getNBcolumns(), _a )); #else for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow) { @@ -523,8 +523,8 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame, // vertical anticausal filter void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a )); #else for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn) { @@ -566,8 +566,8 @@ void BasicRetinaFilter::_verticalAnticausalFilter(float *outputFrame, unsigned i // vertical anticausal filter which multiplies the output by _gain void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalAnticausalFilter_multGain(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a, _gain ), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDcolumnStart,IDcolumnEnd), Parallel_verticalAnticausalFilter_multGain(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a, _gain )); #else float* 
offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns(); //#pragma omp parallel for @@ -819,8 +819,8 @@ void BasicRetinaFilter::_horizontalCausalFilter_Irregular_addInput(const float * // horizontal anticausal filter (basic way, no add on) void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter_Irregular(outputFrame, spatialConstantBuffer, IDrowEnd, _filterOutput.getNBcolumns()), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter_Irregular(outputFrame, spatialConstantBuffer, IDrowEnd, _filterOutput.getNBcolumns())); #else register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1; register const float* spatialConstantPTR=spatialConstantBuffer+IDrowEnd*(_filterOutput.getNBcolumns())-1; @@ -841,8 +841,8 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame // vertical anticausal filter void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter_Irregular(outputFrame, spatialConstantBuffer, _filterOutput.getNBrows(), _filterOutput.getNBcolumns()), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter_Irregular(outputFrame, spatialConstantBuffer, _filterOutput.getNBrows(), _filterOutput.getNBcolumns())); #else for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn) { diff --git a/modules/contrib/src/basicretinafilter.hpp 
b/modules/contrib/src/basicretinafilter.hpp index c0cf100a4..0039c9a6f 100644 --- a/modules/contrib/src/basicretinafilter.hpp +++ b/modules/contrib/src/basicretinafilter.hpp @@ -436,16 +436,16 @@ protected: void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output -#ifdef HAVE_TBB +#ifdef MAKE_PARALLEL /****************************************************** -** IF TBB is useable, then, main loops are parallelized using these functors +** IF some parallelizing thread methods are available, then, main loops are parallelized using these functors ** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary ** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised ** ==> functors constructors can differ from the parameters used with their related serial functions */ #define _DEBUG_TBB // define DEBUG_TBB in order to display additionnal data on stdout - class Parallel_horizontalAnticausalFilter + class Parallel_horizontalAnticausalFilter: public cv::ParallelLoopBody { private: float *outputFrame; @@ -465,16 +465,16 @@ protected: #endif } - void operator()( const tbb::blocked_range<size_t>& r ) const { + virtual void operator()( const Range& r ) const { #ifdef DEBUG_TBB std::cout<<"Parallel_horizontalAnticausalFilter::operator() :" <<"\n\t range size="<<r.size() - <<"\n\t first index="<<r.begin() + <<"\n\t first index="<<r.start //<<"\n\t last index="<<filterParam <<std::endl; #endif - for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow) + for (int IDrow=r.start; IDrow!=r.end; ++IDrow) { register float* 
outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1; register float result=0; @@ -487,7 +487,7 @@ protected: } }; - class Parallel_horizontalCausalFilter_addInput + class Parallel_horizontalCausalFilter_addInput: public cv::ParallelLoopBody { private: const float *inputFrame; @@ -498,8 +498,8 @@ protected: Parallel_horizontalCausalFilter_addInput(const float *bufferToAddAsInputProcess, float *bufferToProcess, const unsigned int idStart, const unsigned int nbCols, const float a, const float tau) :inputFrame(bufferToAddAsInputProcess), outputFrame(bufferToProcess), IDrowStart(idStart), nbColumns(nbCols), filterParam_a(a), filterParam_tau(tau){} - void operator()( const tbb::blocked_range<size_t>& r ) const { - for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow) + virtual void operator()( const Range& r ) const { + for (int IDrow=r.start; IDrow!=r.end; ++IDrow) { register float* outputPTR=outputFrame+(IDrowStart+IDrow)*nbColumns; register const float* inputPTR=inputFrame+(IDrowStart+IDrow)*nbColumns; @@ -513,7 +513,7 @@ protected: } }; - class Parallel_verticalCausalFilter + class Parallel_verticalCausalFilter: public cv::ParallelLoopBody { private: float *outputFrame; @@ -523,8 +523,8 @@ protected: Parallel_verticalCausalFilter(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a ) :outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a){} - void operator()( const tbb::blocked_range<size_t>& r ) const { - for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn) + virtual void operator()( const Range& r ) const { + for (int IDcolumn=r.start; IDcolumn!=r.end; ++IDcolumn) { register float result=0; register float *outputPTR=outputFrame+IDcolumn; @@ -540,7 +540,7 @@ protected: } }; - class Parallel_verticalAnticausalFilter_multGain + class Parallel_verticalAnticausalFilter_multGain: public cv::ParallelLoopBody { private: float *outputFrame; @@ -550,9 +550,9 @@ protected: 
Parallel_verticalAnticausalFilter_multGain(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a, const float gain) :outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a), filterParam_gain(gain){} - void operator()( const tbb::blocked_range<size_t>& r ) const { + virtual void operator()( const Range& r ) const { float* offset=outputFrame+nbColumns*nbRows-nbColumns; - for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn) + for (int IDcolumn=r.start; IDcolumn!=r.end; ++IDcolumn) { register float result=0; register float *outputPTR=offset+IDcolumn; @@ -568,7 +568,7 @@ protected: } }; - class Parallel_localAdaptation + class Parallel_localAdaptation: public cv::ParallelLoopBody { private: const float *localLuminance, *inputFrame; @@ -578,11 +578,11 @@ protected: Parallel_localAdaptation(const float *localLum, const float *inputImg, float *bufferToProcess, const float localLuminanceFact, const float localLuminanceAdd, const float maxInputVal) :localLuminance(localLum), inputFrame(inputImg),outputFrame(bufferToProcess), localLuminanceFactor(localLuminanceFact), localLuminanceAddon(localLuminanceAdd), maxInputValue(maxInputVal) {}; - void operator()( const tbb::blocked_range<size_t>& r ) const { - const float *localLuminancePTR=localLuminance+r.begin(); - const float *inputFramePTR=inputFrame+r.begin(); - float *outputFramePTR=outputFrame+r.begin(); - for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR) + virtual void operator()( const Range& r ) const { + const float *localLuminancePTR=localLuminance+r.start; + const float *inputFramePTR=inputFrame+r.start; + float *outputFramePTR=outputFrame+r.start; + for (register int IDpixel=r.start ; IDpixel!=r.end ; ++IDpixel, ++inputFramePTR, ++outputFramePTR) { float X0=*(localLuminancePTR++)*localLuminanceFactor+localLuminanceAddon; // TODO : the following line can lead to a divide by zero ! 
A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values... @@ -594,7 +594,7 @@ protected: ////////////////////////////////////////// /// Specific filtering methods which manage non const spatial filtering parameter (used By retinacolor and LogProjectors) - class Parallel_horizontalAnticausalFilter_Irregular + class Parallel_horizontalAnticausalFilter_Irregular: public cv::ParallelLoopBody { private: float *outputFrame; @@ -604,9 +604,9 @@ protected: Parallel_horizontalAnticausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int idEnd, const unsigned int nbCols) :outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), IDrowEnd(idEnd), nbColumns(nbCols){} -void operator()( const tbb::blocked_range<size_t>& r ) const { +virtual void operator()( const Range& r ) const { - for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow) + for (int IDrow=r.start; IDrow!=r.end; ++IDrow) { register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1; register const float* spatialConstantPTR=spatialConstantBuffer+(IDrowEnd-IDrow)*(nbColumns)-1; @@ -620,7 +620,7 @@ void operator()( const tbb::blocked_range<size_t>& r ) const { } }; - class Parallel_verticalCausalFilter_Irregular + class Parallel_verticalCausalFilter_Irregular: public cv::ParallelLoopBody { private: float *outputFrame; @@ -630,8 +630,8 @@ void operator()( const tbb::blocked_range<size_t>& r ) const { Parallel_verticalCausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int nbRws, const unsigned int nbCols) :outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), nbRows(nbRws), nbColumns(nbCols){} - void operator()( const tbb::blocked_range<size_t>& r ) const { - for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn) + virtual void operator()( const Range& r ) const { + for (int IDcolumn=r.start; IDcolumn!=r.end; ++IDcolumn) { 
register float result=0; register float *outputPTR=outputFrame+IDcolumn; diff --git a/modules/contrib/src/magnoretinafilter.cpp b/modules/contrib/src/magnoretinafilter.cpp index 4626133ef..9bdf4e613 100644 --- a/modules/contrib/src/magnoretinafilter.cpp +++ b/modules/contrib/src/magnoretinafilter.cpp @@ -153,8 +153,8 @@ void MagnoRetinaFilter::setCoefficientsTable(const float parasolCells_beta, cons void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float *OPL_OFF) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_amacrineCellsComputing(OPL_ON, OPL_OFF, &_previousInput_ON[0], &_previousInput_OFF[0], &_amacrinCellsTempOutput_ON[0], &_amacrinCellsTempOutput_OFF[0], _temporalCoefficient), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(0,_filterOutput.getNBpixels()), Parallel_amacrineCellsComputing(OPL_ON, OPL_OFF, &_previousInput_ON[0], &_previousInput_OFF[0], &_amacrinCellsTempOutput_ON[0], &_amacrinCellsTempOutput_OFF[0], _temporalCoefficient)); #else register const float *OPL_ON_PTR=OPL_ON; register const float *OPL_OFF_PTR=OPL_OFF; diff --git a/modules/contrib/src/magnoretinafilter.hpp b/modules/contrib/src/magnoretinafilter.hpp index daefb7448..b14abfd5c 100644 --- a/modules/contrib/src/magnoretinafilter.hpp +++ b/modules/contrib/src/magnoretinafilter.hpp @@ -192,14 +192,14 @@ private: // amacrine cells filter : high pass temporal filter void _amacrineCellsComputing(const float *ONinput, const float *OFFinput); -#ifdef HAVE_TBB +#ifdef MAKE_PARALLEL /****************************************************** -** IF TBB is useable, then, main loops are parallelized using these functors +** IF some parallelizing thread methods are available, then, main loops are parallelized using these functors ** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... 
TODO : increase the number of parallelised methods as necessary ** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised ** ==> functors constructors can differ from the parameters used with their related serial functions */ - class Parallel_amacrineCellsComputing + class Parallel_amacrineCellsComputing: public cv::ParallelLoopBody { private: const float *OPL_ON, *OPL_OFF; @@ -209,15 +209,15 @@ private: Parallel_amacrineCellsComputing(const float *OPL_ON_PTR, const float *OPL_OFF_PTR, float *previousInput_ON_PTR, float *previousInput_OFF_PTR, float *amacrinCellsTempOutput_ON_PTR, float *amacrinCellsTempOutput_OFF_PTR, float temporalCoefficientVal) :OPL_ON(OPL_ON_PTR), OPL_OFF(OPL_OFF_PTR), previousInput_ON(previousInput_ON_PTR), previousInput_OFF(previousInput_OFF_PTR), amacrinCellsTempOutput_ON(amacrinCellsTempOutput_ON_PTR), amacrinCellsTempOutput_OFF(amacrinCellsTempOutput_OFF_PTR), temporalCoefficient(temporalCoefficientVal) {} - void operator()( const tbb::blocked_range<size_t>& r ) const { - register const float *OPL_ON_PTR=OPL_ON+r.begin(); - register const float *OPL_OFF_PTR=OPL_OFF+r.begin(); - register float *previousInput_ON_PTR= previousInput_ON+r.begin(); - register float *previousInput_OFF_PTR= previousInput_OFF+r.begin(); - register float *amacrinCellsTempOutput_ON_PTR= amacrinCellsTempOutput_ON+r.begin(); - register float *amacrinCellsTempOutput_OFF_PTR= amacrinCellsTempOutput_OFF+r.begin(); + virtual void operator()( const Range& r ) const { + register const float *OPL_ON_PTR=OPL_ON+r.start; + register const float *OPL_OFF_PTR=OPL_OFF+r.start; + register float *previousInput_ON_PTR= previousInput_ON+r.start; + register float *previousInput_OFF_PTR= previousInput_OFF+r.start; + register float *amacrinCellsTempOutput_ON_PTR= amacrinCellsTempOutput_ON+r.start; + register float *amacrinCellsTempOutput_OFF_PTR= amacrinCellsTempOutput_OFF+r.start; - for (unsigned int IDpixel=r.begin() ; IDpixel!=r.end(); 
++IDpixel) + for (int IDpixel=r.start ; IDpixel!=r.end; ++IDpixel) { /* Compute ON and OFF amacrin cells high pass temporal filter */ diff --git a/modules/contrib/src/parvoretinafilter.cpp b/modules/contrib/src/parvoretinafilter.cpp index 50d1b0ba6..64464ebdd 100644 --- a/modules/contrib/src/parvoretinafilter.cpp +++ b/modules/contrib/src/parvoretinafilter.cpp @@ -204,8 +204,8 @@ void ParvoRetinaFilter::_OPL_OnOffWaysComputing() // WARNING : this method requi // loop that makes the difference between photoreceptor cells output and horizontal cells // positive part goes on the ON way, negative pat goes on the OFF way -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_OPL_OnOffWaysComputing(&_photoreceptorsOutput[0], &_horizontalCellsOutput[0], &_bipolarCellsOutputON[0], &_bipolarCellsOutputOFF[0], &_parvocellularOutputON[0], &_parvocellularOutputOFF[0]), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(0,_filterOutput.getNBpixels()), Parallel_OPL_OnOffWaysComputing(&_photoreceptorsOutput[0], &_horizontalCellsOutput[0], &_bipolarCellsOutputON[0], &_bipolarCellsOutputOFF[0], &_parvocellularOutputON[0], &_parvocellularOutputOFF[0])); #else float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0]; float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0]; diff --git a/modules/contrib/src/parvoretinafilter.hpp b/modules/contrib/src/parvoretinafilter.hpp index 76f550685..2131ef916 100644 --- a/modules/contrib/src/parvoretinafilter.hpp +++ b/modules/contrib/src/parvoretinafilter.hpp @@ -216,14 +216,14 @@ private: // private functions void _OPL_OnOffWaysComputing(); -#ifdef HAVE_TBB +#ifdef MAKE_PARALLEL /****************************************************** -** IF TBB is useable, then, main loops are parallelized using these functors +** IF some parallelizing thread methods are available, then, main loops are parallelized using these functors ** ==> main idea paralellise main filters 
loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary ** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised ** ==> functors constructors can differ from the parameters used with their related serial functions */ - class Parallel_OPL_OnOffWaysComputing + class Parallel_OPL_OnOffWaysComputing: public cv::ParallelLoopBody { private: float *photoreceptorsOutput, *horizontalCellsOutput, *bipolarCellsON, *bipolarCellsOFF, *parvocellularOutputON, *parvocellularOutputOFF; @@ -231,17 +231,17 @@ private: Parallel_OPL_OnOffWaysComputing(float *photoreceptorsOutput_PTR, float *horizontalCellsOutput_PTR, float *bipolarCellsON_PTR, float *bipolarCellsOFF_PTR, float *parvocellularOutputON_PTR, float *parvocellularOutputOFF_PTR) :photoreceptorsOutput(photoreceptorsOutput_PTR), horizontalCellsOutput(horizontalCellsOutput_PTR), bipolarCellsON(bipolarCellsON_PTR), bipolarCellsOFF(bipolarCellsOFF_PTR), parvocellularOutputON(parvocellularOutputON_PTR), parvocellularOutputOFF(parvocellularOutputOFF_PTR) {} - void operator()( const tbb::blocked_range<size_t>& r ) const { + virtual void operator()( const Range& r ) const { // compute bipolar cells response equal to photoreceptors minus horizontal cells response // and copy the result on parvo cellular outputs... 
keeping time before their local contrast adaptation for final result - float *photoreceptorsOutput_PTR= photoreceptorsOutput+r.begin(); - float *horizontalCellsOutput_PTR= horizontalCellsOutput+r.begin(); - float *bipolarCellsON_PTR = bipolarCellsON+r.begin(); - float *bipolarCellsOFF_PTR = bipolarCellsOFF+r.begin(); - float *parvocellularOutputON_PTR= parvocellularOutputON+r.begin(); - float *parvocellularOutputOFF_PTR= parvocellularOutputOFF+r.begin(); + float *photoreceptorsOutput_PTR= photoreceptorsOutput+r.start; + float *horizontalCellsOutput_PTR= horizontalCellsOutput+r.start; + float *bipolarCellsON_PTR = bipolarCellsON+r.start; + float *bipolarCellsOFF_PTR = bipolarCellsOFF+r.start; + float *parvocellularOutputON_PTR= parvocellularOutputON+r.start; + float *parvocellularOutputOFF_PTR= parvocellularOutputOFF+r.start; - for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel) + for (register int IDpixel=r.start ; IDpixel!=r.end ; ++IDpixel) { float pixelDifference = *(photoreceptorsOutput_PTR++) -*(horizontalCellsOutput_PTR++); // test condition to allow write pixelDifference in ON or OFF buffer and 0 in the over diff --git a/modules/contrib/src/retinacolor.cpp b/modules/contrib/src/retinacolor.cpp index 247234ffe..d138582ce 100644 --- a/modules/contrib/src/retinacolor.cpp +++ b/modules/contrib/src/retinacolor.cpp @@ -433,8 +433,8 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const if (inputOutputBuffer==NULL) inputOutputBuffer= &_demultiplexedColorFrame[0]; -#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method - tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()*3), Parallel_clipBufferValues<float>(inputOutputBuffer, 0, maxInputValue), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL // call the TemplateBuffer TBB clipping method + cv::parallel_for_(cv::Range(0,_filterOutput.getNBpixels()*3), Parallel_clipBufferValues<float>(inputOutputBuffer, 0, maxInputValue)); #else 
register float *inputOutputBufferPTR=inputOutputBuffer; for (register unsigned int jf = 0; jf < _filterOutput.getNBpixels()*3; ++jf, ++inputOutputBufferPTR) @@ -580,8 +580,8 @@ void RetinaColor::_adaptiveSpatialLPfilter(const float *inputFrame, float *outpu // horizontal causal filter which adds the input inside... replaces the parent _horizontalCausalFilter_Irregular_addInput by avoiding a product for each pixel void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, &_imageGradient[0], _filterOutput.getNBcolumns()), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDrowStart,IDrowEnd), Parallel_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, &_imageGradient[0], _filterOutput.getNBcolumns())); #else register float* outputPTR=outputFrame+IDrowStart*_filterOutput.getNBcolumns(); register const float* inputPTR=inputFrame+IDrowStart*_filterOutput.getNBcolumns(); @@ -604,8 +604,8 @@ void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFra // vertical anticausal filter which multiplies the output by _gain... 
replaces the parent _verticalAnticausalFilter_multGain by avoiding a product for each pixel and taking into account the second layer of the _imageGradient buffer void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_adaptiveVerticalAnticausalFilter_multGain(outputFrame, &_imageGradient[0]+_filterOutput.getNBpixels(), _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _gain), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDcolumnStart,IDcolumnEnd), Parallel_adaptiveVerticalAnticausalFilter_multGain(outputFrame, &_imageGradient[0]+_filterOutput.getNBpixels(), _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _gain)); #else float* outputOffset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns(); float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()*2-_filterOutput.getNBcolumns(); @@ -661,8 +661,8 @@ void RetinaColor::_computeGradient(const float *luminance) } } } - } + bool RetinaColor::applyKrauskopfLMS2Acr1cr2Transform(std::valarray<float> &result) { bool processSuccess=true; diff --git a/modules/contrib/src/retinacolor.hpp b/modules/contrib/src/retinacolor.hpp index ec5d060c5..67834629e 100644 --- a/modules/contrib/src/retinacolor.hpp +++ b/modules/contrib/src/retinacolor.hpp @@ -256,16 +256,16 @@ protected: // color space transform void _applyImageColorSpaceConversion(const std::valarray<float> &inputFrame, std::valarray<float> &outputFrame, const float *transformTable); -#ifdef HAVE_TBB +#ifdef MAKE_PARALLEL /****************************************************** -** IF TBB is useable, then, main loops are parallelized using these functors +** IF some parallelizing thread methods are available, then, main loops are parallelized using these functors ** ==> main idea paralellise main filters loops, then, only 
the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary ** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised ** ==> functors constructors can differ from the parameters used with their related serial functions */ /* Template : - class + class Parallel_ : public cv::ParallelLoopBody { private: @@ -273,12 +273,12 @@ protected: Parallel_() : {} - void operator()( const tbb::blocked_range<size_t>& r ) const { + virtual void operator()( const cv::Range& r ) const { } }: */ - class Parallel_adaptiveHorizontalCausalFilter_addInput + class Parallel_adaptiveHorizontalCausalFilter_addInput: public cv::ParallelLoopBody { private: float *outputFrame; @@ -288,11 +288,11 @@ protected: Parallel_adaptiveHorizontalCausalFilter_addInput(const float *inputImg, float *bufferToProcess, const float *imageGrad, const unsigned int nbCols) :outputFrame(bufferToProcess), inputFrame(inputImg), imageGradient(imageGrad), nbColumns(nbCols) {}; - void operator()( const tbb::blocked_range<size_t>& r ) const { - register float* outputPTR=outputFrame+r.begin()*nbColumns; - register const float* inputPTR=inputFrame+r.begin()*nbColumns; - register const float *imageGradientPTR= imageGradient+r.begin()*nbColumns; - for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow) + virtual void operator()( const Range& r ) const { + register float* outputPTR=outputFrame+r.start*nbColumns; + register const float* inputPTR=inputFrame+r.start*nbColumns; + register const float *imageGradientPTR= imageGradient+r.start*nbColumns; + for (int IDrow=r.start; IDrow!=r.end; ++IDrow) { register float result=0; for (unsigned int index=0; index<nbColumns; ++index) @@ -304,7 +304,7 @@ protected: } }; - class Parallel_adaptiveVerticalAnticausalFilter_multGain + class Parallel_adaptiveVerticalAnticausalFilter_multGain: public cv::ParallelLoopBody { private: float *outputFrame; @@ -315,10 +315,10 @@ protected: 
Parallel_adaptiveVerticalAnticausalFilter_multGain(float *bufferToProcess, const float *imageGrad, const unsigned int nbRws, const unsigned int nbCols, const float gain) :outputFrame(bufferToProcess), imageGradient(imageGrad), nbRows(nbRws), nbColumns(nbCols), filterParam_gain(gain){} - void operator()( const tbb::blocked_range<size_t>& r ) const { + virtual void operator()( const Range& r ) const { float* offset=outputFrame+nbColumns*nbRows-nbColumns; const float* gradOffset= imageGradient+nbColumns*nbRows-nbColumns; - for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn) + for (int IDcolumn=r.start; IDcolumn!=r.end; ++IDcolumn) { register float result=0; register float *outputPTR=offset+IDcolumn; diff --git a/modules/contrib/src/templatebuffer.hpp b/modules/contrib/src/templatebuffer.hpp index b119b9508..50aedce38 100644 --- a/modules/contrib/src/templatebuffer.hpp +++ b/modules/contrib/src/templatebuffer.hpp @@ -71,15 +71,14 @@ #include <cmath> -//// If TBB is used +//// If a parallelization method is available then, you should define MAKE_PARALLEL, in the other case, the classical serial code will be used +#define MAKE_PARALLEL // ==> then include required includes -#ifdef HAVE_TBB -#include "tbb/parallel_for.h" -#include "tbb/blocked_range.h" +#ifdef MAKE_PARALLEL // ==> declare usefull generic tools template <class type> -class Parallel_clipBufferValues +class Parallel_clipBufferValues: public cv::ParallelLoopBody { private: type *bufferToClip; @@ -89,9 +88,9 @@ public: Parallel_clipBufferValues(type* bufferToProcess, const type min, const type max) : bufferToClip(bufferToProcess), minValue(min), maxValue(max){} - void operator()( const tbb::blocked_range<size_t>& r ) const { - register type *inputOutputBufferPTR=bufferToClip+r.begin(); - for (register unsigned int jf = r.begin(); jf != r.end(); ++jf, ++inputOutputBufferPTR) + virtual void operator()( const cv::Range &r ) const { + register type *inputOutputBufferPTR=bufferToClip+r.start; + for 
(register int jf = r.start; jf != r.end; ++jf, ++inputOutputBufferPTR) { if (*inputOutputBufferPTR>maxValue) *inputOutputBufferPTR=maxValue; @@ -389,8 +388,8 @@ public: std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl; // clipping values outside than the updated thresholds bufferPTR=this->Buffer(); -#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method - tbb::parallel_for(tbb::blocked_range<size_t>(0,this->size()), Parallel_clipBufferValues<type>(bufferPTR, updatedLowValue, updatedHighValue), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL // call the TemplateBuffer parallel clipping method (cv::parallel_for_ takes a cv::Range, not a tbb::blocked_range) + cv::parallel_for_(cv::Range(0,static_cast<int>(this->size())), Parallel_clipBufferValues<type>(bufferPTR, updatedLowValue, updatedHighValue)); #else for (unsigned int i=0;i<this->size();++i, ++bufferPTR)