From 00c36e88efec3f7f443d291a0f0e8572c7f589e4 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 14:18:49 +0300 Subject: [PATCH] reduce separable filter instantiates for tiny build --- modules/gpu/src/cuda/column_filter.10.cu | 4 + modules/gpu/src/cuda/column_filter.11.cu | 4 + modules/gpu/src/cuda/column_filter.12.cu | 4 + modules/gpu/src/cuda/column_filter.13.cu | 4 + modules/gpu/src/cuda/column_filter.14.cu | 4 + modules/gpu/src/cuda/column_filter.3.cu | 4 + modules/gpu/src/cuda/column_filter.4.cu | 4 + modules/gpu/src/cuda/column_filter.8.cu | 4 + modules/gpu/src/cuda/column_filter.9.cu | 4 + modules/gpu/src/cuda/column_filter.h | 187 ++++++++++++++++++++++- modules/gpu/src/cuda/row_filter.10.cu | 4 + modules/gpu/src/cuda/row_filter.11.cu | 4 + modules/gpu/src/cuda/row_filter.12.cu | 4 + modules/gpu/src/cuda/row_filter.13.cu | 4 + modules/gpu/src/cuda/row_filter.14.cu | 4 + modules/gpu/src/cuda/row_filter.3.cu | 4 + modules/gpu/src/cuda/row_filter.4.cu | 4 + modules/gpu/src/cuda/row_filter.8.cu | 4 + modules/gpu/src/cuda/row_filter.9.cu | 4 + modules/gpu/src/cuda/row_filter.h | 187 ++++++++++++++++++++++- modules/gpu/src/filtering.cpp | 26 ++++ 21 files changed, 470 insertions(+), 2 deletions(-) diff --git a/modules/gpu/src/cuda/column_filter.10.cu b/modules/gpu/src/cuda/column_filter.10.cu index b71e25207..81e4fe7a0 100644 --- a/modules/gpu/src/cuda/column_filter.10.cu +++ b/modules/gpu/src/cuda/column_filter.10.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.11.cu b/modules/gpu/src/cuda/column_filter.11.cu index ccfbf8e77..34a065453 100644 --- a/modules/gpu/src/cuda/column_filter.11.cu +++ b/modules/gpu/src/cuda/column_filter.11.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.12.cu b/modules/gpu/src/cuda/column_filter.12.cu index a38f93b53..bc0a45bc3 100644 --- a/modules/gpu/src/cuda/column_filter.12.cu +++ b/modules/gpu/src/cuda/column_filter.12.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.13.cu b/modules/gpu/src/cuda/column_filter.13.cu index 40eec7a83..b7facb6c0 100644 --- a/modules/gpu/src/cuda/column_filter.13.cu +++ b/modules/gpu/src/cuda/column_filter.13.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.14.cu b/modules/gpu/src/cuda/column_filter.14.cu index 08151ac6d..6db983786 100644 --- a/modules/gpu/src/cuda/column_filter.14.cu +++ b/modules/gpu/src/cuda/column_filter.14.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.3.cu b/modules/gpu/src/cuda/column_filter.3.cu index 7304565b9..339bdabc6 100644 --- a/modules/gpu/src/cuda/column_filter.3.cu +++ b/modules/gpu/src/cuda/column_filter.3.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.4.cu b/modules/gpu/src/cuda/column_filter.4.cu index 8c9db6985..37f9bd718 100644 --- a/modules/gpu/src/cuda/column_filter.4.cu +++ b/modules/gpu/src/cuda/column_filter.4.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.8.cu b/modules/gpu/src/cuda/column_filter.8.cu index 0a63a1dd4..b4ad5bd02 100644 --- a/modules/gpu/src/cuda/column_filter.8.cu +++ b/modules/gpu/src/cuda/column_filter.8.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.9.cu b/modules/gpu/src/cuda/column_filter.9.cu index 758d9289d..da64c3222 100644 --- a/modules/gpu/src/cuda/column_filter.9.cu +++ b/modules/gpu/src/cuda/column_filter.9.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.h b/modules/gpu/src/cuda/column_filter.h index 46e358315..139a6ef20 100644 --- a/modules/gpu/src/cuda/column_filter.h +++ b/modules/gpu/src/cuda/column_filter.h @@ -183,6 +183,186 @@ namespace filter { typedef void (*caller_t)(PtrStepSz src, PtrStepSz dst, int anchor, int cc, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[5][33] = + { + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColReflect101>, + 0, + column_filter::caller< 5, T, D, BrdColReflect101>, + 0, + column_filter::caller< 7, T, D, BrdColReflect101>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColReplicate>, + 0, + column_filter::caller< 5, T, D, BrdColReplicate>, + 0, + column_filter::caller< 7, T, D, BrdColReplicate>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColConstant>, + 0, + column_filter::caller< 5, T, D, BrdColConstant>, + 0, + column_filter::caller< 7, T, D, BrdColConstant>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColReflect>, + 0, + column_filter::caller< 5, T, D, BrdColReflect>, + 0, + column_filter::caller< 7, T, D, BrdColReflect>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + } + }; +#else static const caller_t callers[5][33] = { { @@ -361,12 +541,17 @@ namespace filter column_filter::caller<32, T, D, BrdColWrap> } }; +#endif + + const caller_t caller = callers[brd_type][ksize]; + if (!caller) + cv::gpu::error("Unsupported input parameters for column_filter", __FILE__, __LINE__, ""); if (stream == 0) cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); else cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - callers[brd_type][ksize]((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); + caller((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); } } diff --git a/modules/gpu/src/cuda/row_filter.10.cu b/modules/gpu/src/cuda/row_filter.10.cu index 7d93ee31a..c910270a6 100644 --- a/modules/gpu/src/cuda/row_filter.10.cu +++ b/modules/gpu/src/cuda/row_filter.10.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.11.cu b/modules/gpu/src/cuda/row_filter.11.cu index 31bccc48b..c5e1fbcd9 100644 --- a/modules/gpu/src/cuda/row_filter.11.cu +++ b/modules/gpu/src/cuda/row_filter.11.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.12.cu b/modules/gpu/src/cuda/row_filter.12.cu index 7be543f6b..017aff8e7 100644 --- a/modules/gpu/src/cuda/row_filter.12.cu +++ b/modules/gpu/src/cuda/row_filter.12.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.13.cu b/modules/gpu/src/cuda/row_filter.13.cu index bd700b1bb..676f5ae82 100644 --- a/modules/gpu/src/cuda/row_filter.13.cu +++ b/modules/gpu/src/cuda/row_filter.13.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.14.cu b/modules/gpu/src/cuda/row_filter.14.cu index 97df2f128..e8d0ec501 100644 --- a/modules/gpu/src/cuda/row_filter.14.cu +++ b/modules/gpu/src/cuda/row_filter.14.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.3.cu b/modules/gpu/src/cuda/row_filter.3.cu index fe8466695..57013781c 100644 --- a/modules/gpu/src/cuda/row_filter.3.cu +++ b/modules/gpu/src/cuda/row_filter.3.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.4.cu b/modules/gpu/src/cuda/row_filter.4.cu index 050f7af04..277ab7f87 100644 --- a/modules/gpu/src/cuda/row_filter.4.cu +++ b/modules/gpu/src/cuda/row_filter.4.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.8.cu b/modules/gpu/src/cuda/row_filter.8.cu index b899e87a7..e9dfd7f4a 100644 --- a/modules/gpu/src/cuda/row_filter.8.cu +++ b/modules/gpu/src/cuda/row_filter.8.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.9.cu b/modules/gpu/src/cuda/row_filter.9.cu index 516dd8fe7..eaad54d34 100644 --- a/modules/gpu/src/cuda/row_filter.9.cu +++ b/modules/gpu/src/cuda/row_filter.9.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.h b/modules/gpu/src/cuda/row_filter.h index 933f90029..9bfaf7f3d 100644 --- a/modules/gpu/src/cuda/row_filter.h +++ b/modules/gpu/src/cuda/row_filter.h @@ -182,6 +182,186 @@ namespace filter { typedef void (*caller_t)(PtrStepSz src, PtrStepSz dst, int anchor, int cc, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[5][33] = + { + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowReflect101>, + 0, + row_filter::caller< 5, T, D, BrdRowReflect101>, + 0, + row_filter::caller< 7, T, D, BrdRowReflect101>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowReplicate>, + 0, + row_filter::caller< 5, T, D, BrdRowReplicate>, + 0, + row_filter::caller< 7, T, D, BrdRowReplicate>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowConstant>, + 0, + row_filter::caller< 5, T, D, BrdRowConstant>, + 0, + row_filter::caller< 7, T, D, BrdRowConstant>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowReflect>, + 0, + row_filter::caller< 5, T, D, BrdRowReflect>, + 0, + row_filter::caller< 7, T, D, BrdRowReflect>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + } + }; +#else static const caller_t callers[5][33] = { { @@ -360,12 +540,17 @@ namespace filter row_filter::caller<32, T, D, BrdRowWrap> } }; +#endif + + const caller_t caller = callers[brd_type][ksize]; + if (!caller) + cv::gpu::error("Unsupported input parameters for row_filter", __FILE__, __LINE__, ""); if (stream == 0) cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); else cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - callers[brd_type][ksize]((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); + caller((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); } } diff --git a/modules/gpu/src/filtering.cpp b/modules/gpu/src/filtering.cpp index 8905eaed6..8f6e780a5 100644 --- a/modules/gpu/src/filtering.cpp +++ b/modules/gpu/src/filtering.cpp @@ -893,6 +893,18 @@ namespace Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType) { +#ifdef OPENCV_TINY_GPU_MODULE + static const gpuFilter1D_t funcs[7][4] = + { + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {0, 0, 0, 0} + }; +#else static const gpuFilter1D_t funcs[7][4] = { {filter::linearRow, 0, filter::linearRow, filter::linearRow}, @@ -903,6 +915,7 @@ Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, {filter::linearRow, 0, filter::linearRow, filter::linearRow}, {0, 0, 0, 0} }; +#endif static const nppFilter1D_t npp_funcs[] = { 0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R @@ -998,6 +1011,18 @@ namespace Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType) { +#ifdef OPENCV_TINY_GPU_MODULE + static const gpuFilter1D_t funcs[7][4] = + { + {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {0, 0, 0, 0} + }; +#else static const gpuFilter1D_t funcs[7][4] = { {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, @@ -1008,6 +1033,7 @@ Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, {0, 0, 0, 0} }; +#endif static const nppFilter1D_t npp_funcs[] = { 0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R