From 3e8c35a347c1a899b03999931b14adcbf8a1a4f6 Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Wed, 8 Aug 2012 15:17:26 +0400 Subject: [PATCH] added perf. test for GFF --- modules/gpu/src/cuda/ccomponetns.cu | 12 ++++++------ modules/gpu/src/graphcuts.cpp | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu index 3a54c494f..11d4742df 100644 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -485,7 +485,9 @@ namespace cv { namespace gpu { namespace device comps(y, x) = root(comps, comps(y, x)); } - void labelComponents(const DevMem2D& edges, DevMem2Di comps, cudaStream_t stream) + enum {CC_NO_COMPACT = 0, CC_COMPACT_LABELS = 1}; + + void labelComponents(const DevMem2D& edges, DevMem2Di comps, int flags, cudaStream_t stream) { dim3 block(CTA_SIZE_X, CTA_SIZE_Y); dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS)); @@ -494,15 +496,12 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaGetLastError() ); int tileSizeX = TILE_COLS, tileSizeY = TILE_ROWS; - - cudaSafeCall( cudaGetLastError() ); - // cudaSafeCall( cudaDeviceSynchronize() ); - while (grid.x > 1 || grid.y > 1) { dim3 mergeGrid(ceilf(grid.x / 2.0), ceilf(grid.y / 2.0)); dim3 mergeBlock(STA_SIZE_MARGE_X, STA_SIZE_MARGE_Y); - std::cout << "merging: " << grid.y << " x " << grid.x << " ---> " << mergeGrid.y << " x " << mergeGrid.x << " for tiles: " << tileSizeY << " x " << tileSizeX << std::endl; + // debug log + // std::cout << "merging: " << grid.y << " x " << grid.x << " ---> " << mergeGrid.y << " x " << mergeGrid.x << " for tiles: " << tileSizeY << " x " << tileSizeX << std::endl; crossMerge<<>>(2, 2, tileSizeY, tileSizeX, edges, comps, ceilf(grid.y / 2.0) - grid.y / 2, ceilf(grid.x / 2.0) - grid.x / 2); tileSizeX <<= 1; tileSizeY <<= 1; @@ -515,6 +514,7 @@ namespace cv { namespace gpu { namespace device grid.y = divUp(edges.rows, block.y); flatten<<>>(edges, comps); cudaSafeCall( cudaGetLastError() ); + if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } diff --git a/modules/gpu/src/graphcuts.cpp b/modules/gpu/src/graphcuts.cpp index dd8cf8e59..73ae4c885 100644 --- a/modules/gpu/src/graphcuts.cpp +++ b/modules/gpu/src/graphcuts.cpp @@ -56,7 +56,7 @@ namespace cv { namespace gpu { namespace device { namespace ccl { - void labelComponents(const DevMem2D& edges, DevMem2Di comps, cudaStream_t stream); + void labelComponents(const DevMem2D& edges, DevMem2Di comps, int flags, cudaStream_t stream); template void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); @@ -114,7 +114,7 @@ void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, Stream& s) components.create(mask.size(), CV_32SC1); cudaStream_t stream = StreamAccessor::getStream(s); - device::ccl::labelComponents(mask, components, stream); + device::ccl::labelComponents(mask, components, 0, stream); } namespace