integrate Kepler version
This commit is contained in:
parent
7db1323f81
commit
56c7ef06e7
@ -138,65 +138,7 @@ namespace icf {
|
|||||||
return (a - b + c - d);
|
return (a - b + c - d);
|
||||||
}
|
}
|
||||||
|
|
||||||
// __global__ void test_kernel(const Level* levels, const Octave* octaves, const float* stages,
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
// const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr)
|
|
||||||
// {
|
|
||||||
// const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
||||||
// const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
// Level level = levels[blockIdx.z];
|
|
||||||
|
|
||||||
// // if (blockIdx.z != 31) return;
|
|
||||||
// if(x >= level.workRect.x || y >= level.workRect.y) return;
|
|
||||||
|
|
||||||
// Octave octave = octaves[level.octave];
|
|
||||||
|
|
||||||
// int st = octave.index * octave.stages;
|
|
||||||
// const int stEnd = st + 1000;//octave.stages;
|
|
||||||
|
|
||||||
// float confidence = 0.f;
|
|
||||||
|
|
||||||
// // #pragma unroll 2
|
|
||||||
// for(; st < stEnd; ++st)
|
|
||||||
// {
|
|
||||||
// dprintf("\n\nstage: %d\n", st);
|
|
||||||
// const int nId = st * 3;
|
|
||||||
// Node node = nodes[nId];
|
|
||||||
|
|
||||||
// dprintf("Node: [%d %d %d %d] %d %d\n", node.rect.x, node.rect.y, node.rect.z, node.rect.w,
|
|
||||||
// node.threshold >> 28, node.threshold & 0x0FFFFFFFU);
|
|
||||||
|
|
||||||
// float threshold = rescale(level, node.rect, node);
|
|
||||||
// int sum = get(x, y + (node.threshold >> 28) * 121, node.rect);
|
|
||||||
|
|
||||||
// dprintf("Node: [%d %d %d %d] %f\n", node.rect.x, node.rect.y, node.rect.z,
|
|
||||||
// node.rect.w, threshold);
|
|
||||||
|
|
||||||
// int next = 1 + (int)(sum >= threshold);
|
|
||||||
// dprintf("go: %d (%d >= %f)\n\n" ,next, sum, threshold);
|
|
||||||
|
|
||||||
// node = nodes[nId + next];
|
|
||||||
// threshold = rescale(level, node.rect, node);
|
|
||||||
// sum = get(x, y + (node.threshold >> 28) * 121, node.rect);
|
|
||||||
|
|
||||||
// const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
|
|
||||||
// float impact = leaves[st * 4 + lShift];
|
|
||||||
// confidence += impact;
|
|
||||||
|
|
||||||
// if (confidence <= stages[st]) st = stEnd + 10;
|
|
||||||
// dprintf("decided: %d (%d >= %f) %d %f\n\n" ,next, sum, threshold, lShift, impact);
|
|
||||||
// dprintf("extracted stage: %f\n", stages[st]);
|
|
||||||
// dprintf("computed score: %f\n\n", confidence);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if(st == stEnd)
|
|
||||||
// {
|
|
||||||
// int idx = atomicInc(ctr, ndetections);
|
|
||||||
// // store detection
|
|
||||||
// objects[idx] = Detection(__float2int_rn(x * octave.shrinkage),
|
|
||||||
// __float2int_rn(y * octave.shrinkage), level.objSize.x, level.objSize.y, confidence);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
|
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
|
||||||
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
|
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
|
||||||
const int downscales)
|
const int downscales)
|
||||||
@ -261,6 +203,66 @@ namespace icf {
|
|||||||
__float2int_rn(y * octave.shrinkage), level.objSize.x, level.objSize.y, confidence);
|
__float2int_rn(y * octave.shrinkage), level.objSize.x, level.objSize.y, confidence);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
|
||||||
|
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
|
||||||
|
const int downscales)
|
||||||
|
{
|
||||||
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
Level level = levels[blockIdx.z];
|
||||||
|
|
||||||
|
// if (blockIdx.z != 31) return;
|
||||||
|
if(x >= level.workRect.x || y >= level.workRect.y) return;
|
||||||
|
|
||||||
|
Octave octave = octaves[level.octave];
|
||||||
|
|
||||||
|
int st = octave.index * octave.stages;
|
||||||
|
const int stEnd = st + 1000;//octave.stages;
|
||||||
|
|
||||||
|
float confidence = 0.f;
|
||||||
|
|
||||||
|
for(; st < stEnd; ++st)
|
||||||
|
{
|
||||||
|
dprintf("\n\nstage: %d\n", st);
|
||||||
|
const int nId = st * 3;
|
||||||
|
Node node = nodes[nId];
|
||||||
|
|
||||||
|
dprintf("Node: [%d %d %d %d] %d %d\n", node.rect.x, node.rect.y, node.rect.z, node.rect.w,
|
||||||
|
node.threshold >> 28, node.threshold & 0x0FFFFFFFU);
|
||||||
|
|
||||||
|
float threshold = rescale(level, node.rect, node);
|
||||||
|
int sum = get(x, y + (node.threshold >> 28) * 121, node.rect);
|
||||||
|
|
||||||
|
dprintf("Node: [%d %d %d %d] %f\n", node.rect.x, node.rect.y, node.rect.z,
|
||||||
|
node.rect.w, threshold);
|
||||||
|
|
||||||
|
int next = 1 + (int)(sum >= threshold);
|
||||||
|
dprintf("go: %d (%d >= %f)\n\n" ,next, sum, threshold);
|
||||||
|
|
||||||
|
node = nodes[nId + next];
|
||||||
|
threshold = rescale(level, node.rect, node);
|
||||||
|
sum = get(x, y + (node.threshold >> 28) * 121, node.rect);
|
||||||
|
|
||||||
|
const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
|
||||||
|
float impact = leaves[st * 4 + lShift];
|
||||||
|
confidence += impact;
|
||||||
|
|
||||||
|
if (confidence <= stages[st]) st = stEnd + 10;
|
||||||
|
dprintf("decided: %d (%d >= %f) %d %f\n\n" ,next, sum, threshold, lShift, impact);
|
||||||
|
dprintf("extracted stage: %f\n", stages[st]);
|
||||||
|
dprintf("computed score: %f\n\n", confidence);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(st == stEnd)
|
||||||
|
{
|
||||||
|
int idx = atomicInc(ctr, ndetections);
|
||||||
|
// store detection
|
||||||
|
objects[idx] = Detection(__float2int_rn(x * octave.shrinkage),
|
||||||
|
__float2int_rn(y * octave.shrinkage), level.objSize.x, level.objSize.y, confidence);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
|
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
|
||||||
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv,
|
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user