Merge pull request #6790 from terfendail:linearresize_accuracy

This commit is contained in:
Alexander Alekhin 2016-07-11 10:02:06 +00:00
commit 9ed1474340
2 changed files with 17 additions and 17 deletions

View File

@ -1433,7 +1433,7 @@ inline int TEGRA_MORPHFREE(cvhalFilter2D *context)
#define TEGRA_RESIZE(src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, inv_scale_x, inv_scale_y, interpolation) \
( \
/*interpolation == CV_HAL_INTER_LINEAR ? \
interpolation == CV_HAL_INTER_LINEAR ? \
CV_MAT_DEPTH(src_type) == CV_8U && CAROTENE_NS::isResizeLinearOpenCVSupported(CAROTENE_NS::Size2D(src_width, src_height), CAROTENE_NS::Size2D(dst_width, dst_height), ((src_type >> CV_CN_SHIFT) + 1)) && \
inv_scale_x > 0 && inv_scale_y > 0 && \
(dst_width - 0.5)/inv_scale_x - 0.5 < src_width && (dst_height - 0.5)/inv_scale_y - 0.5 < src_height && \
@ -1441,7 +1441,7 @@ inline int TEGRA_MORPHFREE(cvhalFilter2D *context)
std::abs(dst_width / inv_scale_x - src_width) < 0.1 && std::abs(dst_height / inv_scale_y - src_height) < 0.1 ? \
CAROTENE_NS::resizeLinearOpenCV(CAROTENE_NS::Size2D(src_width, src_height), CAROTENE_NS::Size2D(dst_width, dst_height), \
src_data, src_step, dst_data, dst_step, 1.0/inv_scale_x, 1.0/inv_scale_y, ((src_type >> CV_CN_SHIFT) + 1)), \
CV_HAL_ERROR_OK : CV_HAL_ERROR_NOT_IMPLEMENTED :*/ \
CV_HAL_ERROR_OK : CV_HAL_ERROR_NOT_IMPLEMENTED : \
interpolation == CV_HAL_INTER_AREA ? \
CV_MAT_DEPTH(src_type) == CV_8U && CAROTENE_NS::isResizeAreaSupported(1.0/inv_scale_x, 1.0/inv_scale_y, ((src_type >> CV_CN_SHIFT) + 1)) && \
std::abs(dst_width / inv_scale_x - src_width) < 0.1 && std::abs(dst_height / inv_scale_y - src_height) < 0.1 ? \

View File

@ -1681,15 +1681,15 @@ void downsample_bilinear_8uc1(const Size2D &ssize, const Size2D &dsize,
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
#else
/* ugly version matching to OpenCV's SSE optimization */
int16x4_t v1Ls = vshrn_n_s32(v1L, 5);
int16x4_t v1Hs = vshrn_n_s32(v1H, 5);
int16x4_t v2Ls = vshrn_n_s32(v2L, 5);
int16x4_t v2Hs = vshrn_n_s32(v2H, 5);
int16x4_t v1Ls = vshrn_n_s32(v1L, 4);
int16x4_t v1Hs = vshrn_n_s32(v1H, 4);
int16x4_t v2Ls = vshrn_n_s32(v2L, 4);
int16x4_t v2Hs = vshrn_n_s32(v2H, 4);
int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);
int16x8_t vsum = vaddq_s16(v1s, v2s);
int16x8_t vsum = vaddq_s16(vshrq_n_s16(v1s,1), vshrq_n_s16(v2s,1));
uint8x8_t vres = vqrshrun_n_s16(vsum, 2);
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
@ -1736,15 +1736,15 @@ void downsample_bilinear_8uc1(const Size2D &ssize, const Size2D &dsize,
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col + 8, vres);
#else
/* ugly version matching to OpenCV's SSE optimization */
int16x4_t v1Ls = vshrn_n_s32(v1L, 5);
int16x4_t v1Hs = vshrn_n_s32(v1H, 5);
int16x4_t v2Ls = vshrn_n_s32(v2L, 5);
int16x4_t v2Hs = vshrn_n_s32(v2H, 5);
int16x4_t v1Ls = vshrn_n_s32(v1L, 4);
int16x4_t v1Hs = vshrn_n_s32(v1H, 4);
int16x4_t v2Ls = vshrn_n_s32(v2L, 4);
int16x4_t v2Hs = vshrn_n_s32(v2H, 4);
int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);
int16x8_t vsum = vaddq_s16(v1s, v2s);
int16x8_t vsum = vaddq_s16(vshrq_n_s16(v1s,1), vshrq_n_s16(v2s,1));
uint8x8_t vres = vqrshrun_n_s16(vsum, 2);
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col + 8, vres);
@ -1836,15 +1836,15 @@ downsample_bilinear_8uc1_col_loop8:
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
#else
/* ugly version matching to OpenCV's SSE optimization */
int16x4_t v1Ls = vshrn_n_s32(v1L, 5);
int16x4_t v1Hs = vshrn_n_s32(v1H, 5);
int16x4_t v2Ls = vshrn_n_s32(v2L, 5);
int16x4_t v2Hs = vshrn_n_s32(v2H, 5);
int16x4_t v1Ls = vshrn_n_s32(v1L, 4);
int16x4_t v1Hs = vshrn_n_s32(v1H, 4);
int16x4_t v2Ls = vshrn_n_s32(v2L, 4);
int16x4_t v2Hs = vshrn_n_s32(v2H, 4);
int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);
int16x8_t vsum = vaddq_s16(v1s, v2s);
int16x8_t vsum = vaddq_s16(vshrq_n_s16(v1s,1), vshrq_n_s16(v2s,1));
uint8x8_t vres = vqrshrun_n_s16(vsum, 2);
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);