Merge pull request #6790 from terfendail:linearresize_accuracy
This commit is contained in:
commit
9ed1474340
4
3rdparty/carotene/hal/tegra_hal.hpp
vendored
4
3rdparty/carotene/hal/tegra_hal.hpp
vendored
@@ -1433,7 +1433,7 @@ inline int TEGRA_MORPHFREE(cvhalFilter2D *context)
|
|||||||
|
|
||||||
#define TEGRA_RESIZE(src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, inv_scale_x, inv_scale_y, interpolation) \
|
#define TEGRA_RESIZE(src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, inv_scale_x, inv_scale_y, interpolation) \
|
||||||
( \
|
( \
|
||||||
/*interpolation == CV_HAL_INTER_LINEAR ? \
|
interpolation == CV_HAL_INTER_LINEAR ? \
|
||||||
CV_MAT_DEPTH(src_type) == CV_8U && CAROTENE_NS::isResizeLinearOpenCVSupported(CAROTENE_NS::Size2D(src_width, src_height), CAROTENE_NS::Size2D(dst_width, dst_height), ((src_type >> CV_CN_SHIFT) + 1)) && \
|
CV_MAT_DEPTH(src_type) == CV_8U && CAROTENE_NS::isResizeLinearOpenCVSupported(CAROTENE_NS::Size2D(src_width, src_height), CAROTENE_NS::Size2D(dst_width, dst_height), ((src_type >> CV_CN_SHIFT) + 1)) && \
|
||||||
inv_scale_x > 0 && inv_scale_y > 0 && \
|
inv_scale_x > 0 && inv_scale_y > 0 && \
|
||||||
(dst_width - 0.5)/inv_scale_x - 0.5 < src_width && (dst_height - 0.5)/inv_scale_y - 0.5 < src_height && \
|
(dst_width - 0.5)/inv_scale_x - 0.5 < src_width && (dst_height - 0.5)/inv_scale_y - 0.5 < src_height && \
|
||||||
@@ -1441,7 +1441,7 @@ inline int TEGRA_MORPHFREE(cvhalFilter2D *context)
|
|||||||
std::abs(dst_width / inv_scale_x - src_width) < 0.1 && std::abs(dst_height / inv_scale_y - src_height) < 0.1 ? \
|
std::abs(dst_width / inv_scale_x - src_width) < 0.1 && std::abs(dst_height / inv_scale_y - src_height) < 0.1 ? \
|
||||||
CAROTENE_NS::resizeLinearOpenCV(CAROTENE_NS::Size2D(src_width, src_height), CAROTENE_NS::Size2D(dst_width, dst_height), \
|
CAROTENE_NS::resizeLinearOpenCV(CAROTENE_NS::Size2D(src_width, src_height), CAROTENE_NS::Size2D(dst_width, dst_height), \
|
||||||
src_data, src_step, dst_data, dst_step, 1.0/inv_scale_x, 1.0/inv_scale_y, ((src_type >> CV_CN_SHIFT) + 1)), \
|
src_data, src_step, dst_data, dst_step, 1.0/inv_scale_x, 1.0/inv_scale_y, ((src_type >> CV_CN_SHIFT) + 1)), \
|
||||||
CV_HAL_ERROR_OK : CV_HAL_ERROR_NOT_IMPLEMENTED :*/ \
|
CV_HAL_ERROR_OK : CV_HAL_ERROR_NOT_IMPLEMENTED : \
|
||||||
interpolation == CV_HAL_INTER_AREA ? \
|
interpolation == CV_HAL_INTER_AREA ? \
|
||||||
CV_MAT_DEPTH(src_type) == CV_8U && CAROTENE_NS::isResizeAreaSupported(1.0/inv_scale_x, 1.0/inv_scale_y, ((src_type >> CV_CN_SHIFT) + 1)) && \
|
CV_MAT_DEPTH(src_type) == CV_8U && CAROTENE_NS::isResizeAreaSupported(1.0/inv_scale_x, 1.0/inv_scale_y, ((src_type >> CV_CN_SHIFT) + 1)) && \
|
||||||
std::abs(dst_width / inv_scale_x - src_width) < 0.1 && std::abs(dst_height / inv_scale_y - src_height) < 0.1 ? \
|
std::abs(dst_width / inv_scale_x - src_width) < 0.1 && std::abs(dst_height / inv_scale_y - src_height) < 0.1 ? \
|
||||||
|
30
3rdparty/carotene/src/resize.cpp
vendored
30
3rdparty/carotene/src/resize.cpp
vendored
@@ -1681,15 +1681,15 @@ void downsample_bilinear_8uc1(const Size2D &ssize, const Size2D &dsize,
|
|||||||
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
|
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
|
||||||
#else
|
#else
|
||||||
/* ugly version matching to OpenCV's SSE optimization */
|
/* ugly version matching to OpenCV's SSE optimization */
|
||||||
int16x4_t v1Ls = vshrn_n_s32(v1L, 5);
|
int16x4_t v1Ls = vshrn_n_s32(v1L, 4);
|
||||||
int16x4_t v1Hs = vshrn_n_s32(v1H, 5);
|
int16x4_t v1Hs = vshrn_n_s32(v1H, 4);
|
||||||
int16x4_t v2Ls = vshrn_n_s32(v2L, 5);
|
int16x4_t v2Ls = vshrn_n_s32(v2L, 4);
|
||||||
int16x4_t v2Hs = vshrn_n_s32(v2H, 5);
|
int16x4_t v2Hs = vshrn_n_s32(v2H, 4);
|
||||||
|
|
||||||
int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
|
int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
|
||||||
int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);
|
int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);
|
||||||
|
|
||||||
int16x8_t vsum = vaddq_s16(v1s, v2s);
|
int16x8_t vsum = vaddq_s16(vshrq_n_s16(v1s,1), vshrq_n_s16(v2s,1));
|
||||||
uint8x8_t vres = vqrshrun_n_s16(vsum, 2);
|
uint8x8_t vres = vqrshrun_n_s16(vsum, 2);
|
||||||
|
|
||||||
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
|
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
|
||||||
@@ -1736,15 +1736,15 @@ void downsample_bilinear_8uc1(const Size2D &ssize, const Size2D &dsize,
|
|||||||
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col + 8, vres);
|
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col + 8, vres);
|
||||||
#else
|
#else
|
||||||
/* ugly version matching to OpenCV's SSE optimization */
|
/* ugly version matching to OpenCV's SSE optimization */
|
||||||
int16x4_t v1Ls = vshrn_n_s32(v1L, 5);
|
int16x4_t v1Ls = vshrn_n_s32(v1L, 4);
|
||||||
int16x4_t v1Hs = vshrn_n_s32(v1H, 5);
|
int16x4_t v1Hs = vshrn_n_s32(v1H, 4);
|
||||||
int16x4_t v2Ls = vshrn_n_s32(v2L, 5);
|
int16x4_t v2Ls = vshrn_n_s32(v2L, 4);
|
||||||
int16x4_t v2Hs = vshrn_n_s32(v2H, 5);
|
int16x4_t v2Hs = vshrn_n_s32(v2H, 4);
|
||||||
|
|
||||||
int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
|
int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
|
||||||
int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);
|
int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);
|
||||||
|
|
||||||
int16x8_t vsum = vaddq_s16(v1s, v2s);
|
int16x8_t vsum = vaddq_s16(vshrq_n_s16(v1s,1), vshrq_n_s16(v2s,1));
|
||||||
uint8x8_t vres = vqrshrun_n_s16(vsum, 2);
|
uint8x8_t vres = vqrshrun_n_s16(vsum, 2);
|
||||||
|
|
||||||
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col + 8, vres);
|
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col + 8, vres);
|
||||||
@@ -1836,15 +1836,15 @@ downsample_bilinear_8uc1_col_loop8:
|
|||||||
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
|
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
|
||||||
#else
|
#else
|
||||||
/* ugly version matching to OpenCV's SSE optimization */
|
/* ugly version matching to OpenCV's SSE optimization */
|
||||||
int16x4_t v1Ls = vshrn_n_s32(v1L, 5);
|
int16x4_t v1Ls = vshrn_n_s32(v1L, 4);
|
||||||
int16x4_t v1Hs = vshrn_n_s32(v1H, 5);
|
int16x4_t v1Hs = vshrn_n_s32(v1H, 4);
|
||||||
int16x4_t v2Ls = vshrn_n_s32(v2L, 5);
|
int16x4_t v2Ls = vshrn_n_s32(v2L, 4);
|
||||||
int16x4_t v2Hs = vshrn_n_s32(v2H, 5);
|
int16x4_t v2Hs = vshrn_n_s32(v2H, 4);
|
||||||
|
|
||||||
int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
|
int16x8_t v1s = vqdmulhq_s16(vcombine_s16(v1Ls, v1Hs), vrw);
|
||||||
int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);
|
int16x8_t v2s = vqdmulhq_s16(vcombine_s16(v2Ls, v2Hs), vrW);
|
||||||
|
|
||||||
int16x8_t vsum = vaddq_s16(v1s, v2s);
|
int16x8_t vsum = vaddq_s16(vshrq_n_s16(v1s,1), vshrq_n_s16(v2s,1));
|
||||||
uint8x8_t vres = vqrshrun_n_s16(vsum, 2);
|
uint8x8_t vres = vqrshrun_n_s16(vsum, 2);
|
||||||
|
|
||||||
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
|
vst1_u8(internal::getRowPtr(dstBase, dstStride, row) + col, vres);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user