Merge "Incorporate WRAPLOW macro into non-highbitdepth tx"
This commit is contained in:
commit
8a01074d04
2
configure
vendored
2
configure
vendored
@ -281,7 +281,7 @@ EXPERIMENT_LIST="
|
||||
spatial_svc
|
||||
vp9_temporal_denoising
|
||||
fp_mb_stats
|
||||
emulate_hardware_highbitdepth
|
||||
emulate_hardware
|
||||
"
|
||||
CONFIG_LIST="
|
||||
external_build
|
||||
|
@ -217,7 +217,7 @@ void high_filter_block2d_8_c(const uint16_t *src_ptr,
|
||||
(VP9_FILTER_WEIGHT >> 1); // Rounding
|
||||
|
||||
// Normalize back to 0-255...
|
||||
*output_ptr = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
|
||||
*output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
|
||||
++src_ptr;
|
||||
output_ptr += intermediate_height;
|
||||
}
|
||||
@ -245,7 +245,7 @@ void high_filter_block2d_8_c(const uint16_t *src_ptr,
|
||||
(VP9_FILTER_WEIGHT >> 1); // Rounding
|
||||
|
||||
// Normalize back to 0-255...
|
||||
*dst_ptr++ = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
|
||||
*dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
|
||||
src_ptr += intermediate_height;
|
||||
}
|
||||
src_ptr += intermediate_next_stride;
|
||||
|
@ -745,7 +745,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
@ -753,7 +753,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
&vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
@ -772,7 +772,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSSE3, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
|
@ -333,7 +333,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
&vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, Trans32x32Test,
|
||||
::testing::Values(
|
||||
@ -343,7 +343,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
&vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans32x32Test,
|
||||
::testing::Values(
|
||||
@ -353,7 +353,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
&vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
AVX2, Trans32x32Test,
|
||||
::testing::Values(
|
||||
|
@ -458,7 +458,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, Trans4x4DCT,
|
||||
::testing::Values(
|
||||
@ -473,14 +473,15 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
|
||||
!CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MMX, Trans4x4WHT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans4x4DCT,
|
||||
::testing::Values(
|
||||
|
@ -568,7 +568,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, FwdTrans8x8DCT,
|
||||
::testing::Values(
|
||||
@ -583,7 +583,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, FwdTrans8x8DCT,
|
||||
::testing::Values(
|
||||
@ -598,7 +598,8 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
|
||||
!CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSSE3, FwdTrans8x8DCT,
|
||||
::testing::Values(
|
||||
|
@ -260,7 +260,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
TX_4X4, 1)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, PartialIDctTest,
|
||||
::testing::Values(
|
||||
@ -294,7 +294,8 @@ INSTANTIATE_TEST_CASE_P(
|
||||
TX_4X4, 1)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
|
||||
!CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSSE3_64, PartialIDctTest,
|
||||
::testing::Values(
|
||||
@ -304,7 +305,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
TX_8X8, 12)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSSE3, PartialIDctTest,
|
||||
::testing::Values(
|
||||
|
@ -65,7 +65,7 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
static INLINE uint16_t clip_pixel_high(int val, int bd) {
|
||||
static INLINE uint16_t clip_pixel_highbd(int val, int bd) {
|
||||
switch (bd) {
|
||||
case 8:
|
||||
default:
|
||||
|
@ -299,7 +299,7 @@ static void high_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SUBPEL_TAPS; ++k)
|
||||
sum += src_x[k] * x_filter[k];
|
||||
dst[x] = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
x_q4 += x_step_q4;
|
||||
}
|
||||
src += src_stride;
|
||||
@ -325,7 +325,7 @@ static void high_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
|
||||
for (k = 0; k < SUBPEL_TAPS; ++k)
|
||||
sum += src_x[k] * x_filter[k];
|
||||
dst[x] = ROUND_POWER_OF_TWO(dst[x] +
|
||||
clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
|
||||
clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
|
||||
x_q4 += x_step_q4;
|
||||
}
|
||||
src += src_stride;
|
||||
@ -350,7 +350,7 @@ static void high_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SUBPEL_TAPS; ++k)
|
||||
sum += src_y[k * src_stride] * y_filter[k];
|
||||
dst[y * dst_stride] = clip_pixel_high(
|
||||
dst[y * dst_stride] = clip_pixel_highbd(
|
||||
ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
y_q4 += y_step_q4;
|
||||
}
|
||||
@ -377,7 +377,7 @@ static void high_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
|
||||
for (k = 0; k < SUBPEL_TAPS; ++k)
|
||||
sum += src_y[k * src_stride] * y_filter[k];
|
||||
dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
|
||||
clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
|
||||
clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
|
||||
y_q4 += y_step_q4;
|
||||
}
|
||||
++src;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -251,7 +251,7 @@ static INLINE void high_tm_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
|
||||
|
||||
for (r = 0; r < bs; r++) {
|
||||
for (c = 0; c < bs; c++)
|
||||
dst[c] = clip_pixel_high(left[r] + above[c] - ytop_left, bd);
|
||||
dst[c] = clip_pixel_highbd(left[r] + above[c] - ytop_left, bd);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
|
@ -331,6 +331,8 @@ $vp9_convolve8_avg_vert_neon_asm=vp9_convolve8_avg_vert_neon;
|
||||
# dct
|
||||
#
|
||||
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
# Note as optimized versions of these functions are added we need to add a check to ensure
|
||||
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
|
||||
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_1_add/;
|
||||
|
||||
@ -380,69 +382,123 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_16_add/;
|
||||
|
||||
} else {
|
||||
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon;
|
||||
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
|
||||
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
|
||||
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon;
|
||||
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_16_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon;
|
||||
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
|
||||
$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon;
|
||||
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_64_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
|
||||
$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon;
|
||||
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_12_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
|
||||
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
|
||||
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
|
||||
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon;
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
|
||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_34_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
|
||||
$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht4x4_16_add/;
|
||||
|
||||
add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/;
|
||||
$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon;
|
||||
add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht8x8_64_add/;
|
||||
|
||||
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
|
||||
specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
|
||||
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
|
||||
specialize qw/vp9_iht16x16_256_add/;
|
||||
|
||||
# dct and add
|
||||
# dct and add
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_1_add/;
|
||||
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_16_add/;
|
||||
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_16_add/;
|
||||
} else {
|
||||
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
|
||||
$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
|
||||
$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
|
||||
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
|
||||
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
|
||||
$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
|
||||
|
||||
add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/;
|
||||
$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon;
|
||||
|
||||
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
|
||||
specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
|
||||
|
||||
# dct and add
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_16_add/;
|
||||
}
|
||||
}
|
||||
|
||||
# High bitdepth functions
|
||||
@ -689,6 +745,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
#
|
||||
# dct
|
||||
#
|
||||
# Note as optimized versions of these functions are added we need to add a check to ensure
|
||||
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
|
||||
add_proto qw/void vp9_high_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_high_idct4x4_1_add/;
|
||||
|
||||
|
@ -571,7 +571,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
|
||||
sum += filter[k] *
|
||||
input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
|
||||
}
|
||||
*optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
}
|
||||
} else {
|
||||
// Initial part.
|
||||
@ -585,7 +585,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
|
||||
sum += filter[k] *
|
||||
input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
|
||||
0 : int_pel - INTERP_TAPS / 2 + 1 + k)];
|
||||
*optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
}
|
||||
// Middle part.
|
||||
for (; x <= x2; ++x, y += delta) {
|
||||
@ -596,7 +596,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
|
||||
sum = 0;
|
||||
for (k = 0; k < INTERP_TAPS; ++k)
|
||||
sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
|
||||
*optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
}
|
||||
// End part.
|
||||
for (; x < outlength; ++x, y += delta) {
|
||||
@ -609,7 +609,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
|
||||
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
|
||||
inlength ? inlength - 1 :
|
||||
int_pel - INTERP_TAPS / 2 + 1 + k)];
|
||||
*optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -635,7 +635,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
|
||||
filter[j];
|
||||
}
|
||||
sum >>= FILTER_BITS;
|
||||
*optr++ = clip_pixel_high(sum, bd);
|
||||
*optr++ = clip_pixel_highbd(sum, bd);
|
||||
}
|
||||
} else {
|
||||
// Initial part.
|
||||
@ -645,7 +645,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
|
||||
sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j];
|
||||
}
|
||||
sum >>= FILTER_BITS;
|
||||
*optr++ = clip_pixel_high(sum, bd);
|
||||
*optr++ = clip_pixel_highbd(sum, bd);
|
||||
}
|
||||
// Middle part.
|
||||
for (; i < l2; i += 2) {
|
||||
@ -654,7 +654,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
|
||||
sum += (input[i - j] + input[i + 1 + j]) * filter[j];
|
||||
}
|
||||
sum >>= FILTER_BITS;
|
||||
*optr++ = clip_pixel_high(sum, bd);
|
||||
*optr++ = clip_pixel_highbd(sum, bd);
|
||||
}
|
||||
// End part.
|
||||
for (; i < length; i += 2) {
|
||||
@ -665,7 +665,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
|
||||
filter[j];
|
||||
}
|
||||
sum >>= FILTER_BITS;
|
||||
*optr++ = clip_pixel_high(sum, bd);
|
||||
*optr++ = clip_pixel_highbd(sum, bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -691,7 +691,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
|
||||
filter[j];
|
||||
}
|
||||
sum >>= FILTER_BITS;
|
||||
*optr++ = clip_pixel_high(sum, bd);
|
||||
*optr++ = clip_pixel_highbd(sum, bd);
|
||||
}
|
||||
} else {
|
||||
// Initial part.
|
||||
@ -701,7 +701,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
|
||||
sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j];
|
||||
}
|
||||
sum >>= FILTER_BITS;
|
||||
*optr++ = clip_pixel_high(sum, bd);
|
||||
*optr++ = clip_pixel_highbd(sum, bd);
|
||||
}
|
||||
// Middle part.
|
||||
for (; i < l2; i += 2) {
|
||||
@ -710,7 +710,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
|
||||
sum += (input[i - j] + input[i + j]) * filter[j];
|
||||
}
|
||||
sum >>= FILTER_BITS;
|
||||
*optr++ = clip_pixel_high(sum, bd);
|
||||
*optr++ = clip_pixel_highbd(sum, bd);
|
||||
}
|
||||
// End part.
|
||||
for (; i < length; i += 2) {
|
||||
@ -720,7 +720,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
|
||||
filter[j];
|
||||
}
|
||||
sum >>= FILTER_BITS;
|
||||
*optr++ = clip_pixel_high(sum, bd);
|
||||
*optr++ = clip_pixel_highbd(sum, bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user