mips msa vp9 idct 32x32 optimization
average improvement ~4x-6x

Change-Id: Idaba7e49fbd7f388caee0d73773ccf6e4807ef17
This commit is contained in:
parent
d1cdda88bd
commit
1601c1385a
@ -382,4 +382,12 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
make_tuple(&vp9_fdct32x32_rd_avx2,
|
make_tuple(&vp9_fdct32x32_rd_avx2,
|
||||||
&vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
|
&vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
|
||||||
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||||
|
|
||||||
|
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
MSA, Trans32x32Test,
|
||||||
|
::testing::Values(
|
||||||
|
make_tuple(&vp9_fdct32x32_c,
|
||||||
|
&vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8)));
|
||||||
|
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -309,14 +309,18 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
MSA, PartialIDctTest,
|
MSA, PartialIDctTest,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
|
make_tuple(&vp9_fdct32x32_c,
|
||||||
|
&vp9_idct32x32_1024_add_c,
|
||||||
|
&vp9_idct32x32_34_add_msa,
|
||||||
|
TX_32X32, 34),
|
||||||
|
make_tuple(&vp9_fdct32x32_c,
|
||||||
|
&vp9_idct32x32_1024_add_c,
|
||||||
|
&vp9_idct32x32_1_add_msa,
|
||||||
|
TX_32X32, 1),
|
||||||
make_tuple(&vp9_fdct16x16_c,
|
make_tuple(&vp9_fdct16x16_c,
|
||||||
&vp9_idct16x16_256_add_c,
|
&vp9_idct16x16_256_add_c,
|
||||||
&vp9_idct16x16_10_add_msa,
|
&vp9_idct16x16_10_add_msa,
|
||||||
TX_16X16, 10),
|
TX_16X16, 10),
|
||||||
make_tuple(&vp9_fdct16x16_c,
|
|
||||||
&vp9_idct16x16_256_add_msa,
|
|
||||||
&vp9_idct16x16_10_add_c,
|
|
||||||
TX_16X16, 10),
|
|
||||||
make_tuple(&vp9_fdct16x16_c,
|
make_tuple(&vp9_fdct16x16_c,
|
||||||
&vp9_idct16x16_256_add_c,
|
&vp9_idct16x16_256_add_c,
|
||||||
&vp9_idct16x16_1_add_msa,
|
&vp9_idct16x16_1_add_msa,
|
||||||
|
1077
vp9/common/mips/msa/vp9_idct32x32_msa.c
Normal file
1077
vp9/common/mips/msa/vp9_idct32x32_msa.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -358,6 +358,14 @@
|
|||||||
src = (v16u8)__msa_insert_d((v2i64)(src), 1, (src1)); \
|
src = (v16u8)__msa_insert_d((v2i64)(src), 1, (src1)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define STORE_4VECS_SH(ptr, stride, \
|
||||||
|
in0, in1, in2, in3) { \
|
||||||
|
STORE_SH(in0, ((ptr) + 0 * stride)); \
|
||||||
|
STORE_SH(in1, ((ptr) + 1 * stride)); \
|
||||||
|
STORE_SH(in2, ((ptr) + 2 * stride)); \
|
||||||
|
STORE_SH(in3, ((ptr) + 3 * stride)); \
|
||||||
|
}
|
||||||
|
|
||||||
#define STORE_8VECS_SH(ptr, stride, \
|
#define STORE_8VECS_SH(ptr, stride, \
|
||||||
in0, in1, in2, in3, \
|
in0, in1, in2, in3, \
|
||||||
in4, in5, in6, in7) { \
|
in4, in5, in6, in7) { \
|
||||||
|
@ -443,15 +443,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/;
|
specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;
|
specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
|
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2 msa/;
|
||||||
#is this a typo?
|
#is this a typo?
|
||||||
$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
|
$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
|
||||||
|
|
||||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;
|
specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||||
specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
|
specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
|
||||||
|
@ -138,6 +138,7 @@ VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_avg_msa.c
|
|||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_copy_msa.c
|
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_copy_msa.c
|
||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_msa.h
|
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_msa.h
|
||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
|
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
|
||||||
|
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c
|
||||||
|
|
||||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
|
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
|
||||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.h
|
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.h
|
||||||
|
Loading…
x
Reference in New Issue
Block a user