From 08a453b9de37c796f52e439da98364923726d095 Mon Sep 17 00:00:00 2001
From: Jingning Han
Date: Mon, 3 Aug 2015 14:51:10 -0700
Subject: [PATCH] Replace vp9_ prefix with vpx_ prefix in vpx_dsp function
 names

This commit cleans up the function naming convention in vpx_dsp. It
replaces the vp9_ prefix of global functions with the vpx_ prefix. It
also removes the vp9_ prefix from static functions.

Change-Id: I6394359a63b71a51dda01342eec6a3cc08dfeedf
---
 test/dct16x16_test.cc | 30 +--
 test/dct32x32_test.cc | 34 +--
 test/fdct4x4_test.cc | 32 +--
 test/fdct8x8_test.cc | 30 +--
 test/idct8x8_test.cc | 2 +-
 test/partial_idct_test.cc | 112 ++++----
 test/register_state_check.h | 6 +-
 vp9/common/mips/dspr2/vp9_itrans4_dspr2.c | 8 +-
 vp9/common/mips/msa/vp9_idct16x16_msa.c | 16 +-
 vp9/common/vp9_idct.c | 76 +++---
 vp9/common/vp9_idct.h | 4 +-
 vp9/vp9_common.mk | 2 -
 vpx_dsp/arm/fwd_txfm_neon.c | 2 +-
 vpx_dsp/arm/idct16x16_1_add_neon.asm | 8 +-
 vpx_dsp/arm/idct16x16_1_add_neon.c | 2 +-
 vpx_dsp/arm/idct16x16_add_neon.asm | 32 +--
 vpx_dsp/arm/idct16x16_add_neon.c | 8 +-
 vpx_dsp/arm/idct16x16_neon.c | 52 ++--
 vpx_dsp/arm/idct32x32_1_add_neon.asm | 8 +-
 vpx_dsp/arm/idct32x32_1_add_neon.c | 2 +-
 vpx_dsp/arm/idct32x32_add_neon.asm | 8 +-
 vpx_dsp/arm/idct32x32_add_neon.c | 2 +-
 vpx_dsp/arm/idct4x4_1_add_neon.asm | 8 +-
 vpx_dsp/arm/idct4x4_1_add_neon.c | 2 +-
 vpx_dsp/arm/idct4x4_add_neon.asm | 8 +-
 vpx_dsp/arm/idct4x4_add_neon.c | 2 +-
 vpx_dsp/arm/idct8x8_1_add_neon.asm | 8 +-
 vpx_dsp/arm/idct8x8_1_add_neon.c | 2 +-
 vpx_dsp/arm/idct8x8_add_neon.asm | 16 +-
 vpx_dsp/arm/idct8x8_add_neon.c | 4 +-
 .../arm/save_reg_neon.asm | 8 +-
 vpx_dsp/fwd_txfm.c | 2 +-
 vpx_dsp/inv_txfm.c | 89 +++----
 vpx_dsp/inv_txfm.h | 13 +-
 vpx_dsp/mips/idct16x16_msa.c | 22 +-
 vpx_dsp/mips/idct32x32_msa.c | 78 +++---
 vpx_dsp/mips/idct4x4_msa.c | 8 +-
 vpx_dsp/mips/idct8x8_msa.c | 6 +-
 vpx_dsp/mips/inv_txfm_dspr2.h | 6 +-
 vpx_dsp/mips/inv_txfm_msa.h | 8 +-
 vpx_dsp/mips/itrans16_dspr2.c | 6 +-
 vpx_dsp/mips/itrans32_cols_dspr2.c | 2 +-
 vpx_dsp/mips/itrans32_dspr2.c | 10 +-
 vpx_dsp/mips/itrans4_dspr2.c | 12 +-
 vpx_dsp/mips/itrans8_dspr2.c | 6 +-
 vpx_dsp/vpx_dsp.mk | 1 +
 vpx_dsp/vpx_dsp_rtcd_defs.pl | 230 +++++++++---------
 vpx_dsp/x86/inv_txfm_sse2.asm | 3 +
 vpx_dsp/x86/inv_txfm_sse2.c | 56 ++---
 vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm | 3 +
 50 files changed, 550 insertions(+), 545 deletions(-)
 rename vp9/common/arm/neon/vp9_save_reg_neon.asm => vpx_dsp/arm/save_reg_neon.asm (88%)

diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc index 173e5bb80..e9de76ada 100644 --- a/test/dct16x16_test.cc +++ b/test/dct16x16_test.cc @@ -277,7 +277,7 @@ void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride, void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride, int /*tx_type*/) { - vp9_idct16x16_256_add_c(in, dest, stride); + vpx_idct16x16_256_add_c(in, dest, stride); } void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride, @@ -292,11 +292,11 @@ void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride, #if CONFIG_VP9_HIGHBITDEPTH void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct16x16_256_add_c(in, out, stride, 10); + vpx_highbd_idct16x16_256_add_c(in, out, stride, 10); } void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct16x16_256_add_c(in, out, stride, 12); + vpx_highbd_idct16x16_256_add_c(in, out, stride, 12); } void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride, @@ -318,28 +318,28 @@ void iht16x16_12(const tran_low_t *in, uint8_t
*out, int stride, int tx_type) { } void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct16x16_10_add_c(in, out, stride, 10); + vpx_highbd_idct16x16_10_add_c(in, out, stride, 10); } void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct16x16_10_add_c(in, out, stride, 12); + vpx_highbd_idct16x16_10_add_c(in, out, stride, 12); } #if HAVE_SSE2 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10); + vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10); } void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12); + vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12); } void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10); + vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10); } void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12); + vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12); } #endif // HAVE_SSE2 #endif // CONFIG_VP9_HIGHBITDEPTH @@ -824,12 +824,12 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10), make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12), - make_tuple(&vpx_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8))); + make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8))); #else INSTANTIATE_TEST_CASE_P( C, Trans16x16DCT, ::testing::Values( - make_tuple(&vpx_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8))); + make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH @@ -863,7 +863,7 @@ INSTANTIATE_TEST_CASE_P( NEON, Trans16x16DCT, ::testing::Values( make_tuple(&vpx_fdct16x16_c, - &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8))); + &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8))); #endif #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -871,7 +871,7 @@ INSTANTIATE_TEST_CASE_P( SSE2, Trans16x16DCT, ::testing::Values( make_tuple(&vpx_fdct16x16_sse2, - &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8))); + &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( SSE2, Trans16x16HT, ::testing::Values( @@ -898,7 +898,7 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_12_sse2, 0, VPX_BITS_12), make_tuple(&vpx_fdct16x16_sse2, - &vp9_idct16x16_256_add_c, 0, VPX_BITS_8))); + &vpx_idct16x16_256_add_c, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( SSE2, Trans16x16HT, ::testing::Values( @@ -927,7 +927,7 @@ INSTANTIATE_TEST_CASE_P( MSA, Trans16x16DCT, ::testing::Values( make_tuple(&vpx_fdct16x16_msa, - &vp9_idct16x16_256_add_msa, 0, VPX_BITS_8))); + &vpx_idct16x16_256_add_msa, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( MSA, Trans16x16HT, ::testing::Values( diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc index 70a2b2394..f7327b100 100644 --- a/test/dct32x32_test.cc +++ b/test/dct32x32_test.cc @@ -82,15 +82,15 @@ typedef std::tr1::tuple #if CONFIG_VP9_HIGHBITDEPTH void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct32x32_1024_add_c(in, out, stride, 8); + vpx_highbd_idct32x32_1024_add_c(in, out, stride, 8); } void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) { - 
vp9_highbd_idct32x32_1024_add_c(in, out, stride, 10); + vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10); } void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct32x32_1024_add_c(in, out, stride, 12); + vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12); } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -324,17 +324,17 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_12, 1, VPX_BITS_12), make_tuple(&vpx_fdct32x32_c, - &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8), + &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_c, - &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8))); + &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); #else INSTANTIATE_TEST_CASE_P( C, Trans32x32Test, ::testing::Values( make_tuple(&vpx_fdct32x32_c, - &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8), + &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_c, - &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8))); + &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -342,9 +342,9 @@ INSTANTIATE_TEST_CASE_P( NEON, Trans32x32Test, ::testing::Values( make_tuple(&vpx_fdct32x32_c, - &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8), + &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_c, - &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8))); + &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8))); #endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -352,9 +352,9 @@ INSTANTIATE_TEST_CASE_P( SSE2, Trans32x32Test, ::testing::Values( make_tuple(&vpx_fdct32x32_sse2, - &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8), + &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_sse2, - &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); + &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); #endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -367,9 +367,9 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12), make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1, VPX_BITS_12), - make_tuple(&vpx_fdct32x32_sse2, &vp9_idct32x32_1024_add_c, 0, + make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), - make_tuple(&vpx_fdct32x32_rd_sse2, &vp9_idct32x32_1024_add_c, 1, + make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -378,9 +378,9 @@ INSTANTIATE_TEST_CASE_P( AVX2, Trans32x32Test, ::testing::Values( make_tuple(&vpx_fdct32x32_avx2, - &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8), + &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_avx2, - &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); + &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); #endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -388,8 +388,8 @@ INSTANTIATE_TEST_CASE_P( MSA, Trans32x32Test, ::testing::Values( make_tuple(&vpx_fdct32x32_msa, - &vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8), + &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_msa, - &vp9_idct32x32_1024_add_msa, 1, VPX_BITS_8))); + &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8))); #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && 
!CONFIG_EMULATE_HARDWARE } // namespace diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index 9dcf636d9..3f6b738e5 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -55,11 +55,11 @@ void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride, #if CONFIG_VP9_HIGHBITDEPTH void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct4x4_16_add_c(in, out, stride, 10); + vpx_highbd_idct4x4_16_add_c(in, out, stride, 10); } void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct4x4_16_add_c(in, out, stride, 12); + vpx_highbd_idct4x4_16_add_c(in, out, stride, 12); } void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { @@ -71,20 +71,20 @@ void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { } void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_iwht4x4_16_add_c(in, out, stride, 10); + vpx_highbd_iwht4x4_16_add_c(in, out, stride, 10); } void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_iwht4x4_16_add_c(in, out, stride, 12); + vpx_highbd_iwht4x4_16_add_c(in, out, stride, 12); } #if HAVE_SSE2 void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 10); + vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 10); } void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 12); + vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 12); } #endif // HAVE_SSE2 #endif // CONFIG_VP9_HIGHBITDEPTH @@ -421,12 +421,12 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10), make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12), - make_tuple(&vpx_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8))); + make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8))); #else INSTANTIATE_TEST_CASE_P( C, Trans4x4DCT, ::testing::Values( - make_tuple(&vpx_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8))); + make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH @@ -461,12 +461,12 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10), make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12), - make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8))); + make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8))); #else INSTANTIATE_TEST_CASE_P( C, Trans4x4WHT, ::testing::Values( - make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8))); + make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -474,7 +474,7 @@ INSTANTIATE_TEST_CASE_P( NEON, Trans4x4DCT, ::testing::Values( make_tuple(&vpx_fdct4x4_c, - &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8))); + &vpx_idct4x4_16_add_neon, 0, VPX_BITS_8))); #endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -492,7 +492,7 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( MMX, Trans4x4WHT, ::testing::Values( - make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8))); + make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8))); #endif #if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \ @@ -500,7 +500,7 @@ 
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, Trans4x4WHT, ::testing::Values( - make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_sse2, 0, VPX_BITS_8))); + make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8))); #endif #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -508,7 +508,7 @@ INSTANTIATE_TEST_CASE_P( SSE2, Trans4x4DCT, ::testing::Values( make_tuple(&vpx_fdct4x4_sse2, - &vp9_idct4x4_16_add_sse2, 0, VPX_BITS_8))); + &vpx_idct4x4_16_add_sse2, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( SSE2, Trans4x4HT, ::testing::Values( @@ -526,7 +526,7 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10), make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, VPX_BITS_12), make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12), - make_tuple(&vpx_fdct4x4_sse2, &vp9_idct4x4_16_add_c, 0, + make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( @@ -542,7 +542,7 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( MSA, Trans4x4DCT, ::testing::Values( - make_tuple(&vpx_fdct4x4_msa, &vp9_idct4x4_16_add_msa, 0, VPX_BITS_8))); + make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( MSA, Trans4x4HT, ::testing::Values( diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index eeafde1af..c0deaf406 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -92,11 +92,11 @@ void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { #if CONFIG_VP9_HIGHBITDEPTH void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct8x8_64_add_c(in, out, stride, 10); + vpx_highbd_idct8x8_64_add_c(in, out, stride, 10); } void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct8x8_64_add_c(in, out, stride, 12); + vpx_highbd_idct8x8_64_add_c(in, out, stride, 12); } void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { @@ -108,28 +108,28 @@ void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { } void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct8x8_10_add_c(in, out, stride, 10); + vpx_highbd_idct8x8_10_add_c(in, out, stride, 10); } void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct8x8_10_add_c(in, out, stride, 12); + vpx_highbd_idct8x8_10_add_c(in, out, stride, 12); } #if HAVE_SSE2 void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 10); + vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10); } void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 12); + vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 12); } void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 10); + vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 10); } void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) { - vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 12); + vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 12); } #endif // HAVE_SSE2 #endif // CONFIG_VP9_HIGHBITDEPTH @@ -658,14 +658,14 @@ using std::tr1::make_tuple; INSTANTIATE_TEST_CASE_P( C, FwdTrans8x8DCT, ::testing::Values( - make_tuple(&vpx_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8), + make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8), 
make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10), make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12))); #else INSTANTIATE_TEST_CASE_P( C, FwdTrans8x8DCT, ::testing::Values( - make_tuple(&vpx_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8))); + make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH @@ -698,7 +698,7 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( NEON, FwdTrans8x8DCT, ::testing::Values( - make_tuple(&vpx_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0, + make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 0, VPX_BITS_8))); #endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -716,7 +716,7 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, FwdTrans8x8DCT, ::testing::Values( - make_tuple(&vpx_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0, + make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( SSE2, FwdTrans8x8HT, @@ -731,7 +731,7 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, FwdTrans8x8DCT, ::testing::Values( - make_tuple(&vpx_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8), + make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8), make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2, 12, VPX_BITS_10), make_tuple(&vpx_highbd_fdct8x8_sse2, @@ -769,7 +769,7 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSSE3, FwdTrans8x8DCT, ::testing::Values( - make_tuple(&vpx_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0, + make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_ssse3, 0, VPX_BITS_8))); #endif @@ -777,7 +777,7 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( MSA, FwdTrans8x8DCT, ::testing::Values( - make_tuple(&vpx_fdct8x8_msa, &vp9_idct8x8_64_add_msa, 0, VPX_BITS_8))); + make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( MSA, FwdTrans8x8HT, ::testing::Values( diff --git a/test/idct8x8_test.cc b/test/idct8x8_test.cc index 1a1a2344e..987ba7536 100644 --- a/test/idct8x8_test.cc +++ b/test/idct8x8_test.cc @@ -124,7 +124,7 @@ TEST(VP9Idct8x8Test, AccuracyCheck) { reference_dct_2d(input, output_r); for (int j = 0; j < 64; ++j) coeff[j] = round(output_r[j]); - vp9_idct8x8_64_add_c(coeff, dst, 8); + vpx_idct8x8_64_add_c(coeff, dst, 8); for (int j = 0; j < 64; ++j) { const int diff = dst[j] - src[j]; const int error = diff * diff; diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc index 09cd09b7a..6c824128b 100644 --- a/test/partial_idct_test.cc +++ b/test/partial_idct_test.cc @@ -203,32 +203,32 @@ INSTANTIATE_TEST_CASE_P( C, PartialIDctTest, ::testing::Values( make_tuple(&vpx_fdct32x32_c, - &vp9_idct32x32_1024_add_c, - &vp9_idct32x32_34_add_c, + &vpx_idct32x32_1024_add_c, + &vpx_idct32x32_34_add_c, TX_32X32, 34), make_tuple(&vpx_fdct32x32_c, - &vp9_idct32x32_1024_add_c, - &vp9_idct32x32_1_add_c, + &vpx_idct32x32_1024_add_c, + &vpx_idct32x32_1_add_c, TX_32X32, 1), make_tuple(&vpx_fdct16x16_c, - &vp9_idct16x16_256_add_c, - &vp9_idct16x16_10_add_c, + &vpx_idct16x16_256_add_c, + &vpx_idct16x16_10_add_c, TX_16X16, 10), make_tuple(&vpx_fdct16x16_c, - &vp9_idct16x16_256_add_c, - &vp9_idct16x16_1_add_c, + &vpx_idct16x16_256_add_c, + &vpx_idct16x16_1_add_c, TX_16X16, 1), make_tuple(&vpx_fdct8x8_c, - &vp9_idct8x8_64_add_c, - &vp9_idct8x8_12_add_c, + &vpx_idct8x8_64_add_c, + &vpx_idct8x8_12_add_c, TX_8X8, 12), make_tuple(&vpx_fdct8x8_c, - &vp9_idct8x8_64_add_c, - &vp9_idct8x8_1_add_c, + &vpx_idct8x8_64_add_c, + 
&vpx_idct8x8_1_add_c, TX_8X8, 1), make_tuple(&vpx_fdct4x4_c, - &vp9_idct4x4_16_add_c, - &vp9_idct4x4_1_add_c, + &vpx_idct4x4_16_add_c, + &vpx_idct4x4_1_add_c, TX_4X4, 1))); #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -236,28 +236,28 @@ INSTANTIATE_TEST_CASE_P( NEON, PartialIDctTest, ::testing::Values( make_tuple(&vpx_fdct32x32_c, - &vp9_idct32x32_1024_add_c, - &vp9_idct32x32_1_add_neon, + &vpx_idct32x32_1024_add_c, + &vpx_idct32x32_1_add_neon, TX_32X32, 1), make_tuple(&vpx_fdct16x16_c, - &vp9_idct16x16_256_add_c, - &vp9_idct16x16_10_add_neon, + &vpx_idct16x16_256_add_c, + &vpx_idct16x16_10_add_neon, TX_16X16, 10), make_tuple(&vpx_fdct16x16_c, - &vp9_idct16x16_256_add_c, - &vp9_idct16x16_1_add_neon, + &vpx_idct16x16_256_add_c, + &vpx_idct16x16_1_add_neon, TX_16X16, 1), make_tuple(&vpx_fdct8x8_c, - &vp9_idct8x8_64_add_c, - &vp9_idct8x8_12_add_neon, + &vpx_idct8x8_64_add_c, + &vpx_idct8x8_12_add_neon, TX_8X8, 12), make_tuple(&vpx_fdct8x8_c, - &vp9_idct8x8_64_add_c, - &vp9_idct8x8_1_add_neon, + &vpx_idct8x8_64_add_c, + &vpx_idct8x8_1_add_neon, TX_8X8, 1), make_tuple(&vpx_fdct4x4_c, - &vp9_idct4x4_16_add_c, - &vp9_idct4x4_1_add_neon, + &vpx_idct4x4_16_add_c, + &vpx_idct4x4_1_add_neon, TX_4X4, 1))); #endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -266,32 +266,32 @@ INSTANTIATE_TEST_CASE_P( SSE2, PartialIDctTest, ::testing::Values( make_tuple(&vpx_fdct32x32_c, - &vp9_idct32x32_1024_add_c, - &vp9_idct32x32_34_add_sse2, + &vpx_idct32x32_1024_add_c, + &vpx_idct32x32_34_add_sse2, TX_32X32, 34), make_tuple(&vpx_fdct32x32_c, - &vp9_idct32x32_1024_add_c, - &vp9_idct32x32_1_add_sse2, + &vpx_idct32x32_1024_add_c, + &vpx_idct32x32_1_add_sse2, TX_32X32, 1), make_tuple(&vpx_fdct16x16_c, - &vp9_idct16x16_256_add_c, - &vp9_idct16x16_10_add_sse2, + &vpx_idct16x16_256_add_c, + &vpx_idct16x16_10_add_sse2, TX_16X16, 10), make_tuple(&vpx_fdct16x16_c, - &vp9_idct16x16_256_add_c, - &vp9_idct16x16_1_add_sse2, + &vpx_idct16x16_256_add_c, + &vpx_idct16x16_1_add_sse2, TX_16X16, 1), make_tuple(&vpx_fdct8x8_c, - &vp9_idct8x8_64_add_c, - &vp9_idct8x8_12_add_sse2, + &vpx_idct8x8_64_add_c, + &vpx_idct8x8_12_add_sse2, TX_8X8, 12), make_tuple(&vpx_fdct8x8_c, - &vp9_idct8x8_64_add_c, - &vp9_idct8x8_1_add_sse2, + &vpx_idct8x8_64_add_c, + &vpx_idct8x8_1_add_sse2, TX_8X8, 1), make_tuple(&vpx_fdct4x4_c, - &vp9_idct4x4_16_add_c, - &vp9_idct4x4_1_add_sse2, + &vpx_idct4x4_16_add_c, + &vpx_idct4x4_1_add_sse2, TX_4X4, 1))); #endif @@ -301,8 +301,8 @@ INSTANTIATE_TEST_CASE_P( SSSE3_64, PartialIDctTest, ::testing::Values( make_tuple(&vpx_fdct8x8_c, - &vp9_idct8x8_64_add_c, - &vp9_idct8x8_12_add_ssse3, + &vpx_idct8x8_64_add_c, + &vpx_idct8x8_12_add_ssse3, TX_8X8, 12))); #endif @@ -311,32 +311,32 @@ INSTANTIATE_TEST_CASE_P( MSA, PartialIDctTest, ::testing::Values( make_tuple(&vpx_fdct32x32_c, - &vp9_idct32x32_1024_add_c, - &vp9_idct32x32_34_add_msa, + &vpx_idct32x32_1024_add_c, + &vpx_idct32x32_34_add_msa, TX_32X32, 34), make_tuple(&vpx_fdct32x32_c, - &vp9_idct32x32_1024_add_c, - &vp9_idct32x32_1_add_msa, + &vpx_idct32x32_1024_add_c, + &vpx_idct32x32_1_add_msa, TX_32X32, 1), make_tuple(&vpx_fdct16x16_c, - &vp9_idct16x16_256_add_c, - &vp9_idct16x16_10_add_msa, + &vpx_idct16x16_256_add_c, + &vpx_idct16x16_10_add_msa, TX_16X16, 10), make_tuple(&vpx_fdct16x16_c, - &vp9_idct16x16_256_add_c, - &vp9_idct16x16_1_add_msa, + &vpx_idct16x16_256_add_c, + &vpx_idct16x16_1_add_msa, TX_16X16, 1), make_tuple(&vpx_fdct8x8_c, - &vp9_idct8x8_64_add_c, - &vp9_idct8x8_12_add_msa, + 
&vpx_idct8x8_64_add_c, + &vpx_idct8x8_12_add_msa, TX_8X8, 10), make_tuple(&vpx_fdct8x8_c, - &vp9_idct8x8_64_add_c, - &vp9_idct8x8_1_add_msa, + &vpx_idct8x8_64_add_c, + &vpx_idct8x8_1_add_msa, TX_8X8, 1), make_tuple(&vpx_fdct4x4_c, - &vp9_idct4x4_16_add_c, - &vp9_idct4x4_1_add_msa, + &vpx_idct4x4_16_add_c, + &vpx_idct4x4_1_add_msa, TX_4X4, 1))); #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE diff --git a/test/register_state_check.h b/test/register_state_check.h index 8d4beea5f..8e72f9113 100644 --- a/test/register_state_check.h +++ b/test/register_state_check.h @@ -96,7 +96,7 @@ class RegisterStateCheck { extern "C" { // Save the d8-d15 registers into store. -void vp9_push_neon(int64_t *store); +void vpx_push_neon(int64_t *store); } namespace libvpx_test { @@ -111,7 +111,7 @@ class RegisterStateCheck { private: static bool StoreRegisters(int64_t store[8]) { - vp9_push_neon(store); + vpx_push_neon(store); return true; } @@ -119,7 +119,7 @@ class RegisterStateCheck { bool Check() const { if (!initialized_) return false; int64_t post_store[8]; - vp9_push_neon(post_store); + vpx_push_neon(post_store); for (int i = 0; i < 8; ++i) { EXPECT_EQ(pre_store_[i], post_store[i]) << "d" << i + 8 << " has been modified"; diff --git a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c index 848f7c0aa..c10979b64 100644 --- a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c @@ -38,11 +38,11 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, switch (tx_type) { case DCT_DCT: // DCT in both horizontal and vertical - vp9_idct4_rows_dspr2(input, outptr); - vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); + vpx_idct4_rows_dspr2(input, outptr); + vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); break; case ADST_DCT: // ADST in vertical, DCT in horizontal - vp9_idct4_rows_dspr2(input, outptr); + vpx_idct4_rows_dspr2(input, outptr); outptr = out; @@ -69,7 +69,7 @@ void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, temp_in[i * 4 + j] = out[j * 4 + i]; } } - vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); + vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); break; case ADST_ADST: // ADST in both directions for (i = 0; i < 4; ++i) { diff --git a/vp9/common/mips/msa/vp9_idct16x16_msa.c b/vp9/common/mips/msa/vp9_idct16x16_msa.c index 59b8930b5..5adf0aaac 100644 --- a/vp9/common/mips/msa/vp9_idct16x16_msa.c +++ b/vp9/common/mips/msa/vp9_idct16x16_msa.c @@ -24,13 +24,13 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst, /* transform rows */ for (i = 0; i < 2; ++i) { /* process 16 * 8 block */ - vp9_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); + vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); } /* transform columns */ for (i = 0; i < 2; ++i) { /* process 8 * 16 block */ - vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), + vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), dst_stride); } break; @@ -38,12 +38,12 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst, /* transform rows */ for (i = 0; i < 2; ++i) { /* process 16 * 8 block */ - vp9_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); + vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); } /* transform columns */ for (i = 0; i < 2; ++i) { - vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)), + vpx_iadst16_1d_columns_addblk_msa((out_ptr + 
(i << 3)), (dst + (i << 3)), dst_stride); } break; @@ -51,13 +51,13 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst, /* transform rows */ for (i = 0; i < 2; ++i) { /* process 16 * 8 block */ - vp9_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); + vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); } /* transform columns */ for (i = 0; i < 2; ++i) { /* process 8 * 16 block */ - vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), + vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), dst_stride); } break; @@ -65,12 +65,12 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst, /* transform rows */ for (i = 0; i < 2; ++i) { /* process 16 * 8 block */ - vp9_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); + vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7))); } /* transform columns */ for (i = 0; i < 2; ++i) { - vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)), + vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), dst_stride); } break; diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index b15f7f370..146bf4298 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -123,18 +123,18 @@ void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob) { if (eob > 1) - vp9_idct4x4_16_add(input, dest, stride); + vpx_idct4x4_16_add(input, dest, stride); else - vp9_idct4x4_1_add(input, dest, stride); + vpx_idct4x4_1_add(input, dest, stride); } void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob) { if (eob > 1) - vp9_iwht4x4_16_add(input, dest, stride); + vpx_iwht4x4_16_add(input, dest, stride); else - vp9_iwht4x4_1_add(input, dest, stride); + vpx_iwht4x4_1_add(input, dest, stride); } void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, @@ -148,11 +148,11 @@ void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, // Combine that with code here. if (eob == 1) // DC only DCT coefficient - vp9_idct8x8_1_add(input, dest, stride); + vpx_idct8x8_1_add(input, dest, stride); else if (eob <= 12) - vp9_idct8x8_12_add(input, dest, stride); + vpx_idct8x8_12_add(input, dest, stride); else - vp9_idct8x8_64_add(input, dest, stride); + vpx_idct8x8_64_add(input, dest, stride); } void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, @@ -161,22 +161,22 @@ void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, * coefficients. Use eobs to separate different cases. */ if (eob == 1) /* DC only DCT coefficient. 
*/ - vp9_idct16x16_1_add(input, dest, stride); + vpx_idct16x16_1_add(input, dest, stride); else if (eob <= 10) - vp9_idct16x16_10_add(input, dest, stride); + vpx_idct16x16_10_add(input, dest, stride); else - vp9_idct16x16_256_add(input, dest, stride); + vpx_idct16x16_256_add(input, dest, stride); } void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, int eob) { if (eob == 1) - vp9_idct32x32_1_add(input, dest, stride); + vpx_idct32x32_1_add(input, dest, stride); else if (eob <= 34) // non-zero coeff only in upper-left 8x8 - vp9_idct32x32_34_add(input, dest, stride); + vpx_idct32x32_34_add(input, dest, stride); else - vp9_idct32x32_1024_add(input, dest, stride); + vpx_idct32x32_1024_add(input, dest, stride); } // iht @@ -210,10 +210,10 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { const highbd_transform_2d IHT_4[] = { - { vp9_highbd_idct4_c, vp9_highbd_idct4_c }, // DCT_DCT = 0 - { highbd_iadst4_c, vp9_highbd_idct4_c }, // ADST_DCT = 1 - { vp9_highbd_idct4_c, highbd_iadst4_c }, // DCT_ADST = 2 - { highbd_iadst4_c, highbd_iadst4_c } // ADST_ADST = 3 + { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -242,10 +242,10 @@ void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, } static const highbd_transform_2d HIGH_IHT_8[] = { - { vp9_highbd_idct8_c, vp9_highbd_idct8_c }, // DCT_DCT = 0 - { highbd_iadst8_c, vp9_highbd_idct8_c }, // ADST_DCT = 1 - { vp9_highbd_idct8_c, highbd_iadst8_c }, // DCT_ADST = 2 - { highbd_iadst8_c, highbd_iadst8_c } // ADST_ADST = 3 + { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 }; void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, @@ -277,10 +277,10 @@ void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, } static const highbd_transform_2d HIGH_IHT_16[] = { - { vp9_highbd_idct16_c, vp9_highbd_idct16_c }, // DCT_DCT = 0 - { highbd_iadst16_c, vp9_highbd_idct16_c }, // ADST_DCT = 1 - { vp9_highbd_idct16_c, highbd_iadst16_c }, // DCT_ADST = 2 - { highbd_iadst16_c, highbd_iadst16_c } // ADST_ADST = 3 + { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 }; void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, @@ -315,18 +315,18 @@ void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd) { if (eob > 1) - vp9_highbd_idct4x4_16_add(input, dest, stride, bd); + vpx_highbd_idct4x4_16_add(input, dest, stride, bd); else - vp9_highbd_idct4x4_1_add(input, dest, stride, bd); + vpx_highbd_idct4x4_1_add(input, dest, stride, bd); } void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd) { if (eob > 1) - vp9_highbd_iwht4x4_16_add(input, dest, 
stride, bd); + vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); else - vp9_highbd_iwht4x4_1_add(input, dest, stride, bd); + vpx_highbd_iwht4x4_1_add(input, dest, stride, bd); } void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, @@ -340,11 +340,11 @@ void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, // Combine that with code here. // DC only DCT coefficient if (eob == 1) { - vp9_highbd_idct8x8_1_add(input, dest, stride, bd); + vpx_highbd_idct8x8_1_add(input, dest, stride, bd); } else if (eob <= 10) { - vp9_highbd_idct8x8_10_add(input, dest, stride, bd); + vpx_highbd_idct8x8_10_add(input, dest, stride, bd); } else { - vp9_highbd_idct8x8_64_add(input, dest, stride, bd); + vpx_highbd_idct8x8_64_add(input, dest, stride, bd); } } @@ -354,11 +354,11 @@ void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, // coefficients. Use eobs to separate different cases. // DC only DCT coefficient. if (eob == 1) { - vp9_highbd_idct16x16_1_add(input, dest, stride, bd); + vpx_highbd_idct16x16_1_add(input, dest, stride, bd); } else if (eob <= 10) { - vp9_highbd_idct16x16_10_add(input, dest, stride, bd); + vpx_highbd_idct16x16_10_add(input, dest, stride, bd); } else { - vp9_highbd_idct16x16_256_add(input, dest, stride, bd); + vpx_highbd_idct16x16_256_add(input, dest, stride, bd); } } @@ -366,11 +366,11 @@ void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd) { // Non-zero coeff only in upper-left 8x8 if (eob == 1) { - vp9_highbd_idct32x32_1_add(input, dest, stride, bd); + vpx_highbd_idct32x32_1_add(input, dest, stride, bd); } else if (eob <= 34) { - vp9_highbd_idct32x32_34_add(input, dest, stride, bd); + vpx_highbd_idct32x32_34_add(input, dest, stride, bd); } else { - vp9_highbd_idct32x32_1024_add(input, dest, stride, bd); + vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); } } diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index 7a7dc1d64..b5a3fbf36 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -44,8 +44,8 @@ void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); -void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int - eob); +void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 735aaf141..a49cd0aee 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -88,8 +88,6 @@ endif VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c -VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM) - ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c diff --git a/vpx_dsp/arm/fwd_txfm_neon.c b/vpx_dsp/arm/fwd_txfm_neon.c index 79afc91b2..9f9de98d9 100644 --- a/vpx_dsp/arm/fwd_txfm_neon.c +++ b/vpx_dsp/arm/fwd_txfm_neon.c @@ -170,7 +170,7 @@ void vpx_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) { } } // for { - // from vp9_dct_sse2.c + // from vpx_dct_sse2.c // Post-condition (division by two) // division of two 16 bits signed numbers using shifts // n / 2 = (n - (n >> 15)) >> 1 diff --git a/vpx_dsp/arm/idct16x16_1_add_neon.asm b/vpx_dsp/arm/idct16x16_1_add_neon.asm index 
b1fd21bb6..dc459e20d 100644 --- a/vpx_dsp/arm/idct16x16_1_add_neon.asm +++ b/vpx_dsp/arm/idct16x16_1_add_neon.asm @@ -8,21 +8,21 @@ ; - EXPORT |vp9_idct16x16_1_add_neon| + EXPORT |vpx_idct16x16_1_add_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp9_idct16x16_1_add_neon(int16_t *input, uint8_t *dest, +;void vpx_idct16x16_1_add_neon(int16_t *input, uint8_t *dest, ; int dest_stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int dest_stride) -|vp9_idct16x16_1_add_neon| PROC +|vpx_idct16x16_1_add_neon| PROC ldrsh r0, [r0] ; generate cospi_16_64 = 11585 @@ -193,6 +193,6 @@ vst1.64 {d31}, [r12], r2 bx lr - ENDP ; |vp9_idct16x16_1_add_neon| + ENDP ; |vpx_idct16x16_1_add_neon| END diff --git a/vpx_dsp/arm/idct16x16_1_add_neon.c b/vpx_dsp/arm/idct16x16_1_add_neon.c index aa035e770..f734e4802 100644 --- a/vpx_dsp/arm/idct16x16_1_add_neon.c +++ b/vpx_dsp/arm/idct16x16_1_add_neon.c @@ -13,7 +13,7 @@ #include "vpx_dsp/inv_txfm.h" #include "vpx_ports/mem.h" -void vp9_idct16x16_1_add_neon( +void vpx_idct16x16_1_add_neon( int16_t *input, uint8_t *dest, int dest_stride) { diff --git a/vpx_dsp/arm/idct16x16_add_neon.asm b/vpx_dsp/arm/idct16x16_add_neon.asm index a13c0d04b..22a0c9594 100644 --- a/vpx_dsp/arm/idct16x16_add_neon.asm +++ b/vpx_dsp/arm/idct16x16_add_neon.asm @@ -8,10 +8,10 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_idct16x16_256_add_neon_pass1| - EXPORT |vp9_idct16x16_256_add_neon_pass2| - EXPORT |vp9_idct16x16_10_add_neon_pass1| - EXPORT |vp9_idct16x16_10_add_neon_pass2| + EXPORT |vpx_idct16x16_256_add_neon_pass1| + EXPORT |vpx_idct16x16_256_add_neon_pass2| + EXPORT |vpx_idct16x16_10_add_neon_pass1| + EXPORT |vpx_idct16x16_10_add_neon_pass2| ARM REQUIRE8 PRESERVE8 @@ -36,7 +36,7 @@ MEND AREA Block, CODE, READONLY ; name this block of code -;void |vp9_idct16x16_256_add_neon_pass1|(int16_t *input, +;void |vpx_idct16x16_256_add_neon_pass1|(int16_t *input, ; int16_t *output, int output_stride) ; ; r0 int16_t input @@ -46,7 +46,7 @@ ; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output ; will be stored back into q8-q15 registers. This function will touch q0-q7 ; registers and use them as buffer during calculation. -|vp9_idct16x16_256_add_neon_pass1| PROC +|vpx_idct16x16_256_add_neon_pass1| PROC ; TODO(hkuang): Find a better way to load the elements. ; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15 @@ -273,9 +273,9 @@ vst1.64 {d31}, [r1], r2 bx lr - ENDP ; |vp9_idct16x16_256_add_neon_pass1| + ENDP ; |vpx_idct16x16_256_add_neon_pass1| -;void vp9_idct16x16_256_add_neon_pass2(int16_t *src, +;void vpx_idct16x16_256_add_neon_pass2(int16_t *src, ; int16_t *output, ; int16_t *pass1Output, ; int16_t skip_adding, @@ -292,7 +292,7 @@ ; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output ; will be stored back into q8-q15 registers. This function will touch q0-q7 ; registers and use them as buffer during calculation. -|vp9_idct16x16_256_add_neon_pass2| PROC +|vpx_idct16x16_256_add_neon_pass2| PROC push {r3-r9} ; TODO(hkuang): Find a better way to load the elements. 
@@ -784,9 +784,9 @@ skip_adding_dest end_idct16x16_pass2 pop {r3-r9} bx lr - ENDP ; |vp9_idct16x16_256_add_neon_pass2| + ENDP ; |vpx_idct16x16_256_add_neon_pass2| -;void |vp9_idct16x16_10_add_neon_pass1|(int16_t *input, +;void |vpx_idct16x16_10_add_neon_pass1|(int16_t *input, ; int16_t *output, int output_stride) ; ; r0 int16_t input @@ -796,7 +796,7 @@ end_idct16x16_pass2 ; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output ; will be stored back into q8-q15 registers. This function will touch q0-q7 ; registers and use them as buffer during calculation. -|vp9_idct16x16_10_add_neon_pass1| PROC +|vpx_idct16x16_10_add_neon_pass1| PROC ; TODO(hkuang): Find a better way to load the elements. ; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15 @@ -905,9 +905,9 @@ end_idct16x16_pass2 vst1.64 {d31}, [r1], r2 bx lr - ENDP ; |vp9_idct16x16_10_add_neon_pass1| + ENDP ; |vpx_idct16x16_10_add_neon_pass1| -;void vp9_idct16x16_10_add_neon_pass2(int16_t *src, +;void vpx_idct16x16_10_add_neon_pass2(int16_t *src, ; int16_t *output, ; int16_t *pass1Output, ; int16_t skip_adding, @@ -924,7 +924,7 @@ end_idct16x16_pass2 ; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output ; will be stored back into q8-q15 registers. This function will touch q0-q7 ; registers and use them as buffer during calculation. -|vp9_idct16x16_10_add_neon_pass2| PROC +|vpx_idct16x16_10_add_neon_pass2| PROC push {r3-r9} ; TODO(hkuang): Find a better way to load the elements. @@ -1175,5 +1175,5 @@ end_idct16x16_pass2 end_idct10_16x16_pass2 pop {r3-r9} bx lr - ENDP ; |vp9_idct16x16_10_add_neon_pass2| + ENDP ; |vpx_idct16x16_10_add_neon_pass2| END diff --git a/vpx_dsp/arm/idct16x16_add_neon.c b/vpx_dsp/arm/idct16x16_add_neon.c index 545388ae6..651ebb21f 100644 --- a/vpx_dsp/arm/idct16x16_add_neon.c +++ b/vpx_dsp/arm/idct16x16_add_neon.c @@ -82,7 +82,7 @@ static INLINE void TRANSPOSE8X8( return; } -void vp9_idct16x16_256_add_neon_pass1( +void vpx_idct16x16_256_add_neon_pass1( int16_t *in, int16_t *out, int output_stride) { @@ -320,7 +320,7 @@ void vp9_idct16x16_256_add_neon_pass1( return; } -void vp9_idct16x16_256_add_neon_pass2( +void vpx_idct16x16_256_add_neon_pass2( int16_t *src, int16_t *out, int16_t *pass1Output, @@ -879,7 +879,7 @@ void vp9_idct16x16_256_add_neon_pass2( return; } -void vp9_idct16x16_10_add_neon_pass1( +void vpx_idct16x16_10_add_neon_pass1( int16_t *in, int16_t *out, int output_stride) { @@ -1017,7 +1017,7 @@ void vp9_idct16x16_10_add_neon_pass1( return; } -void vp9_idct16x16_10_add_neon_pass2( +void vpx_idct16x16_10_add_neon_pass2( int16_t *src, int16_t *out, int16_t *pass1Output, diff --git a/vpx_dsp/arm/idct16x16_neon.c b/vpx_dsp/arm/idct16x16_neon.c index c3b5cf29c..352979aa1 100644 --- a/vpx_dsp/arm/idct16x16_neon.c +++ b/vpx_dsp/arm/idct16x16_neon.c @@ -10,19 +10,19 @@ #include "vpx_dsp/vpx_dsp_common.h" -void vp9_idct16x16_256_add_neon_pass1(const int16_t *input, +void vpx_idct16x16_256_add_neon_pass1(const int16_t *input, int16_t *output, int output_stride); -void vp9_idct16x16_256_add_neon_pass2(const int16_t *src, +void vpx_idct16x16_256_add_neon_pass2(const int16_t *src, int16_t *output, int16_t *pass1Output, int16_t skip_adding, uint8_t *dest, int dest_stride); -void vp9_idct16x16_10_add_neon_pass1(const int16_t *input, +void vpx_idct16x16_10_add_neon_pass1(const int16_t *input, int16_t *output, int output_stride); -void vp9_idct16x16_10_add_neon_pass2(const int16_t *src, +void vpx_idct16x16_10_add_neon_pass2(const int16_t *src, int16_t *output, int16_t 
*pass1Output, int16_t skip_adding, @@ -31,11 +31,11 @@ void vp9_idct16x16_10_add_neon_pass2(const int16_t *src, #if HAVE_NEON_ASM /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ -extern void vp9_push_neon(int64_t *store); -extern void vp9_pop_neon(int64_t *store); +extern void vpx_push_neon(int64_t *store); +extern void vpx_pop_neon(int64_t *store); #endif // HAVE_NEON_ASM -void vp9_idct16x16_256_add_neon(const int16_t *input, +void vpx_idct16x16_256_add_neon(const int16_t *input, uint8_t *dest, int dest_stride) { #if HAVE_NEON_ASM int64_t store_reg[8]; @@ -45,18 +45,18 @@ void vp9_idct16x16_256_add_neon(const int16_t *input, #if HAVE_NEON_ASM // save d8-d15 register values. - vp9_push_neon(store_reg); + vpx_push_neon(store_reg); #endif /* Parallel idct on the upper 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8); + vpx_idct16x16_256_add_neon_pass1(input, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7 // which will be saved into row_idct_output. - vp9_idct16x16_256_add_neon_pass2(input+1, + vpx_idct16x16_256_add_neon_pass2(input+1, row_idct_output, pass1_output, 0, @@ -66,12 +66,12 @@ void vp9_idct16x16_256_add_neon(const int16_t *input, /* Parallel idct on the lower 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8); + vpx_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7 // which will be saved into row_idct_output. - vp9_idct16x16_256_add_neon_pass2(input+8*16+1, + vpx_idct16x16_256_add_neon_pass2(input+8*16+1, row_idct_output+8, pass1_output, 0, @@ -81,12 +81,12 @@ void vp9_idct16x16_256_add_neon(const int16_t *input, /* Parallel idct on the left 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); + vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7. // Then add the result to the destination data. - vp9_idct16x16_256_add_neon_pass2(row_idct_output+1, + vpx_idct16x16_256_add_neon_pass2(row_idct_output+1, row_idct_output, pass1_output, 1, @@ -96,12 +96,12 @@ void vp9_idct16x16_256_add_neon(const int16_t *input, /* Parallel idct on the right 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); + vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7. // Then add the result to the destination data. 
- vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, + vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, row_idct_output+8, pass1_output, 1, @@ -110,13 +110,13 @@ void vp9_idct16x16_256_add_neon(const int16_t *input, #if HAVE_NEON_ASM // restore d8-d15 register values. - vp9_pop_neon(store_reg); + vpx_pop_neon(store_reg); #endif return; } -void vp9_idct16x16_10_add_neon(const int16_t *input, +void vpx_idct16x16_10_add_neon(const int16_t *input, uint8_t *dest, int dest_stride) { #if HAVE_NEON_ASM int64_t store_reg[8]; @@ -126,18 +126,18 @@ void vp9_idct16x16_10_add_neon(const int16_t *input, #if HAVE_NEON_ASM // save d8-d15 register values. - vp9_push_neon(store_reg); + vpx_push_neon(store_reg); #endif /* Parallel idct on the upper 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8); + vpx_idct16x16_10_add_neon_pass1(input, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7 // which will be saved into row_idct_output. - vp9_idct16x16_10_add_neon_pass2(input+1, + vpx_idct16x16_10_add_neon_pass2(input+1, row_idct_output, pass1_output, 0, @@ -149,12 +149,12 @@ void vp9_idct16x16_10_add_neon(const int16_t *input, /* Parallel idct on the left 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); + vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7. // Then add the result to the destination data. - vp9_idct16x16_256_add_neon_pass2(row_idct_output+1, + vpx_idct16x16_256_add_neon_pass2(row_idct_output+1, row_idct_output, pass1_output, 1, @@ -164,12 +164,12 @@ void vp9_idct16x16_10_add_neon(const int16_t *input, /* Parallel idct on the right 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); + vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7. // Then add the result to the destination data. - vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, + vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, row_idct_output+8, pass1_output, 1, @@ -178,7 +178,7 @@ void vp9_idct16x16_10_add_neon(const int16_t *input, #if HAVE_NEON_ASM // restore d8-d15 register values. - vp9_pop_neon(store_reg); + vpx_pop_neon(store_reg); #endif return; diff --git a/vpx_dsp/arm/idct32x32_1_add_neon.asm b/vpx_dsp/arm/idct32x32_1_add_neon.asm index d290d0753..96d276b4d 100644 --- a/vpx_dsp/arm/idct32x32_1_add_neon.asm +++ b/vpx_dsp/arm/idct32x32_1_add_neon.asm @@ -7,7 +7,7 @@ ; file in the root of the source tree. 
; - EXPORT |vp9_idct32x32_1_add_neon| + EXPORT |vpx_idct32x32_1_add_neon| ARM REQUIRE8 PRESERVE8 @@ -64,14 +64,14 @@ vst1.8 {q15},[$dst], $stride MEND -;void vp9_idct32x32_1_add_neon(int16_t *input, uint8_t *dest, +;void vpx_idct32x32_1_add_neon(int16_t *input, uint8_t *dest, ; int dest_stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int dest_stride -|vp9_idct32x32_1_add_neon| PROC +|vpx_idct32x32_1_add_neon| PROC push {lr} pld [r1] add r3, r1, #16 ; r3 dest + 16 for second loop @@ -140,5 +140,5 @@ diff_positive_32_32_loop bne diff_positive_32_32_loop pop {pc} - ENDP ; |vp9_idct32x32_1_add_neon| + ENDP ; |vpx_idct32x32_1_add_neon| END diff --git a/vpx_dsp/arm/idct32x32_1_add_neon.c b/vpx_dsp/arm/idct32x32_1_add_neon.c index e9c9c30c2..c25c0c4a5 100644 --- a/vpx_dsp/arm/idct32x32_1_add_neon.c +++ b/vpx_dsp/arm/idct32x32_1_add_neon.c @@ -115,7 +115,7 @@ static INLINE void ST_16x8( return; } -void vp9_idct32x32_1_add_neon( +void vpx_idct32x32_1_add_neon( int16_t *input, uint8_t *dest, int dest_stride) { diff --git a/vpx_dsp/arm/idct32x32_add_neon.asm b/vpx_dsp/arm/idct32x32_add_neon.asm index 72e933eee..7483ee77e 100644 --- a/vpx_dsp/arm/idct32x32_add_neon.asm +++ b/vpx_dsp/arm/idct32x32_add_neon.asm @@ -43,7 +43,7 @@ cospi_30_64 EQU 1606 cospi_31_64 EQU 804 - EXPORT |vp9_idct32x32_1024_add_neon| + EXPORT |vpx_idct32x32_1024_add_neon| ARM REQUIRE8 PRESERVE8 @@ -288,7 +288,7 @@ cospi_31_64 EQU 804 MEND ; -------------------------------------------------------------------------- -;void vp9_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride); +;void vpx_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride); ; ; r0 int16_t *input, ; r1 uint8_t *dest, @@ -303,7 +303,7 @@ cospi_31_64 EQU 804 ; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...) ; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...) -|vp9_idct32x32_1024_add_neon| PROC +|vpx_idct32x32_1024_add_neon| PROC ; This function does one pass of idct32x32 transform. 
; ; This is done by transposing the input and then doing a 1d transform on @@ -1295,5 +1295,5 @@ idct32_bands_end_2nd_pass vpop {d8-d15} pop {r4-r11} bx lr - ENDP ; |vp9_idct32x32_1024_add_neon| + ENDP ; |vpx_idct32x32_1024_add_neon| END diff --git a/vpx_dsp/arm/idct32x32_add_neon.c b/vpx_dsp/arm/idct32x32_add_neon.c index 3656a7696..025437eb9 100644 --- a/vpx_dsp/arm/idct32x32_add_neon.c +++ b/vpx_dsp/arm/idct32x32_add_neon.c @@ -454,7 +454,7 @@ static INLINE void idct32_bands_end_2nd_pass( return; } -void vp9_idct32x32_1024_add_neon( +void vpx_idct32x32_1024_add_neon( int16_t *input, uint8_t *dest, int stride) { diff --git a/vpx_dsp/arm/idct4x4_1_add_neon.asm b/vpx_dsp/arm/idct4x4_1_add_neon.asm index 0d4a721c4..adab715dd 100644 --- a/vpx_dsp/arm/idct4x4_1_add_neon.asm +++ b/vpx_dsp/arm/idct4x4_1_add_neon.asm @@ -8,21 +8,21 @@ ; - EXPORT |vp9_idct4x4_1_add_neon| + EXPORT |vpx_idct4x4_1_add_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp9_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, +;void vpx_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, ; int dest_stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int dest_stride) -|vp9_idct4x4_1_add_neon| PROC +|vpx_idct4x4_1_add_neon| PROC ldrsh r0, [r0] ; generate cospi_16_64 = 11585 @@ -63,6 +63,6 @@ vst1.32 {d7[1]}, [r12] bx lr - ENDP ; |vp9_idct4x4_1_add_neon| + ENDP ; |vpx_idct4x4_1_add_neon| END diff --git a/vpx_dsp/arm/idct4x4_1_add_neon.c b/vpx_dsp/arm/idct4x4_1_add_neon.c index 75e14ccde..ea618700c 100644 --- a/vpx_dsp/arm/idct4x4_1_add_neon.c +++ b/vpx_dsp/arm/idct4x4_1_add_neon.c @@ -13,7 +13,7 @@ #include "vpx_dsp/inv_txfm.h" #include "vpx_ports/mem.h" -void vp9_idct4x4_1_add_neon( +void vpx_idct4x4_1_add_neon( int16_t *input, uint8_t *dest, int dest_stride) { diff --git a/vpx_dsp/arm/idct4x4_add_neon.asm b/vpx_dsp/arm/idct4x4_add_neon.asm index 00283fc8d..877fbd634 100644 --- a/vpx_dsp/arm/idct4x4_add_neon.asm +++ b/vpx_dsp/arm/idct4x4_add_neon.asm @@ -8,7 +8,7 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_idct4x4_16_add_neon| + EXPORT |vpx_idct4x4_16_add_neon| ARM REQUIRE8 PRESERVE8 @@ -16,13 +16,13 @@ AREA ||.text||, CODE, READONLY, ALIGN=2 AREA Block, CODE, READONLY ; name this block of code -;void vp9_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) +;void vpx_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int dest_stride) -|vp9_idct4x4_16_add_neon| PROC +|vpx_idct4x4_16_add_neon| PROC ; The 2D transform is done with two passes which are actually pretty ; similar. We first transform the rows. 
This is done by transposing @@ -185,6 +185,6 @@ vst1.32 {d26[1]}, [r1], r2 vst1.32 {d26[0]}, [r1] ; no post-increment bx lr - ENDP ; |vp9_idct4x4_16_add_neon| + ENDP ; |vpx_idct4x4_16_add_neon| END diff --git a/vpx_dsp/arm/idct4x4_add_neon.c b/vpx_dsp/arm/idct4x4_add_neon.c index dc91e0f30..3c975c99b 100644 --- a/vpx_dsp/arm/idct4x4_add_neon.c +++ b/vpx_dsp/arm/idct4x4_add_neon.c @@ -10,7 +10,7 @@ #include <arm_neon.h> -void vp9_idct4x4_16_add_neon( +void vpx_idct4x4_16_add_neon( int16_t *input, uint8_t *dest, int dest_stride) { diff --git a/vpx_dsp/arm/idct8x8_1_add_neon.asm b/vpx_dsp/arm/idct8x8_1_add_neon.asm index 421d202d4..dbbff364f 100644 --- a/vpx_dsp/arm/idct8x8_1_add_neon.asm +++ b/vpx_dsp/arm/idct8x8_1_add_neon.asm @@ -8,21 +8,21 @@ ; - EXPORT |vp9_idct8x8_1_add_neon| + EXPORT |vpx_idct8x8_1_add_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp9_idct8x8_1_add_neon(int16_t *input, uint8_t *dest, +;void vpx_idct8x8_1_add_neon(int16_t *input, uint8_t *dest, ; int dest_stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int dest_stride) -|vp9_idct8x8_1_add_neon| PROC +|vpx_idct8x8_1_add_neon| PROC ldrsh r0, [r0] ; generate cospi_16_64 = 11585 @@ -83,6 +83,6 @@ vst1.64 {d31}, [r12], r2 bx lr - ENDP ; |vp9_idct8x8_1_add_neon| + ENDP ; |vpx_idct8x8_1_add_neon| END diff --git a/vpx_dsp/arm/idct8x8_1_add_neon.c b/vpx_dsp/arm/idct8x8_1_add_neon.c index 10e8e931e..c1b801fad 100644 --- a/vpx_dsp/arm/idct8x8_1_add_neon.c +++ b/vpx_dsp/arm/idct8x8_1_add_neon.c @@ -13,7 +13,7 @@ #include "vpx_dsp/inv_txfm.h" #include "vpx_ports/mem.h" -void vp9_idct8x8_1_add_neon( +void vpx_idct8x8_1_add_neon( int16_t *input, uint8_t *dest, int dest_stride) { diff --git a/vpx_dsp/arm/idct8x8_add_neon.asm b/vpx_dsp/arm/idct8x8_add_neon.asm index ab5bb6920..6ab59b41b 100644 --- a/vpx_dsp/arm/idct8x8_add_neon.asm +++ b/vpx_dsp/arm/idct8x8_add_neon.asm @@ -8,8 +8,8 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_idct8x8_64_add_neon| - EXPORT |vp9_idct8x8_12_add_neon| + EXPORT |vpx_idct8x8_64_add_neon| + EXPORT |vpx_idct8x8_12_add_neon| ARM REQUIRE8 PRESERVE8 @@ -198,13 +198,13 @@ MEND AREA Block, CODE, READONLY ; name this block of code -;void vp9_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) +;void vpx_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int dest_stride) -|vp9_idct8x8_64_add_neon| PROC +|vpx_idct8x8_64_add_neon| PROC push {r4-r9} vpush {d8-d15} vld1.s16 {q8,q9}, [r0]! @@ -308,15 +308,15 @@ vpop {d8-d15} pop {r4-r9} bx lr - ENDP ; |vp9_idct8x8_64_add_neon| + ENDP ; |vpx_idct8x8_64_add_neon| -;void vp9_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) +;void vpx_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int dest_stride) -|vp9_idct8x8_12_add_neon| PROC +|vpx_idct8x8_12_add_neon| PROC push {r4-r9} vpush {d8-d15} vld1.s16 {q8,q9}, [r0]!
@@ -514,6 +514,6 @@ vpop {d8-d15} pop {r4-r9} bx lr - ENDP ; |vp9_idct8x8_12_add_neon| + ENDP ; |vpx_idct8x8_12_add_neon| END diff --git a/vpx_dsp/arm/idct8x8_add_neon.c b/vpx_dsp/arm/idct8x8_add_neon.c index ea3ce4943..4b2c2a6f8 100644 --- a/vpx_dsp/arm/idct8x8_add_neon.c +++ b/vpx_dsp/arm/idct8x8_add_neon.c @@ -238,7 +238,7 @@ static INLINE void IDCT8x8_1D( return; } -void vp9_idct8x8_64_add_neon( +void vpx_idct8x8_64_add_neon( int16_t *input, uint8_t *dest, int dest_stride) { @@ -351,7 +351,7 @@ void vp9_idct8x8_64_add_neon( return; } -void vp9_idct8x8_12_add_neon( +void vpx_idct8x8_12_add_neon( int16_t *input, uint8_t *dest, int dest_stride) { diff --git a/vp9/common/arm/neon/vp9_save_reg_neon.asm b/vpx_dsp/arm/save_reg_neon.asm similarity index 88% rename from vp9/common/arm/neon/vp9_save_reg_neon.asm rename to vpx_dsp/arm/save_reg_neon.asm index 71c3e7077..c9ca10801 100644 --- a/vp9/common/arm/neon/vp9_save_reg_neon.asm +++ b/vpx_dsp/arm/save_reg_neon.asm @@ -9,8 +9,8 @@ ; - EXPORT |vp9_push_neon| - EXPORT |vp9_pop_neon| + EXPORT |vpx_push_neon| + EXPORT |vpx_pop_neon| ARM REQUIRE8 @@ -18,14 +18,14 @@ AREA ||.text||, CODE, READONLY, ALIGN=2 -|vp9_push_neon| PROC +|vpx_push_neon| PROC vst1.i64 {d8, d9, d10, d11}, [r0]! vst1.i64 {d12, d13, d14, d15}, [r0]! bx lr ENDP -|vp9_pop_neon| PROC +|vpx_pop_neon| PROC vld1.i64 {d8, d9, d10, d11}, [r0]! vld1.i64 {d12, d13, d14, d15}, [r0]! bx lr diff --git a/vpx_dsp/fwd_txfm.c b/vpx_dsp/fwd_txfm.c index c82e1c1d5..7baaa8b0d 100644 --- a/vpx_dsp/fwd_txfm.c +++ b/vpx_dsp/fwd_txfm.c @@ -754,7 +754,7 @@ void vpx_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { for (j = 0; j < 32; ++j) // TODO(cd): see quality impact of only doing // output[j * 32 + i] = (temp_out[j] + 1) >> 2; - // PS: also change code in vp9/encoder/x86/vp9_dct_sse2.c + // PS: also change code in vpx_dsp/x86/vpx_dct_sse2.c output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; } diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c index 6babf54d0..3afa8cdc4 100644 --- a/vpx_dsp/inv_txfm.c +++ b/vpx_dsp/inv_txfm.c @@ -13,7 +13,7 @@ #include "vpx_dsp/inv_txfm.h" -void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { +void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, 0.5 shifts per pixel. 
*/ int i; @@ -65,7 +65,7 @@ void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { } } -void vp9_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { +void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { int i; tran_high_t a1, e1; tran_low_t tmp[4]; @@ -111,7 +111,7 @@ void idct4_c(const tran_low_t *input, tran_low_t *output) { output[3] = WRAPLOW(step[0] - step[3], 8); } -void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { +void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { tran_low_t out[4 * 4]; tran_low_t *outptr = out; int i, j; @@ -136,7 +136,7 @@ void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { } } -void vp9_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, +void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride) { int i; tran_high_t a1; @@ -198,7 +198,7 @@ void idct8_c(const tran_low_t *input, tran_low_t *output) { output[7] = WRAPLOW(step1[0] - step1[7], 8); } -void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { +void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { tran_low_t out[8 * 8]; tran_low_t *outptr = out; int i, j; @@ -223,7 +223,7 @@ void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { } } -void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { +void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); @@ -350,7 +350,7 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) { output[7] = WRAPLOW(-x1, 8); } -void vp9_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { +void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { tran_low_t out[8 * 8] = { 0 }; tran_low_t *outptr = out; int i, j; @@ -541,7 +541,7 @@ void idct16_c(const tran_low_t *input, tran_low_t *output) { output[15] = WRAPLOW(step2[0] - step2[15], 8); } -void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, +void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride) { tran_low_t out[16 * 16]; tran_low_t *outptr = out; @@ -739,7 +739,7 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) { output[15] = WRAPLOW(-x1, 8); } -void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, +void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int stride) { tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; @@ -766,7 +766,7 @@ void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, } } -void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { +void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); @@ -1146,7 +1146,7 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { output[31] = WRAPLOW(step1[0] - step1[31], 8); } -void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, +void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride) { tran_low_t out[32 * 32]; tran_low_t *outptr = out; @@ -1185,7 +1185,7 @@ void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, } } -void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, +void 
vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int stride) { tran_low_t out[32 * 32] = {0}; tran_low_t *outptr = out; @@ -1212,7 +1212,7 @@ void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, } } -void vp9_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { +void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; @@ -1228,7 +1228,7 @@ void vp9_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { } #if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, 0.5 shifts per pixel. */ @@ -1282,7 +1282,7 @@ void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, } } -void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, +void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, int dest_stride, int bd) { int i; tran_high_t a1, e1; @@ -1315,7 +1315,7 @@ void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, } } -void vp9_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { +void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step[4]; tran_high_t temp1, temp2; (void) bd; @@ -1336,7 +1336,7 @@ void vp9_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { output[3] = WRAPLOW(step[0] - step[3], bd); } -void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { tran_low_t out[4 * 4]; tran_low_t *outptr = out; @@ -1346,7 +1346,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, // Rows for (i = 0; i < 4; ++i) { - vp9_highbd_idct4_c(input, outptr, bd); + vpx_highbd_idct4_c(input, outptr, bd); input += 4; outptr += 4; } @@ -1355,7 +1355,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; - vp9_highbd_idct4_c(temp_in, temp_out, bd); + vpx_highbd_idct4_c(temp_in, temp_out, bd); for (j = 0; j < 4; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); @@ -1363,7 +1363,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, } } -void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, int dest_stride, int bd) { int i; tran_high_t a1; @@ -1383,7 +1383,7 @@ void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, } } -void vp9_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { +void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step1[8], step2[8]; tran_high_t temp1, temp2; // stage 1 @@ -1401,7 +1401,7 @@ void vp9_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); // stage 2 & stage 3 - even half - vp9_highbd_idct4_c(step1, step1, bd); + vpx_highbd_idct4_c(step1, step1, bd); // stage 2 - odd half step2[4] = WRAPLOW(step1[4] + step1[5], bd); @@ -1428,7 +1428,7 @@ void vp9_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { output[7] = WRAPLOW(step1[0] - step1[7], bd); } 
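The *_1_add kernels renamed above handle the degenerate but common case where only the DC coefficient survives quantization. Both 1-D passes then collapse to a multiplication by cospi_16_64 with rounding, exactly the WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8) lines visible in the hunks above, and the resulting constant a1 is added to every predicted pixel. A hedged sketch of that path; idct_dc_add_sketch is an illustrative name, and the real kernels additionally clamp through WRAPLOW and clip_pixel_add:

    #include <stdint.h>

    /* DC-only inverse transform plus reconstruction. */
    static void idct_dc_add_sketch(int16_t dc, uint8_t *dest, int stride,
                                   int size, int shift) {
      const int cospi_16_64 = 11585;
      int32_t out = ((int32_t)dc * cospi_16_64 + (1 << 13)) >> 14;  /* row pass */
      int i, j, a1, v;
      out = (out * cospi_16_64 + (1 << 13)) >> 14;                  /* column pass */
      a1 = (out + (1 << (shift - 1))) >> shift;       /* ROUND_POWER_OF_TWO */
      for (j = 0; j < size; ++j) {
        for (i = 0; i < size; ++i) {
          v = dest[i] + a1;                           /* add DC to every pixel */
          dest[i] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
        }
        dest += stride;
      }
    }

For the shift argument, the kernels in this patch use 4 for 4x4, 5 for 8x8, and 6 for 16x16 and 32x32.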
-void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { tran_low_t out[8 * 8]; tran_low_t *outptr = out; @@ -1438,7 +1438,7 @@ void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, // First transform rows. for (i = 0; i < 8; ++i) { - vp9_highbd_idct8_c(input, outptr, bd); + vpx_highbd_idct8_c(input, outptr, bd); input += 8; outptr += 8; } @@ -1447,7 +1447,7 @@ void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; - vp9_highbd_idct8_c(temp_in, temp_out, bd); + vpx_highbd_idct8_c(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); @@ -1455,7 +1455,7 @@ } } -void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i, j; tran_high_t a1; @@ -1471,7 +1471,7 @@ void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, } } -void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { +void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; tran_low_t x0 = input[0]; @@ -1509,7 +1509,7 @@ void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); } -void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { +void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; tran_low_t x0 = input[7]; @@ -1586,7 +1586,7 @@ void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { output[7] = WRAPLOW(-x1, bd); } -void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { tran_low_t out[8 * 8] = { 0 }; tran_low_t *outptr = out; @@ -1597,7 +1597,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, // First transform rows. // Only the first 4 rows have non-zero coefs.
for (i = 0; i < 4; ++i) { - vp9_highbd_idct8_c(input, outptr, bd); + vpx_highbd_idct8_c(input, outptr, bd); input += 8; outptr += 8; } @@ -1605,7 +1605,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; - vp9_highbd_idct8_c(temp_in, temp_out, bd); + vpx_highbd_idct8_c(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); @@ -1613,7 +1613,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, } } -void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { +void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step1[16], step2[16]; tran_high_t temp1, temp2; (void) bd; @@ -1779,7 +1779,7 @@ void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { output[15] = WRAPLOW(step2[0] - step2[15], bd); } -void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { tran_low_t out[16 * 16]; tran_low_t *outptr = out; @@ -1789,7 +1789,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, // First transform rows. for (i = 0; i < 16; ++i) { - vp9_highbd_idct16_c(input, outptr, bd); + vpx_highbd_idct16_c(input, outptr, bd); input += 16; outptr += 16; } @@ -1798,7 +1798,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; - vp9_highbd_idct16_c(temp_in, temp_out, bd); + vpx_highbd_idct16_c(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); @@ -1806,7 +1806,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, } } -void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { +void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; tran_high_t s9, s10, s11, s12, s13, s14, s15; @@ -1976,7 +1976,7 @@ void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { output[15] = WRAPLOW(-x1, bd); } -void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; @@ -1987,7 +1987,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, // First transform rows. Since all non-zero dct coefficients are in // upper-left 4x4 area, we only need to calculate first 4 rows here. 
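The comment just above states the invariant behind every _10_add and _34_add variant in this patch (the suffix is the largest eob the kernel accepts): when the last non-zero coefficient falls inside the top-left corner of the block, only the first few rows carry energy, so the row pass can stop early while the rest of the intermediate block stays zero; the column pass is unchanged. The four-iteration row loop that follows this aside is exactly that shortcut. A generic sketch of it, assuming a caller-supplied 1-D transform; partial_idct_rows and idct_1d_fn are illustrative names, not libvpx symbols:

    #include <stdint.h>
    #include <string.h>

    typedef void (*idct_1d_fn)(const int16_t *in, int16_t *out);

    static void partial_idct_rows(const int16_t *input, int16_t *out, int n,
                                  int nonzero_rows, idct_1d_fn idct1d) {
      int i;
      /* Rows at and below nonzero_rows have all-zero inputs, so by
         linearity their outputs are zero; clear them up front. */
      memset(out, 0, sizeof(*out) * n * n);
      for (i = 0; i < nonzero_rows; ++i)
        idct1d(input + n * i, out + n * i);
      /* The column pass still visits all n columns, exactly as the full
         transform does, because any column can have energy in its top rows. */
    }

In the 16x16 _10_ kernel above, n is 16 and nonzero_rows is 4.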
for (i = 0; i < 4; ++i) { - vp9_highbd_idct16_c(input, outptr, bd); + vpx_highbd_idct16_c(input, outptr, bd); input += 16; outptr += 16; } @@ -1996,7 +1996,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j*16 + i]; - vp9_highbd_idct16_c(temp_in, temp_out, bd); + vpx_highbd_idct16_c(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); @@ -2004,7 +2004,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, } } -void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i, j; tran_high_t a1; @@ -2021,7 +2021,8 @@ void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, } } -void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd) { +static void highbd_idct32_c(const tran_low_t *input, + tran_low_t *output, int bd) { tran_low_t step1[32], step2[32]; tran_high_t temp1, temp2; (void) bd; @@ -2389,7 +2390,7 @@ void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd) { output[31] = WRAPLOW(step1[0] - step1[31], bd); } -void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { tran_low_t out[32 * 32]; tran_low_t *outptr = out; @@ -2429,7 +2430,7 @@ void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, } } -void vp9_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { tran_low_t out[32 * 32] = {0}; tran_low_t *outptr = out; @@ -2456,7 +2457,7 @@ void vp9_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, } } -void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, +void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i, j; int a1; diff --git a/vpx_dsp/inv_txfm.h b/vpx_dsp/inv_txfm.h index b11039ab8..23588139e 100644 --- a/vpx_dsp/inv_txfm.h +++ b/vpx_dsp/inv_txfm.h @@ -97,14 +97,13 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output); void iadst16_c(const tran_low_t *input, tran_low_t *output); #if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd); -void vp9_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd); -void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd); -void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd); +void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd); +void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd); +void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd); -void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd); -void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd); -void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd); +void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd); +void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd); +void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd); static INLINE uint16_t 
highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, int bd) { diff --git a/vpx_dsp/mips/idct16x16_msa.c b/vpx_dsp/mips/idct16x16_msa.c index 3da0edcca..5faac715e 100644 --- a/vpx_dsp/mips/idct16x16_msa.c +++ b/vpx_dsp/mips/idct16x16_msa.c @@ -10,7 +10,7 @@ #include "vpx_dsp/mips/inv_txfm_msa.h" -void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { +void vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { v8i16 loc0, loc1, loc2, loc3; v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14; v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15; @@ -103,7 +103,7 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { ST_SH8(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, (output + 8), 16); } -void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, +void vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 loc0, loc1, loc2, loc3; v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14; @@ -201,7 +201,7 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15); } -void vp9_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]); @@ -210,25 +210,25 @@ void vp9_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst, /* transform rows */ for (i = 0; i < 2; ++i) { /* process 16 * 8 block */ - vp9_idct16_1d_rows_msa((input + (i << 7)), (out + (i << 7))); + vpx_idct16_1d_rows_msa((input + (i << 7)), (out + (i << 7))); } /* transform columns */ for (i = 0; i < 2; ++i) { /* process 8 * 16 block */ - vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)), + vpx_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)), dst_stride); } } -void vp9_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { uint8_t i; DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]); int16_t *out = out_arr; /* process 16 * 8 block */ - vp9_idct16_1d_rows_msa(input, out); + vpx_idct16_1d_rows_msa(input, out); /* short case just considers top 4 rows as valid output */ out += 4 * 16; @@ -255,12 +255,12 @@ void vp9_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst, /* transform columns */ for (i = 0; i < 2; ++i) { /* process 8 * 16 block */ - vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)), + vpx_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)), dst_stride); } } -void vp9_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { uint8_t i; int16_t out; @@ -290,7 +290,7 @@ void vp9_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst, } } -void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) { +void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) { v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15; @@ -321,7 +321,7 @@ void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) { ST_SH8(l8, l9, l10, l11, l12, l13, l14, l15, (output + 8), 16); } -void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, +void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 v0, v2, v4, v6, k0, k1, k2, k3; 
v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; diff --git a/vpx_dsp/mips/idct32x32_msa.c b/vpx_dsp/mips/idct32x32_msa.c index 03d280729..d5b3966e0 100644 --- a/vpx_dsp/mips/idct32x32_msa.c +++ b/vpx_dsp/mips/idct32x32_msa.c @@ -10,8 +10,8 @@ #include "vpx_dsp/mips/inv_txfm_msa.h" -static void vp9_idct32x8_row_transpose_store(const int16_t *input, - int16_t *tmp_buf) { +static void idct32x8_row_transpose_store(const int16_t *input, + int16_t *tmp_buf) { v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7; /* 1st & 2nd 8x8 */ @@ -38,8 +38,8 @@ static void vp9_idct32x8_row_transpose_store(const int16_t *input, ST_SH4(m6, n6, m7, n7, (tmp_buf + 28 * 8), 8); } -static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf, - int16_t *tmp_eve_buf) { +static void idct32x8_row_even_process_store(int16_t *tmp_buf, + int16_t *tmp_eve_buf) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7; @@ -122,8 +122,8 @@ static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf, ST_SH(loc3, (tmp_eve_buf + 7 * 8)); } -static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, - int16_t *tmp_odd_buf) { +static void idct32x8_row_odd_process_store(int16_t *tmp_buf, + int16_t *tmp_odd_buf) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; @@ -240,10 +240,10 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8); } -static void vp9_idct_butterfly_transpose_store(int16_t *tmp_buf, - int16_t *tmp_eve_buf, - int16_t *tmp_odd_buf, - int16_t *dst) { +static void idct_butterfly_transpose_store(int16_t *tmp_buf, + int16_t *tmp_eve_buf, + int16_t *tmp_odd_buf, + int16_t *dst) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7; @@ -341,20 +341,20 @@ static void vp9_idct_butterfly_transpose_store(int16_t *tmp_buf, ST_SH4(m6, n6, m7, n7, (dst + 24 + 4 * 32), 32); } -static void vp9_idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) { +static void idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) { DECLARE_ALIGNED(32, int16_t, tmp_buf[8 * 32]); DECLARE_ALIGNED(32, int16_t, tmp_odd_buf[16 * 8]); DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]); - vp9_idct32x8_row_transpose_store(input, &tmp_buf[0]); - vp9_idct32x8_row_even_process_store(&tmp_buf[0], &tmp_eve_buf[0]); - vp9_idct32x8_row_odd_process_store(&tmp_buf[0], &tmp_odd_buf[0]); - vp9_idct_butterfly_transpose_store(&tmp_buf[0], &tmp_eve_buf[0], - &tmp_odd_buf[0], output); + idct32x8_row_transpose_store(input, &tmp_buf[0]); + idct32x8_row_even_process_store(&tmp_buf[0], &tmp_eve_buf[0]); + idct32x8_row_odd_process_store(&tmp_buf[0], &tmp_odd_buf[0]); + idct_butterfly_transpose_store(&tmp_buf[0], &tmp_eve_buf[0], + &tmp_odd_buf[0], output); } -static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf, - int16_t *tmp_eve_buf) { +static void idct8x32_column_even_process_store(int16_t *tmp_buf, + int16_t *tmp_eve_buf) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7; @@ -433,8 +433,8 @@ static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf, ST_SH2(loc2, loc0, (tmp_eve_buf + 8 * 8), 8); } -static void vp9_idct8x32_column_odd_process_store(int16_t 
*tmp_buf, - int16_t *tmp_odd_buf) { +static void idct8x32_column_odd_process_store(int16_t *tmp_buf, + int16_t *tmp_odd_buf) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; @@ -540,10 +540,10 @@ static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf, ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8); } -static void vp9_idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf, - int16_t *tmp_odd_buf, - uint8_t *dst, - int32_t dst_stride) { +static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf, + int16_t *tmp_odd_buf, + uint8_t *dst, + int32_t dst_stride) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7; @@ -627,18 +627,18 @@ static void vp9_idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf, n1, n3, n5, n7); } -static void vp9_idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, - int32_t dst_stride) { +static void idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, + int32_t dst_stride) { DECLARE_ALIGNED(32, int16_t, tmp_odd_buf[16 * 8]); DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]); - vp9_idct8x32_column_even_process_store(input, &tmp_eve_buf[0]); - vp9_idct8x32_column_odd_process_store(input, &tmp_odd_buf[0]); - vp9_idct8x32_column_butterfly_addblk(&tmp_eve_buf[0], &tmp_odd_buf[0], - dst, dst_stride); + idct8x32_column_even_process_store(input, &tmp_eve_buf[0]); + idct8x32_column_odd_process_store(input, &tmp_odd_buf[0]); + idct8x32_column_butterfly_addblk(&tmp_eve_buf[0], &tmp_odd_buf[0], + dst, dst_stride); } -void vp9_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]); @@ -647,18 +647,18 @@ void vp9_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dst, /* transform rows */ for (i = 0; i < 4; ++i) { /* process 32 * 8 block */ - vp9_idct32x8_1d_rows_msa((input + (i << 8)), (out_ptr + (i << 8))); + idct32x8_1d_rows_msa((input + (i << 8)), (out_ptr + (i << 8))); } /* transform columns */ for (i = 0; i < 4; ++i) { /* process 8 * 32 block */ - vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), - dst_stride); + idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), + dst_stride); } } -void vp9_idct32x32_34_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct32x32_34_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]); @@ -693,17 +693,17 @@ void vp9_idct32x32_34_add_msa(const int16_t *input, uint8_t *dst, out_ptr = out_arr; /* rows: only upper-left 8x8 has non-zero coeff */ - vp9_idct32x8_1d_rows_msa(input, out_ptr); + idct32x8_1d_rows_msa(input, out_ptr); /* transform columns */ for (i = 0; i < 4; ++i) { /* process 8 * 32 block */ - vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), - dst_stride); + idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), + dst_stride); } } -void vp9_idct32x32_1_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct32x32_1_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int32_t i; int16_t out; diff --git a/vpx_dsp/mips/idct4x4_msa.c b/vpx_dsp/mips/idct4x4_msa.c index 91b15d26d..f289d8eda 100644 --- a/vpx_dsp/mips/idct4x4_msa.c +++ b/vpx_dsp/mips/idct4x4_msa.c @@ -10,7 +10,7 @@ #include "vpx_dsp/mips/inv_txfm_msa.h" -void 
vp9_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst, +void vpx_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 in0, in1, in2, in3; v4i32 in0_r, in1_r, in2_r, in3_r, in4_r; @@ -47,7 +47,7 @@ void vp9_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst, ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride); } -void vp9_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst, +void vpx_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int16_t a1, e1; v8i16 in1, in0 = { 0 }; @@ -67,7 +67,7 @@ void vp9_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst, ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride); } -void vp9_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 in0, in1, in2, in3; @@ -84,7 +84,7 @@ void vp9_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst, ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride); } -void vp9_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int16_t out; v8i16 vec; diff --git a/vpx_dsp/mips/idct8x8_msa.c b/vpx_dsp/mips/idct8x8_msa.c index 3488db2b2..fd667e456 100644 --- a/vpx_dsp/mips/idct8x8_msa.c +++ b/vpx_dsp/mips/idct8x8_msa.c @@ -10,7 +10,7 @@ #include "vpx_dsp/mips/inv_txfm_msa.h" -void vp9_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; @@ -38,7 +38,7 @@ void vp9_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst, VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); } -void vp9_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3; @@ -99,7 +99,7 @@ void vp9_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst, VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); } -void vp9_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst, +void vpx_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int16_t out; int32_t val; diff --git a/vpx_dsp/mips/inv_txfm_dspr2.h b/vpx_dsp/mips/inv_txfm_dspr2.h index 537830b50..abd850911 100644 --- a/vpx_dsp/mips/inv_txfm_dspr2.h +++ b/vpx_dsp/mips/inv_txfm_dspr2.h @@ -49,10 +49,10 @@ extern "C" { ); \ out; }) -void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, +void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output); -void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, +void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output); +void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride); void iadst4_dspr2(const int16_t *input, int16_t *output); void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows); diff --git a/vpx_dsp/mips/inv_txfm_msa.h b/vpx_dsp/mips/inv_txfm_msa.h index 8266541c5..1458561a6 100644 --- a/vpx_dsp/mips/inv_txfm_msa.h +++ b/vpx_dsp/mips/inv_txfm_msa.h @@ -401,10 +401,10 @@ MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15); \ } -void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, +void vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, int32_t dst_stride); -void 
vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output); -void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, +void vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output); +void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, int32_t dst_stride); -void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output); +void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output); #endif // VPX_DSP_MIPS_INV_TXFM_MSA_H_ diff --git a/vpx_dsp/mips/itrans16_dspr2.c b/vpx_dsp/mips/itrans16_dspr2.c index 0e6e759b7..6d41e6190 100644 --- a/vpx_dsp/mips/itrans16_dspr2.c +++ b/vpx_dsp/mips/itrans16_dspr2.c @@ -887,7 +887,7 @@ void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, } } -void vp9_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, int dest_stride) { DECLARE_ALIGNED(32, int16_t, out[16 * 16]); uint32_t pos = 45; @@ -906,7 +906,7 @@ void vp9_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, idct16_cols_add_blk_dspr2(out, dest, dest_stride); } -void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest, int dest_stride) { DECLARE_ALIGNED(32, int16_t, out[16 * 16]); int16_t *outptr = out; @@ -955,7 +955,7 @@ void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest, idct16_cols_add_blk_dspr2(out, dest, dest_stride); } -void vp9_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest, int dest_stride) { uint32_t pos = 45; int32_t out; diff --git a/vpx_dsp/mips/itrans32_cols_dspr2.c b/vpx_dsp/mips/itrans32_cols_dspr2.c index c9cda52e3..553acb0f5 100644 --- a/vpx_dsp/mips/itrans32_cols_dspr2.c +++ b/vpx_dsp/mips/itrans32_cols_dspr2.c @@ -13,7 +13,7 @@ #include "vpx_dsp/txfm_common.h" #if HAVE_DSPR2 -void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, +void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6; int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13; diff --git a/vpx_dsp/mips/itrans32_dspr2.c b/vpx_dsp/mips/itrans32_dspr2.c index 25966346c..523da1df1 100644 --- a/vpx_dsp/mips/itrans32_dspr2.c +++ b/vpx_dsp/mips/itrans32_dspr2.c @@ -865,7 +865,7 @@ static void idct32_rows_dspr2(const int16_t *input, int16_t *output, } } -void vp9_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest, int dest_stride) { DECLARE_ALIGNED(32, int16_t, out[32 * 32]); int16_t *outptr = out; @@ -882,10 +882,10 @@ void vp9_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest, idct32_rows_dspr2(input, outptr, 32); // Columns - vp9_idct32_cols_add_blk_dspr2(out, dest, dest_stride); + vpx_idct32_cols_add_blk_dspr2(out, dest, dest_stride); } -void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[32 * 32]); int16_t *outptr = out; @@ -944,10 +944,10 @@ void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest, } // Columns - vp9_idct32_cols_add_blk_dspr2(out, dest, stride); + vpx_idct32_cols_add_blk_dspr2(out, dest, stride); } -void vp9_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct32x32_1_add_dspr2(const int16_t *input, uint8_t 
*dest, int stride) { int r, out; int32_t a1, absa1; diff --git a/vpx_dsp/mips/itrans4_dspr2.c b/vpx_dsp/mips/itrans4_dspr2.c index b48e73bba..ecb8bd3de 100644 --- a/vpx_dsp/mips/itrans4_dspr2.c +++ b/vpx_dsp/mips/itrans4_dspr2.c @@ -14,7 +14,7 @@ #include "vpx_dsp/txfm_common.h" #if HAVE_DSPR2 -void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) { +void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output) { int16_t step_0, step_1, step_2, step_3; int Temp0, Temp1, Temp2, Temp3; const int const_2_power_13 = 8192; @@ -99,7 +99,7 @@ void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) { } } -void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, +void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride) { int16_t step_0, step_1, step_2, step_3; int Temp0, Temp1, Temp2, Temp3; @@ -221,7 +221,7 @@ void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, } } -void vp9_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, int dest_stride) { DECLARE_ALIGNED(32, int16_t, out[4 * 4]); int16_t *outptr = out; @@ -235,13 +235,13 @@ void vp9_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, ); // Rows - vp9_idct4_rows_dspr2(input, outptr); + vpx_idct4_rows_dspr2(input, outptr); // Columns - vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); + vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); } -void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, int dest_stride) { int a1, absa1; int r; diff --git a/vpx_dsp/mips/itrans8_dspr2.c b/vpx_dsp/mips/itrans8_dspr2.c index d3baba936..823e845d5 100644 --- a/vpx_dsp/mips/itrans8_dspr2.c +++ b/vpx_dsp/mips/itrans8_dspr2.c @@ -442,7 +442,7 @@ void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, } } -void vp9_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, int dest_stride) { DECLARE_ALIGNED(32, int16_t, out[8 * 8]); int16_t *outptr = out; @@ -462,7 +462,7 @@ void vp9_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); } -void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest, int dest_stride) { DECLARE_ALIGNED(32, int16_t, out[8 * 8]); int16_t *outptr = out; @@ -507,7 +507,7 @@ void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest, idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); } -void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest, +void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest, int dest_stride) { uint32_t pos = 45; int32_t out; diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 6c2e520ee..812ee9864 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -186,6 +186,7 @@ endif # ARCH_X86_64 endif # CONFIG_USE_X86INC ifeq ($(HAVE_NEON_ASM),yes) +DSP_SRCS-yes += arm/save_reg_neon$(ASM) DSP_SRCS-yes += arm/idct4x4_1_add_neon$(ASM) DSP_SRCS-yes += arm/idct4x4_add_neon$(ASM) DSP_SRCS-yes += arm/idct8x8_1_add_neon$(ASM) diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index ca564bcf2..d9624f5ef 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -598,183 +598,183 @@ if (vpx_config("CONFIG_VP9") eq "yes") { if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Note as 
optimized versions of these functions are added we need to add a check to ensure # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. - add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct4x4_1_add/; + add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct4x4_1_add/; - add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct4x4_16_add/; + add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct4x4_16_add/; - add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_1_add/; + add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_1_add/; - add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_64_add/; + add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_64_add/; - add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_12_add/; + add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_12_add/; - add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct16x16_1_add/; + add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_1_add/; - add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct16x16_256_add/; + add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_256_add/; - add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct16x16_10_add/; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_10_add/; - add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct32x32_1024_add/; + add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_1024_add/; - add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct32x32_34_add/; + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_34_add/; - add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct32x32_1_add/; + add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_1_add/; - add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_iwht4x4_1_add/; + add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_iwht4x4_1_add/; - add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, 
uint8_t *dest, int dest_stride"; - specialize qw/vp9_iwht4x4_16_add/; + add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_iwht4x4_16_add/; - add_proto qw/void vp9_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct4x4_1_add/; + add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct4x4_1_add/; - add_proto qw/void vp9_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct8x8_1_add/; + add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct8x8_1_add/; - add_proto qw/void vp9_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct16x16_1_add/; + add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct16x16_1_add/; - add_proto qw/void vp9_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct32x32_1024_add/; + add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct32x32_1024_add/; - add_proto qw/void vp9_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct32x32_34_add/; + add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct32x32_34_add/; - add_proto qw/void vp9_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct32x32_1_add/; + add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct32x32_1_add/; - add_proto qw/void vp9_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_iwht4x4_1_add/; + add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_iwht4x4_1_add/; - add_proto qw/void vp9_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_iwht4x4_16_add/; + add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_iwht4x4_16_add/; # Force C versions if CONFIG_EMULATE_HARDWARE is 1 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct4x4_16_add/; + add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct4x4_16_add/; - add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct8x8_64_add/; + add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct8x8_64_add/; - add_proto qw/void 
vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct8x8_10_add/; + add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct8x8_10_add/; - add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct16x16_256_add/; + add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct16x16_256_add/; - add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct16x16_10_add/; + add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct16x16_10_add/; } else { - add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct4x4_16_add sse2/; + add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct4x4_16_add sse2/; - add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct8x8_64_add sse2/; + add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct8x8_64_add sse2/; - add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct8x8_10_add sse2/; + add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct8x8_10_add sse2/; - add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct16x16_256_add sse2/; + add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct16x16_256_add sse2/; - add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - specialize qw/vp9_highbd_idct16x16_10_add sse2/; + add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; + specialize qw/vpx_highbd_idct16x16_10_add sse2/; } # CONFIG_EMULATE_HARDWARE } else { # Force C versions if CONFIG_EMULATE_HARDWARE is 1 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct4x4_1_add/; + add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct4x4_1_add/; - add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct4x4_16_add/; + add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct4x4_16_add/; - add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_1_add/; + add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + 
specialize qw/vpx_idct8x8_1_add/; - add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_64_add/; + add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_64_add/; - add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_12_add/; + add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_12_add/; - add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct16x16_1_add/; + add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_1_add/; - add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct16x16_256_add/; + add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_256_add/; - add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct16x16_10_add/; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_10_add/; - add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct32x32_1024_add/; + add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_1024_add/; - add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct32x32_34_add/; + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_34_add/; - add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct32x32_1_add/; + add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_1_add/; - add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_iwht4x4_1_add/; + add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_iwht4x4_1_add/; - add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_iwht4x4_16_add/; + add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_iwht4x4_16_add/; } else { - add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct4x4_1_add sse2 neon dspr2 msa/; + add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/; - add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct4x4_16_add sse2 neon dspr2 msa/; + add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct4x4_16_add sse2 neon dspr2 msa/; - add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; 
- specialize qw/vp9_idct8x8_1_add sse2 neon dspr2 msa/; + add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/; - add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; + add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; - add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; + add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; - add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct16x16_1_add sse2 neon dspr2 msa/; + add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/; - add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct16x16_256_add sse2 neon dspr2 msa/; + add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/; - add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/; - add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2 msa/; + add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/; - add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2 msa/; + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_34_add sse2 neon_asm dspr2 msa/; # Need to add 34 eob idct32x32 neon implementation. 
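The aliasing line just below this paragraph is what the preceding comment refers to: until a dedicated 34-eob NEON kernel exists, the 34-eob symbol is pointed at the full 1024-coefficient NEON implementation. More generally, these add_proto/specialize pairs feed libvpx's run-time CPU dispatch: for each prototype the generated vpx_dsp_rtcd.h either defines the public name straight to the lone C implementation (as in the CONFIG_EMULATE_HARDWARE branches, where specialize lists no SIMD variants) or declares it as a function pointer retargeted from CPU feature flags at startup. A hand-written approximation of the generated shape for one prototype, not actual rtcd.pl output; the flag value and the local tran_low_t stand-in are assumptions:

    #include <stdint.h>

    typedef int16_t tran_low_t;  /* stand-in; the real typedef lives in vpx_dsp */

    void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest,
                              int dest_stride);
    void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest,
                                 int dest_stride);

    /* With SIMD specializations, the public name becomes a runtime pointer. */
    void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest,
                               int dest_stride);

    #define HAS_SSE2 0x04  /* illustrative flag value */

    static void setup_rtcd_internal(int cpu_flags) {
      vpx_idct4x4_16_add = vpx_idct4x4_16_add_c;  /* always-safe C fallback */
      if (cpu_flags & HAS_SSE2)
        vpx_idct4x4_16_add = vpx_idct4x4_16_add_sse2;
    }

This is why the rename must touch the rtcd defs, the C and assembly definitions, and the tests in one commit: the dispatcher, not the call sites, ties the vpx_-prefixed name to whichever implementation is selected.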
diff --git a/vpx_dsp/x86/inv_txfm_sse2.asm b/vpx_dsp/x86/inv_txfm_sse2.asm
index 69b68e6d8..8cea1374d 100644
--- a/vpx_dsp/x86/inv_txfm_sse2.asm
+++ b/vpx_dsp/x86/inv_txfm_sse2.asm
@@ -7,6 +7,9 @@
 ;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
+
+%define program_name vpx
+
 %include "third_party/x86inc/x86inc.asm"
 
 SECTION .text
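The %define program_name vpx added above (and again in the SSSE3 file at the end of this patch) is what keeps the x86inc-based assembly in step with the rename: third_party/x86inc/x86inc.asm derives the exported symbol for each cglobal declaration from this prefix. A hedged sketch of the resulting linkage contract, using the vpx_iwht4x4_16_add "$sse2_x86inc" binding from the rtcd file above (symbol construction simplified):

    /* C side: the prototype the renamed RTCD definitions now expect. */
    void vpx_iwht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest,
                                 int dest_stride);
    /* asm side: x86inc prefixes each cglobal name with program_name, so
     * the object file exports vpx_iwht4x4_16_add_sse2 instead of a
     * vp9_-prefixed symbol; without the %define the specialized path
     * would be left as an unresolved reference at link time. */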
diff --git a/vpx_dsp/x86/inv_txfm_sse2.c b/vpx_dsp/x86/inv_txfm_sse2.c
index 125fe2281..f3af68f08 100644
--- a/vpx_dsp/x86/inv_txfm_sse2.c
+++ b/vpx_dsp/x86/inv_txfm_sse2.c
@@ -21,7 +21,7 @@
       *(int *)(dest) = _mm_cvtsi128_si32(d0); \
   }
 
-void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i eight = _mm_set1_epi16(8);
   const __m128i cst = _mm_setr_epi16(
@@ -151,7 +151,7 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   }
 }
 
-void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   __m128i dc_value;
   const __m128i zero = _mm_setzero_si128();
   int a;
@@ -449,7 +449,7 @@ void iadst4_sse2(__m128i *in) {
     out7 = _mm_subs_epi16(stp1_0, stp2_7); \
   }
 
-void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1 << 4);
@@ -480,7 +480,7 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
 
   // 2-D
   for (i = 0; i < 2; i++) {
-    // 8x8 Transpose is copied from vp9_fdct8x8_sse2()
+    // 8x8 Transpose is copied from vpx_fdct8x8_sse2()
     TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7,
                   in0, in1, in2, in3, in4, in5, in6, in7);
 
@@ -518,7 +518,7 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   RECON_AND_STORE(dest + 7 * stride, in7);
 }
 
-void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   __m128i dc_value;
   const __m128i zero = _mm_setzero_si128();
   int a;
@@ -555,7 +555,7 @@ void idct8_sse2(__m128i *in) {
   __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;
   __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 
-  // 8x8 Transpose is copied from vp9_fdct8x8_sse2()
+  // 8x8 Transpose is copied from vpx_fdct8x8_sse2()
   TRANSPOSE_8X8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7],
                 in0, in1, in2, in3, in4, in5, in6, in7);
 
@@ -792,7 +792,7 @@ void iadst8_sse2(__m128i *in) {
   in[7] = _mm_sub_epi16(k__const_0, s1);
 }
 
-void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1 << 4);
@@ -1169,7 +1169,7 @@ void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
                          stp2_10, stp2_13, stp2_11, stp2_12) \
   }
 
-void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
                                 int stride) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1 << 5);
@@ -1294,7 +1294,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
   }
 }
 
-void vp9_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   __m128i dc_value;
   const __m128i zero = _mm_setzero_si128();
   int a, i;
@@ -2152,7 +2152,7 @@ void iadst16_sse2(__m128i *in0, __m128i *in1) {
   iadst16_8col(in1);
 }
 
-void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
                                int stride) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1 << 5);
@@ -3029,7 +3029,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
 }
 
 // Only upper-left 8x8 has non-zero coeff
-void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
                                int stride) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1<<5);
@@ -3188,7 +3188,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
   }
 }
 
-void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
                                  int stride) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1 << 5);
@@ -3464,7 +3464,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
   }
 }
 
-void vp9_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   __m128i dc_value;
   const __m128i zero = _mm_setzero_si128();
   int a, i;
@@ -3498,7 +3498,7 @@ static INLINE __m128i clamp_high_sse2(__m128i value, int bd) {
   return retval;
 }
 
-void vp9_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
                                     int stride, int bd) {
   tran_low_t out[4 * 4];
   tran_low_t *outptr = out;
@@ -3561,7 +3561,7 @@ void vp9_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
   } else {
     // Run the un-optimised row transform
     for (i = 0; i < 4; ++i) {
-      vp9_highbd_idct4_c(input, outptr, bd);
+      vpx_highbd_idct4_c(input, outptr, bd);
       input += 4;
       outptr += 4;
     }
@@ -3605,7 +3605,7 @@ void vp9_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
   for (i = 0; i < 4; ++i) {
     for (j = 0; j < 4; ++j)
       temp_in[j] = out[j * 4 + i];
-    vp9_highbd_idct4_c(temp_in, temp_out, bd);
+    vpx_highbd_idct4_c(temp_in, temp_out, bd);
     for (j = 0; j < 4; ++j) {
       dest[j * stride + i] = highbd_clip_pixel_add(
           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
@@ -3614,7 +3614,7 @@ void vp9_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
   }
 }
 
-void vp9_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
                                     int stride, int bd) {
   tran_low_t out[8 * 8];
   tran_low_t *outptr = out;
@@ -3679,7 +3679,7 @@ void vp9_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
   } else {
     // Run the un-optimised row transform
     for (i = 0; i < 8; ++i) {
-      vp9_highbd_idct8_c(input, outptr, bd);
+      vpx_highbd_idct8_c(input, outptr, bd);
       input += 8;
       outptr += 8;
     }
@@ -3706,7 +3706,7 @@ void vp9_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
   for (i = 0; i < 8; ++i) {
     for (j = 0; j < 8; ++j)
       temp_in[j] = out[j * 8 + i];
-    vp9_highbd_idct8_c(temp_in, temp_out, bd);
+    vpx_highbd_idct8_c(temp_in, temp_out, bd);
     for (j = 0; j < 8; ++j) {
       dest[j * stride + i] = highbd_clip_pixel_add(
           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
@@ -3715,7 +3715,7 @@ void vp9_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
   }
 }
 
-void vp9_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
                                     int stride, int bd) {
   tran_low_t out[8 * 8] = { 0 };
   tran_low_t *outptr = out;
@@ -3783,7 +3783,7 @@ void vp9_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
   } else {
     // Run the un-optimised row transform
    for (i = 0; i < 4; ++i) {
-      vp9_highbd_idct8_c(input, outptr, bd);
+      vpx_highbd_idct8_c(input, outptr, bd);
       input += 8;
       outptr += 8;
     }
@@ -3810,7 +3810,7 @@ void vp9_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
   for (i = 0; i < 8; ++i) {
     for (j = 0; j < 8; ++j)
       temp_in[j] = out[j * 8 + i];
-    vp9_highbd_idct8_c(temp_in, temp_out, bd);
+    vpx_highbd_idct8_c(temp_in, temp_out, bd);
     for (j = 0; j < 8; ++j) {
       dest[j * stride + i] = highbd_clip_pixel_add(
           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
@@ -3819,7 +3819,7 @@ void vp9_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
   }
 }
 
-void vp9_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
                                        int stride, int bd) {
   tran_low_t out[16 * 16];
   tran_low_t *outptr = out;
@@ -3892,7 +3892,7 @@ void vp9_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
   } else {
     // Run the un-optimised row transform
     for (i = 0; i < 16; ++i) {
-      vp9_highbd_idct16_c(input, outptr, bd);
+      vpx_highbd_idct16_c(input, outptr, bd);
       input += 16;
       outptr += 16;
     }
@@ -3924,7 +3924,7 @@ void vp9_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
   for (i = 0; i < 16; ++i) {
     for (j = 0; j < 16; ++j)
       temp_in[j] = out[j * 16 + i];
-    vp9_highbd_idct16_c(temp_in, temp_out, bd);
+    vpx_highbd_idct16_c(temp_in, temp_out, bd);
     for (j = 0; j < 16; ++j) {
       dest[j * stride + i] = highbd_clip_pixel_add(
           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
@@ -3933,7 +3933,7 @@ void vp9_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
   }
 }
 
-void vp9_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
                                       int stride, int bd) {
   tran_low_t out[16 * 16] = { 0 };
   tran_low_t *outptr = out;
@@ -4011,7 +4011,7 @@ void vp9_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
   } else {
     // Run the un-optimised row transform
     for (i = 0; i < 4; ++i) {
-      vp9_highbd_idct16_c(input, outptr, bd);
+      vpx_highbd_idct16_c(input, outptr, bd);
       input += 16;
       outptr += 16;
     }
@@ -4043,7 +4043,7 @@ void vp9_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
   for (i = 0; i < 16; ++i) {
     for (j = 0; j < 16; ++j)
       temp_in[j] = out[j * 16 + i];
-    vp9_highbd_idct16_c(temp_in, temp_out, bd);
+    vpx_highbd_idct16_c(temp_in, temp_out, bd);
     for (j = 0; j < 16; ++j) {
       dest[j * stride + i] = highbd_clip_pixel_add(
           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
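Every high-bitdepth column pass above ends with the same reconstruction step: round the inverse-transform output by a size-dependent shift (4, 5, or 6 for the 4x4, 8x8, and 16x16 transforms), add it to the prediction, and clip to the active bit depth. A minimal self-contained sketch of that arithmetic; the helper names mirror the calls above, but the real libvpx definitions may differ in detail:

    #include <stdint.h>

    typedef int32_t tran_high_t;

    /* Divide by 2^n, rounding to nearest, as in the
     * ROUND_POWER_OF_TWO(temp_out[j], shift) calls above. */
    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    /* Clamp a reconstructed sample to [0, 2^bd - 1] for bd = 8, 10 or 12. */
    static uint16_t clip_pixel_highbd(int val, int bd) {
      const int max = (1 << bd) - 1;
      return (uint16_t)(val < 0 ? 0 : (val > max ? max : val));
    }

    /* Equivalent of the highbd_clip_pixel_add() used above: prediction plus
     * residual, clipped back into the valid range for the bit depth. */
    static uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
                                          int bd) {
      return clip_pixel_highbd(dest + (int)trans, bd);
    }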
diff --git a/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm b/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm
index 2c1060710..0d5b1e932 100644
--- a/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm
+++ b/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm
@@ -7,6 +7,9 @@
 ;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
+
+%define program_name vpx
+
 %include "third_party/x86inc/x86inc.asm"
 
 ; This file provides SSSE3 version of the inverse transformation. Part