diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index 18c12a857..796a2e96a 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -27,7 +27,7 @@ using libvpx_test::ACMRandom; namespace { void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/, int stride, int /*tx_type*/) { - vp9_short_fdct4x4_c(in, out, stride); + vp9_fdct4x4_c(in, out, stride); } void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst, int stride, int /*tx_type*/) { diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 72b2126da..60636eee0 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -695,11 +695,17 @@ specialize vp9_short_fht8x8 sse2 prototype void vp9_short_fht16x16 "int16_t *InputData, int16_t *OutputData, int pitch, int tx_type" specialize vp9_short_fht16x16 sse2 +prototype void vp9_fwht4x4 "int16_t *input, int16_t *output, int stride" +specialize vp9_fwht4x4 + +prototype void vp9_fdct4x4 "int16_t *input, int16_t *output, int stride" +specialize vp9_fdct4x4 sse2 + prototype void vp9_fdct8x8 "int16_t *input, int16_t *output, int stride" specialize vp9_fdct8x8 sse2 -prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int stride" -specialize vp9_short_fdct4x4 sse2 +prototype void vp9_fdct16x16 "int16_t *input, int16_t *output, int stride" +specialize vp9_fdct16x16 sse2 prototype void vp9_fdct32x32 "int16_t *input, int16_t *output, int stride" specialize vp9_fdct32x32 sse2 @@ -707,12 +713,6 @@ specialize vp9_fdct32x32 sse2 prototype void vp9_fdct32x32_rd "int16_t *input, int16_t *output, int stride" specialize vp9_fdct32x32_rd sse2 -prototype void vp9_fdct16x16 "int16_t *input, int16_t *output, int stride" -specialize vp9_fdct16x16 sse2 - -prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_walsh4x4 - # # Motion search # diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 0aae53502..94fcf9101 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -36,7 +36,7 @@ static void fdct4(const int16_t *input, int16_t *output) { output[3] = dct_const_round_shift(temp2); } -void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int stride) { +void vp9_fdct4x4_c(int16_t *input, int16_t *output, int stride) { // The 2D transform is done with two passes which are actually pretty // similar. In the first one, we transform the columns and transpose // the results. In the second one, we transform the rows. To achieve that, @@ -585,7 +585,7 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output, /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per pixel. */ -void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int stride) { +void vp9_fwht4x4_c(int16_t *input, int16_t *output, int stride) { int i; int a1, b1, c1, d1, e1; int16_t *ip = input; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index edb20ba2f..57a0b3487 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1865,7 +1865,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { if (lossless) { // printf("Switching to lossless\n"); - cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4; + cpi->mb.fwd_txm4x4 = vp9_fwht4x4; cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; cpi->mb.optimize = 0; cpi->common.lf.filter_level = 0; @@ -1873,7 +1873,7 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { cpi->common.tx_mode = ONLY_4X4; } else { // printf("Not lossless\n"); - cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; + cpi->mb.fwd_txm4x4 = vp9_fdct4x4; cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; } } diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 05b166219..afd6fc5fa 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -959,9 +959,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->optimize_coefficients = 0; } - cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; + cpi->mb.fwd_txm4x4 = vp9_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { - cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4; + cpi->mb.fwd_txm4x4 = vp9_fwht4x4; } if (cpi->sf.subpel_search_method == SUBPEL_ITERATIVE) { diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index ae298c9e7..25b9e7e46 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -12,7 +12,7 @@ #include "vp9/common/vp9_idct.h" // for cospi constants #include "vpx_ports/mem.h" -void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int stride) { +void vp9_fdct4x4_sse2(int16_t *input, int16_t *output, int stride) { // The 2D transform is done with two passes which are actually pretty // similar. In the first one, we transform the columns and transpose // the results. In the second one, we transform the rows. To achieve that,