Finally removing "short" from transform names.

Change-Id: I5259b68dc1bcceb153e3ffe638a79a59a3019e9d
This commit is contained in:
Dmitry Kovalev 2014-02-06 11:54:15 -08:00
parent 4d8ebc9ec4
commit 005fc6970b
11 changed files with 210 additions and 233 deletions

View File

@ -273,7 +273,7 @@ void fdct16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
} }
void fht16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { void fht16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fht16x16_c(in, out, stride, tx_type); vp9_fht16x16_c(in, out, stride, tx_type);
} }
class Trans16x16TestBase { class Trans16x16TestBase {
@ -507,10 +507,10 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
C, Trans16x16HT, C, Trans16x16HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 0), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0),
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 1), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1),
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 2), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 3))); make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
#if HAVE_SSE2 #if HAVE_SSE2
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
@ -521,9 +521,9 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT, SSE2, Trans16x16HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0),
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1),
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2),
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3))); make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3)));
#endif #endif
} // namespace } // namespace

View File

@ -45,7 +45,7 @@ void fdct4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
} }
void fht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { void fht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fht4x4_c(in, out, stride, tx_type); vp9_fht4x4_c(in, out, stride, tx_type);
} }
class Trans4x4TestBase { class Trans4x4TestBase {
@ -281,10 +281,10 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
C, Trans4x4HT, C, Trans4x4HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 0), make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0),
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 1), make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1),
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 2), make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 3))); make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
#if HAVE_SSE2 #if HAVE_SSE2
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
@ -295,10 +295,10 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT, SSE2, Trans4x4HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0), make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0),
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1), make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1),
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2), make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2),
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3))); make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
#endif #endif
} // namespace } // namespace

View File

@ -44,7 +44,7 @@ void fdct8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
} }
void fht8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { void fht8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fht8x8_c(in, out, stride, tx_type); vp9_fht8x8_c(in, out, stride, tx_type);
} }
class FwdTrans8x8TestBase { class FwdTrans8x8TestBase {
@ -308,10 +308,10 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
C, FwdTrans8x8HT, C, FwdTrans8x8HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 0), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0),
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 1), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1),
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 2), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2),
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 3))); make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3)));
#if HAVE_SSE2 #if HAVE_SSE2
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
@ -321,9 +321,9 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8HT, SSE2, FwdTrans8x8HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0),
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1),
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2),
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3))); make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3)));
#endif #endif
} // namespace } // namespace

View File

@ -707,14 +707,14 @@ if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
fi fi
# fdct functions # fdct functions
prototype void vp9_short_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type" prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
specialize vp9_short_fht4x4 sse2 avx2 specialize vp9_fht4x4 sse2 avx2
prototype void vp9_short_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type" prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
specialize vp9_short_fht8x8 sse2 avx2 specialize vp9_fht8x8 sse2 avx2
prototype void vp9_short_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type" prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
specialize vp9_short_fht16x16 sse2 avx2 specialize vp9_fht16x16 sse2 avx2
prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride" prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride"
specialize vp9_fwht4x4 specialize vp9_fwht4x4

View File

@ -18,8 +18,6 @@
#include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_dct.h"
static INLINE int fdct_round_shift(int input) { static INLINE int fdct_round_shift(int input) {
int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
assert(INT16_MIN <= rv && rv <= INT16_MAX); assert(INT16_MIN <= rv && rv <= INT16_MAX);
@ -157,8 +155,11 @@ static const transform_2d FHT_4[] = {
{ fadst4, fadst4 } // ADST_ADST = 3 { fadst4, fadst4 } // ADST_ADST = 3
}; };
void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, void vp9_fht4x4_c(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vp9_fdct4x4_c(input, output, stride);
} else {
int16_t out[4 * 4]; int16_t out[4 * 4];
int16_t *outptr = &out[0]; int16_t *outptr = &out[0];
int i, j; int i, j;
@ -184,6 +185,7 @@ void vp9_short_fht4x4_c(const int16_t *input, int16_t *output,
for (j = 0; j < 4; ++j) for (j = 0; j < 4; ++j)
output[j + i * 4] = (temp_out[j] + 1) >> 2; output[j + i * 4] = (temp_out[j] + 1) >> 2;
} }
}
} }
static void fdct8(const int16_t *input, int16_t *output) { static void fdct8(const int16_t *input, int16_t *output) {
@ -565,8 +567,11 @@ static const transform_2d FHT_8[] = {
{ fadst8, fadst8 } // ADST_ADST = 3 { fadst8, fadst8 } // ADST_ADST = 3
}; };
void vp9_short_fht8x8_c(const int16_t *input, int16_t *output, void vp9_fht8x8_c(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vp9_fdct8x8_c(input, output, stride);
} else {
int16_t out[64]; int16_t out[64];
int16_t *outptr = &out[0]; int16_t *outptr = &out[0];
int i, j; int i, j;
@ -590,6 +595,7 @@ void vp9_short_fht8x8_c(const int16_t *input, int16_t *output,
for (j = 0; j < 8; ++j) for (j = 0; j < 8; ++j)
output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
} }
}
} }
/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
@ -958,8 +964,11 @@ static const transform_2d FHT_16[] = {
{ fadst16, fadst16 } // ADST_ADST = 3 { fadst16, fadst16 } // ADST_ADST = 3
}; };
void vp9_short_fht16x16_c(const int16_t *input, int16_t *output, void vp9_fht16x16_c(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vp9_fdct16x16_c(input, output, stride);
} else {
int16_t out[256]; int16_t out[256];
int16_t *outptr = &out[0]; int16_t *outptr = &out[0];
int i, j; int i, j;
@ -973,7 +982,6 @@ void vp9_short_fht16x16_c(const int16_t *input, int16_t *output,
ht.cols(temp_in, temp_out); ht.cols(temp_in, temp_out);
for (j = 0; j < 16; ++j) for (j = 0; j < 16; ++j)
outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
// outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
} }
// Rows // Rows
@ -984,6 +992,7 @@ void vp9_short_fht16x16_c(const int16_t *input, int16_t *output,
for (j = 0; j < 16; ++j) for (j = 0; j < 16; ++j)
output[j + i * 16] = temp_out[j]; output[j + i * 16] = temp_out[j];
} }
}
} }
static INLINE int dct_32_round(int input) { static INLINE int dct_32_round(int input) {
@ -1375,27 +1384,3 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
out[j + i * 32] = temp_out[j]; out[j + i * 32] = temp_out[j];
} }
} }
void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
int stride) {
if (tx_type == DCT_DCT)
vp9_fdct4x4(input, output, stride);
else
vp9_short_fht4x4(input, output, stride, tx_type);
}
void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
int stride) {
if (tx_type == DCT_DCT)
vp9_fdct8x8(input, output, stride);
else
vp9_short_fht8x8(input, output, stride, tx_type);
}
void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
int stride) {
if (tx_type == DCT_DCT)
vp9_fdct16x16(input, output, stride);
else
vp9_short_fht16x16(input, output, stride, tx_type);
}

View File

@ -1,32 +0,0 @@
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_DCT_H_
#define VP9_ENCODER_VP9_DCT_H_
#ifdef __cplusplus
extern "C" {
#endif
void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
int stride);
void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
int stride);
void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
int stride);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP9_ENCODER_VP9_DCT_H_

View File

@ -19,7 +19,6 @@
#include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_rdopt.h"
@ -571,7 +570,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
if (!x->skip_recode) { if (!x->skip_recode) {
vp9_subtract_block(16, 16, src_diff, diff_stride, vp9_subtract_block(16, 16, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride); src, p->src.stride, dst, pd->dst.stride);
vp9_fht16x16(tx_type, src_diff, coeff, diff_stride); vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff, p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan, pd->dequant, p->zbin_extra, eob, scan_order->scan,
@ -591,7 +590,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
if (!x->skip_recode) { if (!x->skip_recode) {
vp9_subtract_block(8, 8, src_diff, diff_stride, vp9_subtract_block(8, 8, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride); src, p->src.stride, dst, pd->dst.stride);
vp9_fht8x8(tx_type, src_diff, coeff, diff_stride); vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan, pd->dequant, p->zbin_extra, eob, scan_order->scan,
@ -617,7 +616,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_subtract_block(4, 4, src_diff, diff_stride, vp9_subtract_block(4, 4, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride); src, p->src.stride, dst, pd->dst.stride);
if (tx_type != DCT_DCT) if (tx_type != DCT_DCT)
vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type); vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
else else
x->fwd_txm4x4(src_diff, coeff, diff_stride); x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,

View File

@ -1064,7 +1064,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
so = &vp9_scan_orders[TX_4X4][tx_type]; so = &vp9_scan_orders[TX_4X4][tx_type];
if (tx_type != DCT_DCT) if (tx_type != DCT_DCT)
vp9_short_fht4x4(src_diff, coeff, 8, tx_type); vp9_fht4x4(src_diff, coeff, 8, tx_type);
else else
x->fwd_txm4x4(src_diff, coeff, 8); x->fwd_txm4x4(src_diff, coeff, 8);

View File

@ -244,32 +244,36 @@ void fadst4_avx2(__m128i *in) {
transpose_4x4_avx2(in); transpose_4x4_avx2(in);
} }
void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output, void vp9_fht4x4_avx2(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
__m128i in[4]; __m128i in[4];
load_buffer_4x4_avx2(input, in, stride);
switch (tx_type) { switch (tx_type) {
case 0: // DCT_DCT case DCT_DCT:
fdct4_avx2(in); vp9_fdct4x4_avx2(input, output, stride);
fdct4_avx2(in);
break; break;
case 1: // ADST_DCT case ADST_DCT:
load_buffer_4x4_avx2(input, in, stride);
fadst4_avx2(in); fadst4_avx2(in);
fdct4_avx2(in); fdct4_avx2(in);
write_buffer_4x4_avx2(output, in);
break; break;
case 2: // DCT_ADST case DCT_ADST:
load_buffer_4x4_avx2(input, in, stride);
fdct4_avx2(in); fdct4_avx2(in);
fadst4_avx2(in); fadst4_avx2(in);
write_buffer_4x4_avx2(output, in);
break; break;
case 3: // ADST_ADST case ADST_ADST:
load_buffer_4x4_avx2(input, in, stride);
fadst4_avx2(in); fadst4_avx2(in);
fadst4_avx2(in); fadst4_avx2(in);
write_buffer_4x4_avx2(output, in);
break; break;
default: default:
assert(0); assert(0);
break; break;
} }
write_buffer_4x4_avx2(output, in);
} }
void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) { void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
@ -1028,33 +1032,39 @@ void fadst8_avx2(__m128i *in) {
array_transpose_8x8_avx2(in, in); array_transpose_8x8_avx2(in, in);
} }
void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output, void vp9_fht8x8_avx2(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
__m128i in[8]; __m128i in[8];
load_buffer_8x8_avx2(input, in, stride);
switch (tx_type) { switch (tx_type) {
case 0: // DCT_DCT case DCT_DCT:
fdct8_avx2(in); vp9_fdct8x8_avx2(input, output, stride);
fdct8_avx2(in);
break; break;
case 1: // ADST_DCT case ADST_DCT:
load_buffer_8x8_avx2(input, in, stride);
fadst8_avx2(in); fadst8_avx2(in);
fdct8_avx2(in); fdct8_avx2(in);
right_shift_8x8_avx2(in, 1);
write_buffer_8x8_avx2(output, in, 8);
break; break;
case 2: // DCT_ADST case DCT_ADST:
load_buffer_8x8_avx2(input, in, stride);
fdct8_avx2(in); fdct8_avx2(in);
fadst8_avx2(in); fadst8_avx2(in);
right_shift_8x8_avx2(in, 1);
write_buffer_8x8_avx2(output, in, 8);
break; break;
case 3: // ADST_ADST case ADST_ADST:
load_buffer_8x8_avx2(input, in, stride);
fadst8_avx2(in); fadst8_avx2(in);
fadst8_avx2(in); fadst8_avx2(in);
right_shift_8x8_avx2(in, 1);
write_buffer_8x8_avx2(output, in, 8);
break; break;
default: default:
assert(0); assert(0);
break; break;
} }
right_shift_8x8_avx2(in, 1);
write_buffer_8x8_avx2(output, in, 8);
} }
void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) { void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
@ -2534,36 +2544,39 @@ void fadst16_avx2(__m128i *in0, __m128i *in1) {
array_transpose_16x16_avx2(in0, in1); array_transpose_16x16_avx2(in0, in1);
} }
void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output, void vp9_fht16x16_avx2(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
__m128i in0[16], in1[16]; __m128i in0[16], in1[16];
load_buffer_16x16_avx2(input, in0, in1, stride);
switch (tx_type) { switch (tx_type) {
case 0: // DCT_DCT case DCT_DCT:
fdct16_avx2(in0, in1); vp9_fdct16x16_avx2(input, output, stride);
right_shift_16x16_avx2(in0, in1);
fdct16_avx2(in0, in1);
break; break;
case 1: // ADST_DCT case ADST_DCT:
load_buffer_16x16_avx2(input, in0, in1, stride);
fadst16_avx2(in0, in1); fadst16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1);
fdct16_avx2(in0, in1); fdct16_avx2(in0, in1);
write_buffer_16x16_avx2(output, in0, in1, 16);
break; break;
case 2: // DCT_ADST case DCT_ADST:
load_buffer_16x16_avx2(input, in0, in1, stride);
fdct16_avx2(in0, in1); fdct16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1);
fadst16_avx2(in0, in1); fadst16_avx2(in0, in1);
write_buffer_16x16_avx2(output, in0, in1, 16);
break; break;
case 3: // ADST_ADST case ADST_ADST:
load_buffer_16x16_avx2(input, in0, in1, stride);
fadst16_avx2(in0, in1); fadst16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1);
fadst16_avx2(in0, in1); fadst16_avx2(in0, in1);
write_buffer_16x16_avx2(output, in0, in1, 16);
break; break;
default: default:
assert(0); assert(0);
break; break;
} }
write_buffer_16x16_avx2(output, in0, in1, 16);
} }
#define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2 #define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2

View File

@ -242,32 +242,36 @@ void fadst4_sse2(__m128i *in) {
transpose_4x4(in); transpose_4x4(in);
} }
void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output, void vp9_fht4x4_sse2(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
__m128i in[4]; __m128i in[4];
load_buffer_4x4(input, in, stride);
switch (tx_type) { switch (tx_type) {
case 0: // DCT_DCT case DCT_DCT:
fdct4_sse2(in); vp9_fdct4x4_sse2(input, output, stride);
fdct4_sse2(in);
break; break;
case 1: // ADST_DCT case ADST_DCT:
load_buffer_4x4(input, in, stride);
fadst4_sse2(in); fadst4_sse2(in);
fdct4_sse2(in); fdct4_sse2(in);
write_buffer_4x4(output, in);
break; break;
case 2: // DCT_ADST case DCT_ADST:
load_buffer_4x4(input, in, stride);
fdct4_sse2(in); fdct4_sse2(in);
fadst4_sse2(in); fadst4_sse2(in);
write_buffer_4x4(output, in);
break; break;
case 3: // ADST_ADST case ADST_ADST:
load_buffer_4x4(input, in, stride);
fadst4_sse2(in); fadst4_sse2(in);
fadst4_sse2(in); fadst4_sse2(in);
write_buffer_4x4(output, in);
break; break;
default: default:
assert(0); assert(0);
break; break;
} }
write_buffer_4x4(output, in);
} }
void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) { void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
@ -1026,33 +1030,39 @@ void fadst8_sse2(__m128i *in) {
array_transpose_8x8(in, in); array_transpose_8x8(in, in);
} }
void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output, void vp9_fht8x8_sse2(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
__m128i in[8]; __m128i in[8];
load_buffer_8x8(input, in, stride);
switch (tx_type) { switch (tx_type) {
case 0: // DCT_DCT case DCT_DCT:
fdct8_sse2(in); vp9_fdct8x8_sse2(input, output, stride);
fdct8_sse2(in);
break; break;
case 1: // ADST_DCT case ADST_DCT:
load_buffer_8x8(input, in, stride);
fadst8_sse2(in); fadst8_sse2(in);
fdct8_sse2(in); fdct8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break; break;
case 2: // DCT_ADST case DCT_ADST:
load_buffer_8x8(input, in, stride);
fdct8_sse2(in); fdct8_sse2(in);
fadst8_sse2(in); fadst8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break; break;
case 3: // ADST_ADST case ADST_ADST:
load_buffer_8x8(input, in, stride);
fadst8_sse2(in); fadst8_sse2(in);
fadst8_sse2(in); fadst8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break; break;
default: default:
assert(0); assert(0);
break; break;
} }
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
} }
void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
@ -2532,36 +2542,39 @@ void fadst16_sse2(__m128i *in0, __m128i *in1) {
array_transpose_16x16(in0, in1); array_transpose_16x16(in0, in1);
} }
void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output, void vp9_fht16x16_sse2(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
__m128i in0[16], in1[16]; __m128i in0[16], in1[16];
load_buffer_16x16(input, in0, in1, stride);
switch (tx_type) { switch (tx_type) {
case 0: // DCT_DCT case DCT_DCT:
fdct16_sse2(in0, in1); vp9_fdct16x16_sse2(input, output, stride);
right_shift_16x16(in0, in1);
fdct16_sse2(in0, in1);
break; break;
case 1: // ADST_DCT case ADST_DCT:
load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1); right_shift_16x16(in0, in1);
fdct16_sse2(in0, in1); fdct16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break; break;
case 2: // DCT_ADST case DCT_ADST:
load_buffer_16x16(input, in0, in1, stride);
fdct16_sse2(in0, in1); fdct16_sse2(in0, in1);
right_shift_16x16(in0, in1); right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break; break;
case 3: // ADST_ADST case ADST_ADST:
load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1); right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break; break;
default: default:
assert(0); assert(0);
break; break;
} }
write_buffer_16x16(output, in0, in1, 16);
} }
#define FDCT32x32_2D vp9_fdct32x32_rd_sse2 #define FDCT32x32_2D vp9_fdct32x32_rd_sse2

View File

@ -19,7 +19,6 @@ VP9_CX_SRCS-yes += vp9_cx_iface.c
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_dct.c VP9_CX_SRCS-yes += encoder/vp9_dct.c
VP9_CX_SRCS-yes += encoder/vp9_dct.h
VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c
VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h
VP9_CX_SRCS-yes += encoder/vp9_encodemb.c VP9_CX_SRCS-yes += encoder/vp9_encodemb.c