Finally removing "short" from transform names.
Change-Id: I5259b68dc1bcceb153e3ffe638a79a59a3019e9d
This commit is contained in:
parent
4d8ebc9ec4
commit
005fc6970b
@ -273,7 +273,7 @@ void fdct16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
}
|
||||
|
||||
void fht16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fht16x16_c(in, out, stride, tx_type);
|
||||
vp9_fht16x16_c(in, out, stride, tx_type);
|
||||
}
|
||||
|
||||
class Trans16x16TestBase {
|
||||
@ -507,10 +507,10 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans16x16HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 0),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 1),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0),
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1),
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
@ -521,9 +521,9 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3)));
|
||||
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0),
|
||||
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1),
|
||||
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2),
|
||||
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3)));
|
||||
#endif
|
||||
} // namespace
|
||||
|
@ -45,7 +45,7 @@ void fdct4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
}
|
||||
|
||||
void fht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fht4x4_c(in, out, stride, tx_type);
|
||||
vp9_fht4x4_c(in, out, stride, tx_type);
|
||||
}
|
||||
|
||||
class Trans4x4TestBase {
|
||||
@ -281,10 +281,10 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans4x4HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 0),
|
||||
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 1),
|
||||
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
|
||||
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
|
||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0),
|
||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1),
|
||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
|
||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
@ -295,10 +295,10 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans4x4HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0),
|
||||
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1),
|
||||
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2),
|
||||
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
|
||||
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0),
|
||||
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1),
|
||||
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2),
|
||||
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
@ -44,7 +44,7 @@ void fdct8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
}
|
||||
|
||||
void fht8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fht8x8_c(in, out, stride, tx_type);
|
||||
vp9_fht8x8_c(in, out, stride, tx_type);
|
||||
}
|
||||
|
||||
class FwdTrans8x8TestBase {
|
||||
@ -308,10 +308,10 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, FwdTrans8x8HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 0),
|
||||
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 1),
|
||||
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 2),
|
||||
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 3)));
|
||||
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0),
|
||||
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1),
|
||||
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2),
|
||||
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3)));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
@ -321,9 +321,9 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, FwdTrans8x8HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0),
|
||||
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1),
|
||||
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2),
|
||||
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3)));
|
||||
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0),
|
||||
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1),
|
||||
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2),
|
||||
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3)));
|
||||
#endif
|
||||
} // namespace
|
||||
|
@ -707,14 +707,14 @@ if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
|
||||
fi
|
||||
|
||||
# fdct functions
|
||||
prototype void vp9_short_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
|
||||
specialize vp9_short_fht4x4 sse2 avx2
|
||||
prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
|
||||
specialize vp9_fht4x4 sse2 avx2
|
||||
|
||||
prototype void vp9_short_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
|
||||
specialize vp9_short_fht8x8 sse2 avx2
|
||||
prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
|
||||
specialize vp9_fht8x8 sse2 avx2
|
||||
|
||||
prototype void vp9_short_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
|
||||
specialize vp9_short_fht16x16 sse2 avx2
|
||||
prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
|
||||
specialize vp9_fht16x16 sse2 avx2
|
||||
|
||||
prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride"
|
||||
specialize vp9_fwht4x4
|
||||
|
@ -18,8 +18,6 @@
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
#include "vp9/common/vp9_systemdependent.h"
|
||||
|
||||
#include "vp9/encoder/vp9_dct.h"
|
||||
|
||||
static INLINE int fdct_round_shift(int input) {
|
||||
int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
|
||||
assert(INT16_MIN <= rv && rv <= INT16_MAX);
|
||||
@ -157,32 +155,36 @@ static const transform_2d FHT_4[] = {
|
||||
{ fadst4, fadst4 } // ADST_ADST = 3
|
||||
};
|
||||
|
||||
void vp9_short_fht4x4_c(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
int16_t out[4 * 4];
|
||||
int16_t *outptr = &out[0];
|
||||
int i, j;
|
||||
int16_t temp_in[4], temp_out[4];
|
||||
const transform_2d ht = FHT_4[tx_type];
|
||||
void vp9_fht4x4_c(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_fdct4x4_c(input, output, stride);
|
||||
} else {
|
||||
int16_t out[4 * 4];
|
||||
int16_t *outptr = &out[0];
|
||||
int i, j;
|
||||
int16_t temp_in[4], temp_out[4];
|
||||
const transform_2d ht = FHT_4[tx_type];
|
||||
|
||||
// Columns
|
||||
for (i = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
temp_in[j] = input[j * stride + i] * 16;
|
||||
if (i == 0 && temp_in[0])
|
||||
temp_in[0] += 1;
|
||||
ht.cols(temp_in, temp_out);
|
||||
for (j = 0; j < 4; ++j)
|
||||
outptr[j * 4 + i] = temp_out[j];
|
||||
}
|
||||
// Columns
|
||||
for (i = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
temp_in[j] = input[j * stride + i] * 16;
|
||||
if (i == 0 && temp_in[0])
|
||||
temp_in[0] += 1;
|
||||
ht.cols(temp_in, temp_out);
|
||||
for (j = 0; j < 4; ++j)
|
||||
outptr[j * 4 + i] = temp_out[j];
|
||||
}
|
||||
|
||||
// Rows
|
||||
for (i = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
temp_in[j] = out[j + i * 4];
|
||||
ht.rows(temp_in, temp_out);
|
||||
for (j = 0; j < 4; ++j)
|
||||
output[j + i * 4] = (temp_out[j] + 1) >> 2;
|
||||
// Rows
|
||||
for (i = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
temp_in[j] = out[j + i * 4];
|
||||
ht.rows(temp_in, temp_out);
|
||||
for (j = 0; j < 4; ++j)
|
||||
output[j + i * 4] = (temp_out[j] + 1) >> 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -565,30 +567,34 @@ static const transform_2d FHT_8[] = {
|
||||
{ fadst8, fadst8 } // ADST_ADST = 3
|
||||
};
|
||||
|
||||
void vp9_short_fht8x8_c(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
int16_t out[64];
|
||||
int16_t *outptr = &out[0];
|
||||
int i, j;
|
||||
int16_t temp_in[8], temp_out[8];
|
||||
const transform_2d ht = FHT_8[tx_type];
|
||||
void vp9_fht8x8_c(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_fdct8x8_c(input, output, stride);
|
||||
} else {
|
||||
int16_t out[64];
|
||||
int16_t *outptr = &out[0];
|
||||
int i, j;
|
||||
int16_t temp_in[8], temp_out[8];
|
||||
const transform_2d ht = FHT_8[tx_type];
|
||||
|
||||
// Columns
|
||||
for (i = 0; i < 8; ++i) {
|
||||
for (j = 0; j < 8; ++j)
|
||||
temp_in[j] = input[j * stride + i] * 4;
|
||||
ht.cols(temp_in, temp_out);
|
||||
for (j = 0; j < 8; ++j)
|
||||
outptr[j * 8 + i] = temp_out[j];
|
||||
}
|
||||
// Columns
|
||||
for (i = 0; i < 8; ++i) {
|
||||
for (j = 0; j < 8; ++j)
|
||||
temp_in[j] = input[j * stride + i] * 4;
|
||||
ht.cols(temp_in, temp_out);
|
||||
for (j = 0; j < 8; ++j)
|
||||
outptr[j * 8 + i] = temp_out[j];
|
||||
}
|
||||
|
||||
// Rows
|
||||
for (i = 0; i < 8; ++i) {
|
||||
for (j = 0; j < 8; ++j)
|
||||
temp_in[j] = out[j + i * 8];
|
||||
ht.rows(temp_in, temp_out);
|
||||
for (j = 0; j < 8; ++j)
|
||||
output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
|
||||
// Rows
|
||||
for (i = 0; i < 8; ++i) {
|
||||
for (j = 0; j < 8; ++j)
|
||||
temp_in[j] = out[j + i * 8];
|
||||
ht.rows(temp_in, temp_out);
|
||||
for (j = 0; j < 8; ++j)
|
||||
output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -958,31 +964,34 @@ static const transform_2d FHT_16[] = {
|
||||
{ fadst16, fadst16 } // ADST_ADST = 3
|
||||
};
|
||||
|
||||
void vp9_short_fht16x16_c(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
int16_t out[256];
|
||||
int16_t *outptr = &out[0];
|
||||
int i, j;
|
||||
int16_t temp_in[16], temp_out[16];
|
||||
const transform_2d ht = FHT_16[tx_type];
|
||||
void vp9_fht16x16_c(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_fdct16x16_c(input, output, stride);
|
||||
} else {
|
||||
int16_t out[256];
|
||||
int16_t *outptr = &out[0];
|
||||
int i, j;
|
||||
int16_t temp_in[16], temp_out[16];
|
||||
const transform_2d ht = FHT_16[tx_type];
|
||||
|
||||
// Columns
|
||||
for (i = 0; i < 16; ++i) {
|
||||
for (j = 0; j < 16; ++j)
|
||||
temp_in[j] = input[j * stride + i] * 4;
|
||||
ht.cols(temp_in, temp_out);
|
||||
for (j = 0; j < 16; ++j)
|
||||
outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
|
||||
// outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
|
||||
}
|
||||
// Columns
|
||||
for (i = 0; i < 16; ++i) {
|
||||
for (j = 0; j < 16; ++j)
|
||||
temp_in[j] = input[j * stride + i] * 4;
|
||||
ht.cols(temp_in, temp_out);
|
||||
for (j = 0; j < 16; ++j)
|
||||
outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
|
||||
}
|
||||
|
||||
// Rows
|
||||
for (i = 0; i < 16; ++i) {
|
||||
for (j = 0; j < 16; ++j)
|
||||
temp_in[j] = out[j + i * 16];
|
||||
ht.rows(temp_in, temp_out);
|
||||
for (j = 0; j < 16; ++j)
|
||||
output[j + i * 16] = temp_out[j];
|
||||
// Rows
|
||||
for (i = 0; i < 16; ++i) {
|
||||
for (j = 0; j < 16; ++j)
|
||||
temp_in[j] = out[j + i * 16];
|
||||
ht.rows(temp_in, temp_out);
|
||||
for (j = 0; j < 16; ++j)
|
||||
output[j + i * 16] = temp_out[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1375,27 +1384,3 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
|
||||
out[j + i * 32] = temp_out[j];
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
|
||||
int stride) {
|
||||
if (tx_type == DCT_DCT)
|
||||
vp9_fdct4x4(input, output, stride);
|
||||
else
|
||||
vp9_short_fht4x4(input, output, stride, tx_type);
|
||||
}
|
||||
|
||||
void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
|
||||
int stride) {
|
||||
if (tx_type == DCT_DCT)
|
||||
vp9_fdct8x8(input, output, stride);
|
||||
else
|
||||
vp9_short_fht8x8(input, output, stride, tx_type);
|
||||
}
|
||||
|
||||
void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
|
||||
int stride) {
|
||||
if (tx_type == DCT_DCT)
|
||||
vp9_fdct16x16(input, output, stride);
|
||||
else
|
||||
vp9_short_fht16x16(input, output, stride, tx_type);
|
||||
}
|
||||
|
@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP9_ENCODER_VP9_DCT_H_
|
||||
#define VP9_ENCODER_VP9_DCT_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
|
||||
int stride);
|
||||
|
||||
void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
|
||||
int stride);
|
||||
|
||||
void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
|
||||
int stride);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP9_ENCODER_VP9_DCT_H_
|
@ -19,7 +19,6 @@
|
||||
#include "vp9/common/vp9_reconintra.h"
|
||||
#include "vp9/common/vp9_systemdependent.h"
|
||||
|
||||
#include "vp9/encoder/vp9_dct.h"
|
||||
#include "vp9/encoder/vp9_encodemb.h"
|
||||
#include "vp9/encoder/vp9_quantize.h"
|
||||
#include "vp9/encoder/vp9_rdopt.h"
|
||||
@ -571,7 +570,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
if (!x->skip_recode) {
|
||||
vp9_subtract_block(16, 16, src_diff, diff_stride,
|
||||
src, p->src.stride, dst, pd->dst.stride);
|
||||
vp9_fht16x16(tx_type, src_diff, coeff, diff_stride);
|
||||
vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
|
||||
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan_order->scan,
|
||||
@ -591,7 +590,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
if (!x->skip_recode) {
|
||||
vp9_subtract_block(8, 8, src_diff, diff_stride,
|
||||
src, p->src.stride, dst, pd->dst.stride);
|
||||
vp9_fht8x8(tx_type, src_diff, coeff, diff_stride);
|
||||
vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
|
||||
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
|
||||
p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan_order->scan,
|
||||
@ -617,7 +616,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
vp9_subtract_block(4, 4, src_diff, diff_stride,
|
||||
src, p->src.stride, dst, pd->dst.stride);
|
||||
if (tx_type != DCT_DCT)
|
||||
vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type);
|
||||
vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
|
||||
else
|
||||
x->fwd_txm4x4(src_diff, coeff, diff_stride);
|
||||
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
|
||||
|
@ -1064,7 +1064,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
||||
so = &vp9_scan_orders[TX_4X4][tx_type];
|
||||
|
||||
if (tx_type != DCT_DCT)
|
||||
vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
|
||||
vp9_fht4x4(src_diff, coeff, 8, tx_type);
|
||||
else
|
||||
x->fwd_txm4x4(src_diff, coeff, 8);
|
||||
|
||||
|
@ -244,32 +244,36 @@ void fadst4_avx2(__m128i *in) {
|
||||
transpose_4x4_avx2(in);
|
||||
}
|
||||
|
||||
void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
void vp9_fht4x4_avx2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
__m128i in[4];
|
||||
load_buffer_4x4_avx2(input, in, stride);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
fdct4_avx2(in);
|
||||
fdct4_avx2(in);
|
||||
case DCT_DCT:
|
||||
vp9_fdct4x4_avx2(input, output, stride);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
load_buffer_4x4_avx2(input, in, stride);
|
||||
fadst4_avx2(in);
|
||||
fdct4_avx2(in);
|
||||
write_buffer_4x4_avx2(output, in);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
load_buffer_4x4_avx2(input, in, stride);
|
||||
fdct4_avx2(in);
|
||||
fadst4_avx2(in);
|
||||
write_buffer_4x4_avx2(output, in);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
case ADST_ADST:
|
||||
load_buffer_4x4_avx2(input, in, stride);
|
||||
fadst4_avx2(in);
|
||||
fadst4_avx2(in);
|
||||
write_buffer_4x4_avx2(output, in);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
write_buffer_4x4_avx2(output, in);
|
||||
}
|
||||
|
||||
void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
|
||||
@ -1028,33 +1032,39 @@ void fadst8_avx2(__m128i *in) {
|
||||
array_transpose_8x8_avx2(in, in);
|
||||
}
|
||||
|
||||
void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
void vp9_fht8x8_avx2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
__m128i in[8];
|
||||
load_buffer_8x8_avx2(input, in, stride);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
fdct8_avx2(in);
|
||||
fdct8_avx2(in);
|
||||
case DCT_DCT:
|
||||
vp9_fdct8x8_avx2(input, output, stride);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
load_buffer_8x8_avx2(input, in, stride);
|
||||
fadst8_avx2(in);
|
||||
fdct8_avx2(in);
|
||||
right_shift_8x8_avx2(in, 1);
|
||||
write_buffer_8x8_avx2(output, in, 8);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
load_buffer_8x8_avx2(input, in, stride);
|
||||
fdct8_avx2(in);
|
||||
fadst8_avx2(in);
|
||||
right_shift_8x8_avx2(in, 1);
|
||||
write_buffer_8x8_avx2(output, in, 8);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
case ADST_ADST:
|
||||
load_buffer_8x8_avx2(input, in, stride);
|
||||
fadst8_avx2(in);
|
||||
fadst8_avx2(in);
|
||||
right_shift_8x8_avx2(in, 1);
|
||||
write_buffer_8x8_avx2(output, in, 8);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
right_shift_8x8_avx2(in, 1);
|
||||
write_buffer_8x8_avx2(output, in, 8);
|
||||
}
|
||||
|
||||
void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
|
||||
@ -2534,36 +2544,39 @@ void fadst16_avx2(__m128i *in0, __m128i *in1) {
|
||||
array_transpose_16x16_avx2(in0, in1);
|
||||
}
|
||||
|
||||
void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
void vp9_fht16x16_avx2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
__m128i in0[16], in1[16];
|
||||
load_buffer_16x16_avx2(input, in0, in1, stride);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
fdct16_avx2(in0, in1);
|
||||
right_shift_16x16_avx2(in0, in1);
|
||||
fdct16_avx2(in0, in1);
|
||||
case DCT_DCT:
|
||||
vp9_fdct16x16_avx2(input, output, stride);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
load_buffer_16x16_avx2(input, in0, in1, stride);
|
||||
fadst16_avx2(in0, in1);
|
||||
right_shift_16x16_avx2(in0, in1);
|
||||
fdct16_avx2(in0, in1);
|
||||
write_buffer_16x16_avx2(output, in0, in1, 16);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
load_buffer_16x16_avx2(input, in0, in1, stride);
|
||||
fdct16_avx2(in0, in1);
|
||||
right_shift_16x16_avx2(in0, in1);
|
||||
fadst16_avx2(in0, in1);
|
||||
write_buffer_16x16_avx2(output, in0, in1, 16);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
case ADST_ADST:
|
||||
load_buffer_16x16_avx2(input, in0, in1, stride);
|
||||
fadst16_avx2(in0, in1);
|
||||
right_shift_16x16_avx2(in0, in1);
|
||||
fadst16_avx2(in0, in1);
|
||||
write_buffer_16x16_avx2(output, in0, in1, 16);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
write_buffer_16x16_avx2(output, in0, in1, 16);
|
||||
}
|
||||
|
||||
#define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2
|
||||
|
@ -242,32 +242,36 @@ void fadst4_sse2(__m128i *in) {
|
||||
transpose_4x4(in);
|
||||
}
|
||||
|
||||
void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
void vp9_fht4x4_sse2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
__m128i in[4];
|
||||
load_buffer_4x4(input, in, stride);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
fdct4_sse2(in);
|
||||
fdct4_sse2(in);
|
||||
case DCT_DCT:
|
||||
vp9_fdct4x4_sse2(input, output, stride);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
load_buffer_4x4(input, in, stride);
|
||||
fadst4_sse2(in);
|
||||
fdct4_sse2(in);
|
||||
write_buffer_4x4(output, in);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
load_buffer_4x4(input, in, stride);
|
||||
fdct4_sse2(in);
|
||||
fadst4_sse2(in);
|
||||
write_buffer_4x4(output, in);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
case ADST_ADST:
|
||||
load_buffer_4x4(input, in, stride);
|
||||
fadst4_sse2(in);
|
||||
fadst4_sse2(in);
|
||||
write_buffer_4x4(output, in);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
write_buffer_4x4(output, in);
|
||||
}
|
||||
|
||||
void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
|
||||
@ -1026,33 +1030,39 @@ void fadst8_sse2(__m128i *in) {
|
||||
array_transpose_8x8(in, in);
|
||||
}
|
||||
|
||||
void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
void vp9_fht8x8_sse2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
__m128i in[8];
|
||||
load_buffer_8x8(input, in, stride);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
fdct8_sse2(in);
|
||||
fdct8_sse2(in);
|
||||
case DCT_DCT:
|
||||
vp9_fdct8x8_sse2(input, output, stride);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
load_buffer_8x8(input, in, stride);
|
||||
fadst8_sse2(in);
|
||||
fdct8_sse2(in);
|
||||
right_shift_8x8(in, 1);
|
||||
write_buffer_8x8(output, in, 8);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
load_buffer_8x8(input, in, stride);
|
||||
fdct8_sse2(in);
|
||||
fadst8_sse2(in);
|
||||
right_shift_8x8(in, 1);
|
||||
write_buffer_8x8(output, in, 8);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
case ADST_ADST:
|
||||
load_buffer_8x8(input, in, stride);
|
||||
fadst8_sse2(in);
|
||||
fadst8_sse2(in);
|
||||
right_shift_8x8(in, 1);
|
||||
write_buffer_8x8(output, in, 8);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
right_shift_8x8(in, 1);
|
||||
write_buffer_8x8(output, in, 8);
|
||||
}
|
||||
|
||||
void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
|
||||
@ -2532,36 +2542,39 @@ void fadst16_sse2(__m128i *in0, __m128i *in1) {
|
||||
array_transpose_16x16(in0, in1);
|
||||
}
|
||||
|
||||
void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
void vp9_fht16x16_sse2(const int16_t *input, int16_t *output,
|
||||
int stride, int tx_type) {
|
||||
__m128i in0[16], in1[16];
|
||||
load_buffer_16x16(input, in0, in1, stride);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
fdct16_sse2(in0, in1);
|
||||
right_shift_16x16(in0, in1);
|
||||
fdct16_sse2(in0, in1);
|
||||
case DCT_DCT:
|
||||
vp9_fdct16x16_sse2(input, output, stride);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
load_buffer_16x16(input, in0, in1, stride);
|
||||
fadst16_sse2(in0, in1);
|
||||
right_shift_16x16(in0, in1);
|
||||
fdct16_sse2(in0, in1);
|
||||
write_buffer_16x16(output, in0, in1, 16);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
load_buffer_16x16(input, in0, in1, stride);
|
||||
fdct16_sse2(in0, in1);
|
||||
right_shift_16x16(in0, in1);
|
||||
fadst16_sse2(in0, in1);
|
||||
write_buffer_16x16(output, in0, in1, 16);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
case ADST_ADST:
|
||||
load_buffer_16x16(input, in0, in1, stride);
|
||||
fadst16_sse2(in0, in1);
|
||||
right_shift_16x16(in0, in1);
|
||||
fadst16_sse2(in0, in1);
|
||||
write_buffer_16x16(output, in0, in1, 16);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
write_buffer_16x16(output, in0, in1, 16);
|
||||
}
|
||||
|
||||
#define FDCT32x32_2D vp9_fdct32x32_rd_sse2
|
||||
|
@ -19,7 +19,6 @@ VP9_CX_SRCS-yes += vp9_cx_iface.c
|
||||
|
||||
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
|
||||
VP9_CX_SRCS-yes += encoder/vp9_dct.c
|
||||
VP9_CX_SRCS-yes += encoder/vp9_dct.h
|
||||
VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c
|
||||
VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h
|
||||
VP9_CX_SRCS-yes += encoder/vp9_encodemb.c
|
||||
|
Loading…
Reference in New Issue
Block a user