Adds high bitdepth transform functions and tests

Adds various high bitdepth transform functions and tests. Much of the
change consists of using the typedefs tran_low_t and tran_high_t for the
final transform coefficients and the intermediate stages of the transform
computation, respectively, rather than the fixed types int16_t/int. When
the vp9_highbitdepth configure flag is off, these map to int16_t/int32_t;
when the flag is on, they map to int32_t/int64_t to make room for the
extra precision needed.

Change-Id: I3c56de79e15b904d6f655b62ffae170729befdd8
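The commit message describes the typedef mechanism but the header that defines it is not part of this diff. As a rough sketch only, assuming the usual CONFIG_VP9_HIGHBITDEPTH macro, the selection looks like this:

/* Sketch of the coefficient typedefs described above; the defining
 * header is not shown in this diff. */
#if CONFIG_VP9_HIGHBITDEPTH
typedef int64_t tran_high_t;  /* intermediate transform stages */
typedef int32_t tran_low_t;   /* final transform coefficients */
#else
typedef int32_t tran_high_t;  /* intermediate transform stages */
typedef int16_t tran_low_t;   /* final transform coefficients */
#endif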
parent 1e4136d35d
commit 10783d4f3a
@@ -281,6 +281,7 @@ EXPERIMENT_LIST="
     spatial_svc
     vp9_temporal_denoising
     fp_mb_stats
+    emulate_hardware_highbitdepth
 "
 CONFIG_LIST="
     external_build
@@ -581,6 +581,8 @@ TEST_P(ConvolveTest, CheckScalingFiltering) {
 
 using std::tr1::make_tuple;
 
+#if CONFIG_VP9_HIGHBITDEPTH
+#else
 const ConvolveFunctions convolve8_c(
     vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
     vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
@@ -600,8 +602,11 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
     make_tuple(64, 32, &convolve8_c),
     make_tuple(32, 64, &convolve8_c),
     make_tuple(64, 64, &convolve8_c)));
+#endif
 
-#if HAVE_SSE2
+#if HAVE_SSE2 && ARCH_X86_64
+#if CONFIG_VP9_HIGHBITDEPTH
+#else
 const ConvolveFunctions convolve8_sse2(
     vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
     vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
@@ -622,6 +627,7 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
     make_tuple(32, 64, &convolve8_sse2),
     make_tuple(64, 64, &convolve8_sse2)));
 #endif
+#endif
 
 #if HAVE_SSSE3
 const ConvolveFunctions convolve8_ssse3(
@@ -20,12 +20,9 @@
 
 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 
-extern "C" {
-void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *output, int pitch);
-}
-
 using libvpx_test::ACMRandom;
 
 namespace {
@@ -258,42 +255,72 @@ void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
   }
 }
 
-typedef void (*FdctFunc)(const int16_t *in, int16_t *out, int stride);
-typedef void (*IdctFunc)(const int16_t *in, uint8_t *out, int stride);
-typedef void (*FhtFunc)(const int16_t *in, int16_t *out, int stride,
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                         int tx_type);
-typedef void (*IhtFunc)(const int16_t *in, uint8_t *out, int stride,
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                         int tx_type);
 
-typedef std::tr1::tuple<FdctFunc, IdctFunc, int> Dct16x16Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int> Ht16x16Param;
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
 
-void fdct16x16_ref(const int16_t *in, int16_t *out, int stride,
+void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                    int /*tx_type*/) {
   vp9_fdct16x16_c(in, out, stride);
 }
 
-void idct16x16_ref(const int16_t *in, uint8_t *dest, int stride,
+void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                    int /*tx_type*/) {
   vp9_idct16x16_256_add_c(in, dest, stride);
 }
 
-void fht16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
+void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
+                  int tx_type) {
   vp9_fht16x16_c(in, out, stride, tx_type);
 }
 
-void iht16x16_ref(const int16_t *in, uint8_t *dest, int stride, int tx_type) {
+void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
+                  int tx_type) {
   vp9_iht16x16_256_add_c(in, dest, stride, tx_type);
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_high_idct16x16_256_add_c(in, out, stride, 10);
+}
+
+void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_high_idct16x16_256_add_c(in, out, stride, 12);
+}
+
+void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
+                      int tx_type) {
+  idct16x16_10(in, out, stride);
+}
+
+void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
+                      int tx_type) {
+  idct16x16_12(in, out, stride);
+}
+
+void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  vp9_high_iht16x16_256_add_c(in, out, stride, tx_type, 10);
+}
+
+void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  vp9_high_iht16x16_256_add_c(in, out, stride, tx_type, 12);
+}
+#endif
+
 class Trans16x16TestBase {
  public:
   virtual ~Trans16x16TestBase() {}
 
  protected:
-  virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
+  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
 
-  virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
+  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
 
   void RunAccuracyCheck() {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
@@ -302,23 +329,48 @@ class Trans16x16TestBase {
     const int count_test_block = 10000;
     for (int i = 0; i < count_test_block; ++i) {
       DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
       DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
       DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+#if CONFIG_VP9_HIGHBITDEPTH
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+#endif
 
-      // Initialize a test block with input range [-255, 255].
+      // Initialize a test block with input range [-mask_, mask_].
       for (int j = 0; j < kNumCoeffs; ++j) {
-        src[j] = rnd.Rand8();
-        dst[j] = rnd.Rand8();
-        test_input_block[j] = src[j] - dst[j];
+        if (bit_depth_ == VPX_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          test_input_block[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          test_input_block[j] = src16[j] - dst16[j];
+#endif
+        }
       }
 
       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
                                           test_temp_block, pitch_));
-      ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+      if (bit_depth_ == VPX_BITS_8) {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(test_temp_block, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
 
       for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+        const uint32_t diff =
+            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
         const uint32_t diff = dst[j] - src[j];
+#endif
         const uint32_t error = diff * diff;
         if (max_error < error)
          max_error = error;
@@ -326,10 +378,10 @@ class Trans16x16TestBase {
       }
     }
 
-    EXPECT_GE(1u, max_error)
+    EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
        << "Error: 16x16 FHT/IHT has an individual round trip error > 1";
 
-    EXPECT_GE(count_test_block , total_error)
+    EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
        << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
   }
 
@@ -337,13 +389,13 @@ class Trans16x16TestBase {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
     const int count_test_block = 1000;
     DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
 
     for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
+      // Initialize a test block with input range [-mask_, mask_].
       for (int j = 0; j < kNumCoeffs; ++j)
-        input_block[j] = rnd.Rand8() - rnd.Rand8();
+        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
 
       fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
@@ -359,21 +411,21 @@ class Trans16x16TestBase {
     const int count_test_block = 1000;
     DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
 
     for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
+      // Initialize a test block with input range [-mask_, mask_].
       for (int j = 0; j < kNumCoeffs; ++j) {
-        input_block[j] = rnd.Rand8() - rnd.Rand8();
-        input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
       }
       if (i == 0) {
         for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = 255;
+          input_extreme_block[j] = mask_;
       } else if (i == 1) {
         for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = -255;
+          input_extreme_block[j] = -mask_;
       }
 
       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
@@ -383,7 +435,7 @@ class Trans16x16TestBase {
       // The minimum quant value is 4.
       for (int j = 0; j < kNumCoeffs; ++j) {
         EXPECT_EQ(output_block[j], output_ref_block[j]);
-        EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
+        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
             << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
       }
     }
@@ -394,39 +446,65 @@ class Trans16x16TestBase {
     const int count_test_block = 1000;
     DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
     DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
 
     DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
     DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
+#if CONFIG_VP9_HIGHBITDEPTH
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
+#endif
 
     for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
+      // Initialize a test block with input range [-mask_, mask_].
       for (int j = 0; j < kNumCoeffs; ++j) {
-        input_block[j] = rnd.Rand8() - rnd.Rand8();
-        input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+        if (bit_depth_ == VPX_BITS_8)
+          input_block[j] = rnd.Rand8() - rnd.Rand8();
+        else
+          input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
       }
       if (i == 0)
         for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = 255;
+          input_extreme_block[j] = mask_;
       if (i == 1)
         for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = -255;
+          input_extreme_block[j] = -mask_;
 
       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
 
       // clear reconstructed pixel buffers
       vpx_memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
       vpx_memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
+#if CONFIG_VP9_HIGHBITDEPTH
+      vpx_memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
+      vpx_memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
+#endif
 
       // quantization with maximum allowed step sizes
       output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
       for (int j = 1; j < kNumCoeffs; ++j)
         output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
-      inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
-      ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
-
-      for (int j = 0; j < kNumCoeffs; ++j)
-        EXPECT_EQ(ref[j], dst[j]);
+      if (bit_depth_ == VPX_BITS_8) {
+        inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+      } else {
+        inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
+                     tx_type_);
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block,
+                                            CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+      if (bit_depth_ == VPX_BITS_8) {
+        for (int j = 0; j < kNumCoeffs; ++j)
+          EXPECT_EQ(ref[j], dst[j]);
+#if CONFIG_VP9_HIGHBITDEPTH
+      } else {
+        for (int j = 0; j < kNumCoeffs; ++j)
+          EXPECT_EQ(ref16[j], dst16[j]);
+#endif
+      }
     }
   }
 
@@ -434,28 +512,52 @@ class Trans16x16TestBase {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
     const int count_test_block = 1000;
     DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
     DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
     DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+#if CONFIG_VP9_HIGHBITDEPTH
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+#endif
 
     for (int i = 0; i < count_test_block; ++i) {
       double out_r[kNumCoeffs];
 
       // Initialize a test block with input range [-255, 255].
       for (int j = 0; j < kNumCoeffs; ++j) {
-        src[j] = rnd.Rand8();
-        dst[j] = rnd.Rand8();
-        in[j] = src[j] - dst[j];
+        if (bit_depth_ == VPX_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          in[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          in[j] = src16[j] - dst16[j];
+#endif
+        }
       }
 
      reference_16x16_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j)
        coeff[j] = round(out_r[j]);
 
-      ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
+      if (bit_depth_ == VPX_BITS_8) {
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
+#if CONFIG_VP9_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
+                                            16));
+#endif
+      }
 
       for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+        const uint32_t diff =
+            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
         const uint32_t diff = dst[j] - src[j];
+#endif
         const uint32_t error = diff * diff;
         EXPECT_GE(1u, error)
             << "Error: 16x16 IDCT has error " << error
@@ -465,6 +567,8 @@ class Trans16x16TestBase {
   }
   int pitch_;
   int tx_type_;
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
   FhtFunc fwd_txfm_ref;
   IhtFunc inv_txfm_ref;
 };
@@ -479,17 +583,34 @@ class Trans16x16DCT
     fwd_txfm_ = GET_PARAM(0);
     inv_txfm_ = GET_PARAM(1);
     tx_type_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
     pitch_ = 16;
     fwd_txfm_ref = fdct16x16_ref;
     inv_txfm_ref = idct16x16_ref;
+    mask_ = (1 << bit_depth_) - 1;
+#if CONFIG_VP9_HIGHBITDEPTH
+    switch (bit_depth_) {
+      case 10:
+        inv_txfm_ref = idct16x16_10_ref;
+        break;
+      case 12:
+        inv_txfm_ref = idct16x16_12_ref;
+        break;
+      default:
+        inv_txfm_ref = idct16x16_ref;
+        break;
+    }
+#else
+    inv_txfm_ref = idct16x16_ref;
+#endif
   }
   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 
  protected:
-  void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
+  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
     fwd_txfm_(in, out, stride);
   }
-  void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
+  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
     inv_txfm_(out, dst, stride);
   }
 
@@ -529,17 +650,34 @@ class Trans16x16HT
     fwd_txfm_ = GET_PARAM(0);
     inv_txfm_ = GET_PARAM(1);
     tx_type_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
     pitch_ = 16;
     fwd_txfm_ref = fht16x16_ref;
     inv_txfm_ref = iht16x16_ref;
+    mask_ = (1 << bit_depth_) - 1;
+#if CONFIG_VP9_HIGHBITDEPTH
+    switch (bit_depth_) {
+      case VPX_BITS_10:
+        inv_txfm_ref = iht16x16_10;
+        break;
+      case VPX_BITS_12:
+        inv_txfm_ref = iht16x16_12;
+        break;
+      default:
+        inv_txfm_ref = iht16x16_ref;
+        break;
+    }
+#else
+    inv_txfm_ref = iht16x16_ref;
+#endif
   }
   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 
  protected:
-  void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
+  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
     fwd_txfm_(in, out, stride, tx_type_);
   }
-  void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
+  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
     inv_txfm_(out, dst, stride, tx_type_);
   }
 
@@ -567,45 +705,78 @@ TEST_P(Trans16x16HT, QuantCheck) {
 
 using std::tr1::make_tuple;
 
+#if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     C, Trans16x16DCT,
     ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0)));
+        make_tuple(&vp9_high_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_high_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    C, Trans16x16DCT,
+    ::testing::Values(
+        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+#endif
+
+#if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     C, Trans16x16HT,
     ::testing::Values(
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0),
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1),
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
+        make_tuple(&vp9_high_fht16x16_c, &iht16x16_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_high_fht16x16_c, &iht16x16_10, 1, VPX_BITS_10),
+        make_tuple(&vp9_high_fht16x16_c, &iht16x16_10, 2, VPX_BITS_10),
+        make_tuple(&vp9_high_fht16x16_c, &iht16x16_10, 3, VPX_BITS_10),
+        make_tuple(&vp9_high_fht16x16_c, &iht16x16_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_high_fht16x16_c, &iht16x16_12, 1, VPX_BITS_12),
+        make_tuple(&vp9_high_fht16x16_c, &iht16x16_12, 2, VPX_BITS_12),
+        make_tuple(&vp9_high_fht16x16_c, &iht16x16_12, 3, VPX_BITS_12),
+        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    C, Trans16x16HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
+#endif
 
-#if HAVE_NEON_ASM
+#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     NEON, Trans16x16DCT,
     ::testing::Values(
         make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_neon, 0)));
+                   &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
 #endif
 
-#if HAVE_SSE2
+#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     SSE2, Trans16x16DCT,
     ::testing::Values(
         make_tuple(&vp9_fdct16x16_sse2,
-                   &vp9_idct16x16_256_add_sse2, 0)));
+                   &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
     SSE2, Trans16x16HT,
     ::testing::Values(
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0),
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1),
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2),
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3)));
+        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0,
+                   VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1,
+                   VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2,
+                   VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
+                   VPX_BITS_8)));
 #endif
 
-#if HAVE_SSSE3
+#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     SSSE3, Trans16x16DCT,
     ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0)));
+        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0,
+                   VPX_BITS_8)));
 #endif
 } // namespace
@@ -21,6 +21,7 @@
 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 
 using libvpx_test::ACMRandom;
@@ -71,10 +72,21 @@ void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
   }
 }
 
-typedef void (*FwdTxfmFunc)(const int16_t *in, int16_t *out, int stride);
-typedef void (*InvTxfmFunc)(const int16_t *in, uint8_t *out, int stride);
+typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
 
-typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int> Trans32x32Param;
+typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
+    Trans32x32Param;
 
+#if CONFIG_VP9_HIGHBITDEPTH
+void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_high_idct32x32_1024_add_c(in, out, stride, 10);
+}
+
+void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_high_idct32x32_1024_add_c(in, out, stride, 12);
+}
+#endif
+
 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
  public:
@@ -84,12 +96,16 @@ class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
     inv_txfm_ = GET_PARAM(1);
     version_ = GET_PARAM(2);  // 0: high precision forward transform
                               // 1: low precision version for rd loop
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
   }
 
   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 
  protected:
   int version_;
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
   FwdTxfmFunc fwd_txfm_;
   InvTxfmFunc inv_txfm_;
 };
@@ -100,23 +116,47 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
   int64_t total_error = 0;
   const int count_test_block = 1000;
   DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
   DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
   DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+#if CONFIG_VP9_HIGHBITDEPTH
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+#endif
 
   for (int i = 0; i < count_test_block; ++i) {
-    // Initialize a test block with input range [-255, 255].
+    // Initialize a test block with input range [-mask_, mask_].
     for (int j = 0; j < kNumCoeffs; ++j) {
-      src[j] = rnd.Rand8();
-      dst[j] = rnd.Rand8();
-      test_input_block[j] = src[j] - dst[j];
+      if (bit_depth_ == 8) {
+        src[j] = rnd.Rand8();
+        dst[j] = rnd.Rand8();
+        test_input_block[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+      } else {
+        src16[j] = rnd.Rand16() & mask_;
+        dst16[j] = rnd.Rand16() & mask_;
+        test_input_block[j] = src16[j] - dst16[j];
+#endif
+      }
    }
 
    ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
-    ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
+    if (bit_depth_ == VPX_BITS_8) {
+      ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
+#if CONFIG_VP9_HIGHBITDEPTH
+    } else {
+      ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block,
+                                         CONVERT_TO_BYTEPTR(dst16), 32));
+#endif
+    }
 
    for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      const uint32_t diff =
+          bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
      const uint32_t diff = dst[j] - src[j];
+#endif
      const uint32_t error = diff * diff;
      if (max_error < error)
        max_error = error;
@@ -129,10 +169,10 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
     total_error /= 45;
   }
 
-  EXPECT_GE(1u, max_error)
+  EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
      << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
 
-  EXPECT_GE(count_test_block, total_error)
+  EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
      << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
 }
 
@@ -141,12 +181,12 @@ TEST_P(Trans32x32Test, CoeffCheck) {
   const int count_test_block = 1000;
 
   DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
 
   for (int i = 0; i < count_test_block; ++i) {
     for (int j = 0; j < kNumCoeffs; ++j)
-      input_block[j] = rnd.Rand8() - rnd.Rand8();
+      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
 
     const int stride = 32;
     vp9_fdct32x32_c(input_block, output_ref_block, stride);
@@ -170,21 +210,21 @@ TEST_P(Trans32x32Test, MemCheck) {
 
   DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
   DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
 
   for (int i = 0; i < count_test_block; ++i) {
-    // Initialize a test block with input range [-255, 255].
+    // Initialize a test block with input range [-mask_, mask_].
     for (int j = 0; j < kNumCoeffs; ++j) {
-      input_block[j] = rnd.Rand8() - rnd.Rand8();
-      input_extreme_block[j] = rnd.Rand8() & 1 ? 255 : -255;
+      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+      input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
     }
     if (i == 0) {
       for (int j = 0; j < kNumCoeffs; ++j)
-        input_extreme_block[j] = 255;
+        input_extreme_block[j] = mask_;
     } else if (i == 1) {
       for (int j = 0; j < kNumCoeffs; ++j)
-        input_extreme_block[j] = -255;
+        input_extreme_block[j] = -mask_;
     }
 
     const int stride = 32;
@@ -201,9 +241,9 @@ TEST_P(Trans32x32Test, MemCheck) {
        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
            << "Error: 32x32 FDCT rd has mismatched coefficients";
      }
-      EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_ref_block[j]))
+      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
          << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
-      EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
+      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
          << "Error: 32x32 FDCT has coefficient larger than "
          << "4*DCT_MAX_VALUE";
    }
@@ -214,26 +254,49 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   const int count_test_block = 1000;
   DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
   DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
   DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+#if CONFIG_VP9_HIGHBITDEPTH
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+#endif
 
   for (int i = 0; i < count_test_block; ++i) {
     double out_r[kNumCoeffs];
 
     // Initialize a test block with input range [-255, 255]
     for (int j = 0; j < kNumCoeffs; ++j) {
-      src[j] = rnd.Rand8();
-      dst[j] = rnd.Rand8();
-      in[j] = src[j] - dst[j];
+      if (bit_depth_ == VPX_BITS_8) {
+        src[j] = rnd.Rand8();
+        dst[j] = rnd.Rand8();
+        in[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+      } else {
+        src16[j] = rnd.Rand16() & mask_;
+        dst16[j] = rnd.Rand16() & mask_;
+        in[j] = src16[j] - dst16[j];
+#endif
+      }
    }
 
    reference_32x32_dct_2d(in, out_r);
    for (int j = 0; j < kNumCoeffs; ++j)
      coeff[j] = round(out_r[j]);
-    ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
+    if (bit_depth_ == VPX_BITS_8) {
+      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
+#if CONFIG_VP9_HIGHBITDEPTH
+    } else {
+      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
+#endif
+    }
    for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      const int diff =
+          bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
      const int diff = dst[j] - src[j];
+#endif
      const int error = diff * diff;
      EXPECT_GE(1, error)
          << "Error: 32x32 IDCT has error " << error
@@ -244,39 +307,59 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
 
 using std::tr1::make_tuple;
 
+#if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     C, Trans32x32Test,
     ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c, &vp9_idct32x32_1024_add_c, 0),
-        make_tuple(&vp9_fdct32x32_rd_c, &vp9_idct32x32_1024_add_c, 1)));
+        make_tuple(&vp9_high_fdct32x32_c,
+                   &idct32x32_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_high_fdct32x32_rd_c,
+                   &idct32x32_10, 1, VPX_BITS_10),
+        make_tuple(&vp9_high_fdct32x32_c,
+                   &idct32x32_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_high_fdct32x32_rd_c,
+                   &idct32x32_12, 1, VPX_BITS_12),
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fdct32x32_rd_c,
+                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    C, Trans32x32Test,
+    ::testing::Values(
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fdct32x32_rd_c,
+                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+#endif
 
-#if HAVE_NEON_ASM
+#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     NEON, Trans32x32Test,
     ::testing::Values(
         make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_neon, 0),
+                   &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),
         make_tuple(&vp9_fdct32x32_rd_c,
-                   &vp9_idct32x32_1024_add_neon, 1)));
+                   &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
 #endif
 
-#if HAVE_SSE2
+#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     SSE2, Trans32x32Test,
     ::testing::Values(
         make_tuple(&vp9_fdct32x32_sse2,
-                   &vp9_idct32x32_1024_add_sse2, 0),
+                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
         make_tuple(&vp9_fdct32x32_rd_sse2,
-                   &vp9_idct32x32_1024_add_sse2, 1)));
+                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 #endif
 
-#if HAVE_AVX2
+#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     AVX2, Trans32x32Test,
     ::testing::Values(
         make_tuple(&vp9_fdct32x32_avx2,
-                   &vp9_idct32x32_1024_add_sse2, 0),
+                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
         make_tuple(&vp9_fdct32x32_rd_avx2,
-                   &vp9_idct32x32_1024_add_sse2, 1)));
+                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 #endif
 } // namespace
@@ -20,46 +20,71 @@
 
 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 
-extern "C" {
-void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *output, int pitch);
-}
-
 using libvpx_test::ACMRandom;
 
 namespace {
 const int kNumCoeffs = 16;
-typedef void (*FdctFunc)(const int16_t *in, int16_t *out, int stride);
-typedef void (*IdctFunc)(const int16_t *in, uint8_t *out, int stride);
-typedef void (*FhtFunc)(const int16_t *in, int16_t *out, int stride,
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                         int tx_type);
-typedef void (*IhtFunc)(const int16_t *in, uint8_t *out, int stride,
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                         int tx_type);
 
-typedef std::tr1::tuple<FdctFunc, IdctFunc, int> Dct4x4Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int> Ht4x4Param;
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;
 
-void fdct4x4_ref(const int16_t *in, int16_t *out, int stride, int /*tx_type*/) {
+void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                 int tx_type) {
   vp9_fdct4x4_c(in, out, stride);
 }
 
-void fht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
+void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
   vp9_fht4x4_c(in, out, stride, tx_type);
 }
 
-void fwht4x4_ref(const int16_t *in, int16_t *out, int stride, int /*tx_type*/) {
+void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                 int tx_type) {
   vp9_fwht4x4_c(in, out, stride);
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_high_idct4x4_16_add_c(in, out, stride, 10);
+}
+
+void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_high_idct4x4_16_add_c(in, out, stride, 12);
+}
+
+void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  vp9_high_iht4x4_16_add_c(in, out, stride, tx_type, 10);
+}
+
+void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  vp9_high_iht4x4_16_add_c(in, out, stride, tx_type, 12);
+}
+
+void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_high_iwht4x4_16_add_c(in, out, stride, 10);
+}
+
+void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_high_iwht4x4_16_add_c(in, out, stride, 12);
+}
+#endif
+
 class Trans4x4TestBase {
  public:
   virtual ~Trans4x4TestBase() {}
 
  protected:
-  virtual void RunFwdTxfm(const int16_t *in, int16_t *out, int stride) = 0;
+  virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0;
 
-  virtual void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) = 0;
+  virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0;
 
   void RunAccuracyCheck(int limit) {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
@@ -68,23 +93,47 @@ class Trans4x4TestBase {
     const int count_test_block = 10000;
     for (int i = 0; i < count_test_block; ++i) {
       DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
       DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
       DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+#if CONFIG_VP9_HIGHBITDEPTH
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+#endif
 
       // Initialize a test block with input range [-255, 255].
       for (int j = 0; j < kNumCoeffs; ++j) {
-        src[j] = rnd.Rand8();
-        dst[j] = rnd.Rand8();
-        test_input_block[j] = src[j] - dst[j];
+        if (bit_depth_ == VPX_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          test_input_block[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          test_input_block[j] = src16[j] - dst16[j];
+#endif
+        }
      }
 
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
                                          test_temp_block, pitch_));
-      ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+      if (bit_depth_ == VPX_BITS_8) {
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block,
+                                            CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
 
      for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+        const uint32_t diff =
+            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
        const uint32_t diff = dst[j] - src[j];
+#endif
        const uint32_t error = diff * diff;
        if (max_error < error)
          max_error = error;
@@ -105,13 +154,13 @@ class Trans4x4TestBase {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
     const int count_test_block = 5000;
     DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
 
     for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
+      // Initialize a test block with input range [-mask_, mask_].
       for (int j = 0; j < kNumCoeffs; ++j)
-        input_block[j] = rnd.Rand8() - rnd.Rand8();
+        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
 
       fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
@@ -127,21 +176,21 @@ class Trans4x4TestBase {
     const int count_test_block = 5000;
     DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
     DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
 
     for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
+      // Initialize a test block with input range [-mask_, mask_].
       for (int j = 0; j < kNumCoeffs; ++j) {
-        input_block[j] = rnd.Rand8() - rnd.Rand8();
-        input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
       }
       if (i == 0) {
         for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = 255;
+          input_extreme_block[j] = mask_;
       } else if (i == 1) {
         for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = -255;
+          input_extreme_block[j] = -mask_;
       }
 
       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
@@ -151,8 +200,8 @@ class Trans4x4TestBase {
       // The minimum quant value is 4.
       for (int j = 0; j < kNumCoeffs; ++j) {
         EXPECT_EQ(output_block[j], output_ref_block[j]);
-        EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
-            << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
+        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
+            << "Error: 4x4 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
       }
     }
   }
@ -161,24 +210,48 @@ class Trans4x4TestBase {
|
|||||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||||
const int count_test_block = 1000;
|
const int count_test_block = 1000;
|
||||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||||
DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
|
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
|
||||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||||
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||||
|
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
|
||||||
|
#endif
|
||||||
|
|
||||||
for (int i = 0; i < count_test_block; ++i) {
|
for (int i = 0; i < count_test_block; ++i) {
|
||||||
// Initialize a test block with input range [-255, 255].
|
// Initialize a test block with input range [-mask_, mask_].
|
||||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||||
src[j] = rnd.Rand8();
|
if (bit_depth_ == VPX_BITS_8) {
|
||||||
dst[j] = rnd.Rand8();
|
src[j] = rnd.Rand8();
|
||||||
in[j] = src[j] - dst[j];
|
dst[j] = rnd.Rand8();
|
||||||
|
in[j] = src[j] - dst[j];
|
||||||
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
} else {
|
||||||
|
src16[j] = rnd.Rand16() & mask_;
|
||||||
|
dst16[j] = rnd.Rand16() & mask_;
|
||||||
|
in[j] = src16[j] - dst16[j];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fwd_txfm_ref(in, coeff, pitch_, tx_type_);
|
fwd_txfm_ref(in, coeff, pitch_, tx_type_);
|
||||||
|
|
||||||
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
|
if (bit_depth_ == VPX_BITS_8) {
|
||||||
|
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
|
||||||
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
} else {
|
||||||
|
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
|
||||||
|
pitch_));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||||
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
const uint32_t diff =
|
||||||
|
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
|
||||||
|
#else
|
||||||
const uint32_t diff = dst[j] - src[j];
|
const uint32_t diff = dst[j] - src[j];
|
||||||
|
#endif
|
||||||
const uint32_t error = diff * diff;
|
const uint32_t error = diff * diff;
|
||||||
EXPECT_GE(static_cast<uint32_t>(limit), error)
|
EXPECT_GE(static_cast<uint32_t>(limit), error)
|
||||||
<< "Error: 4x4 IDCT has error " << error
|
<< "Error: 4x4 IDCT has error " << error
|
||||||
@ -190,6 +263,8 @@ class Trans4x4TestBase {
|
|||||||
int pitch_;
|
int pitch_;
|
||||||
int tx_type_;
|
int tx_type_;
|
||||||
FhtFunc fwd_txfm_ref;
|
FhtFunc fwd_txfm_ref;
|
||||||
|
vpx_bit_depth_t bit_depth_;
|
||||||
|
int mask_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Trans4x4DCT
|
class Trans4x4DCT
|
||||||
@ -204,14 +279,16 @@ class Trans4x4DCT
|
|||||||
tx_type_ = GET_PARAM(2);
|
tx_type_ = GET_PARAM(2);
|
||||||
pitch_ = 4;
|
pitch_ = 4;
|
||||||
fwd_txfm_ref = fdct4x4_ref;
|
fwd_txfm_ref = fdct4x4_ref;
|
||||||
|
bit_depth_ = GET_PARAM(3);
|
||||||
|
mask_ = (1 << bit_depth_) - 1;
|
||||||
}
|
}
|
||||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void RunFwdTxfm(const int16_t *in, int16_t *out, int stride) {
|
void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
|
||||||
fwd_txfm_(in, out, stride);
|
fwd_txfm_(in, out, stride);
|
||||||
}
|
}
|
||||||
void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) {
|
void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
|
||||||
inv_txfm_(out, dst, stride);
|
inv_txfm_(out, dst, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -247,15 +324,17 @@ class Trans4x4HT
|
|||||||
tx_type_ = GET_PARAM(2);
|
tx_type_ = GET_PARAM(2);
|
||||||
pitch_ = 4;
|
pitch_ = 4;
|
||||||
fwd_txfm_ref = fht4x4_ref;
|
fwd_txfm_ref = fht4x4_ref;
|
||||||
|
bit_depth_ = GET_PARAM(3);
|
||||||
|
mask_ = (1 << bit_depth_) - 1;
|
||||||
}
|
}
|
||||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void RunFwdTxfm(const int16_t *in, int16_t *out, int stride) {
|
void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
|
||||||
fwd_txfm_(in, out, stride, tx_type_);
|
fwd_txfm_(in, out, stride, tx_type_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) {
|
void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
|
||||||
inv_txfm_(out, dst, stride, tx_type_);
|
inv_txfm_(out, dst, stride, tx_type_);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -291,14 +370,16 @@ class Trans4x4WHT
|
|||||||
tx_type_ = GET_PARAM(2);
|
tx_type_ = GET_PARAM(2);
|
||||||
pitch_ = 4;
|
pitch_ = 4;
|
||||||
fwd_txfm_ref = fwht4x4_ref;
|
fwd_txfm_ref = fwht4x4_ref;
|
||||||
|
bit_depth_ = GET_PARAM(3);
|
||||||
|
mask_ = (1 << bit_depth_) - 1;
|
||||||
}
|
}
|
||||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void RunFwdTxfm(const int16_t *in, int16_t *out, int stride) {
|
void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
|
||||||
fwd_txfm_(in, out, stride);
|
fwd_txfm_(in, out, stride);
|
||||||
}
|
}
|
||||||
void RunInvTxfm(const int16_t *out, uint8_t *dst, int stride) {
|
void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
|
||||||
inv_txfm_(out, dst, stride);
|
inv_txfm_(out, dst, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -323,57 +404,95 @@ TEST_P(Trans4x4WHT, InvAccuracyCheck) {
|
|||||||
}
|
}
|
||||||
using std::tr1::make_tuple;
|
using std::tr1::make_tuple;
|
||||||
|
|
||||||
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
C, Trans4x4DCT,
|
C, Trans4x4DCT,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0)));
|
make_tuple(&vp9_high_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
|
||||||
|
make_tuple(&vp9_high_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
|
||||||
|
make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
|
||||||
|
#else
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
C, Trans4x4DCT,
|
||||||
|
::testing::Values(
|
||||||
|
make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
C, Trans4x4HT,
|
C, Trans4x4HT,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0),
|
make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 0, VPX_BITS_10),
|
||||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1),
|
make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 1, VPX_BITS_10),
|
||||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
|
make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 2, VPX_BITS_10),
|
||||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
|
make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 3, VPX_BITS_10),
|
||||||
|
make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 0, VPX_BITS_12),
|
||||||
|
make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 1, VPX_BITS_12),
|
||||||
|
make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 2, VPX_BITS_12),
|
||||||
|
make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 3, VPX_BITS_12),
|
||||||
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
|
||||||
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
|
||||||
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
|
||||||
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
|
||||||
|
#else
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
C, Trans4x4HT,
|
||||||
|
::testing::Values(
|
||||||
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
|
||||||
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
|
||||||
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
|
||||||
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
C, Trans4x4WHT,
|
C, Trans4x4WHT,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0)));
|
make_tuple(&vp9_high_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
|
||||||
|
make_tuple(&vp9_high_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
|
||||||
|
make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
|
||||||
|
#else
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
C, Trans4x4WHT,
|
||||||
|
::testing::Values(
|
||||||
|
make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
|
||||||
|
#endif
|
||||||
|
|
||||||
#if HAVE_NEON_ASM
|
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
NEON, Trans4x4DCT,
|
NEON, Trans4x4DCT,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vp9_fdct4x4_c,
|
make_tuple(&vp9_fdct4x4_c,
|
||||||
&vp9_idct4x4_16_add_neon, 0)));
|
&vp9_idct4x4_16_add_neon, 0, VPX_BITS_8)));
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
DISABLED_NEON, Trans4x4HT,
|
DISABLED_NEON, Trans4x4HT,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0),
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
|
||||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1),
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
|
||||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2),
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
|
||||||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3)));
|
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CONFIG_USE_X86INC && HAVE_MMX
|
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
MMX, Trans4x4WHT,
|
MMX, Trans4x4WHT,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0)));
|
make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_SSE2
|
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
SSE2, Trans4x4DCT,
|
SSE2, Trans4x4DCT,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vp9_fdct4x4_sse2,
|
make_tuple(&vp9_fdct4x4_sse2,
|
||||||
&vp9_idct4x4_16_add_sse2, 0)));
|
&vp9_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
SSE2, Trans4x4HT,
|
SSE2, Trans4x4HT,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0),
|
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0, VPX_BITS_8),
|
||||||
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1),
|
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
|
||||||
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2),
|
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
|
||||||
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
|
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
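Editor's note on the 4x4 test changes above: the new mask_ member is simply the largest legal pixel value for the configured bit depth, which is why the random inputs and the extremal blocks are clamped with it. Below is a minimal sketch of that arithmetic, assuming only that the bit depth is its numeric value (8, 10 or 12); the helper is illustrative and not part of the patch.

    #include <assert.h>

    /* Illustrative only: the input mask the tests would use for a bit depth. */
    static int pixel_mask(int bit_depth) {
      return (1 << bit_depth) - 1;  /* 8 -> 255, 10 -> 1023, 12 -> 4095 */
    }

    int main(void) {
      assert(pixel_mask(8) == 255);
      assert(pixel_mask(10) == 1023);
      assert(pixel_mask(12) == 4095);
      return 0;
    }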
@@ -20,45 +20,96 @@

#include "./vp9_rtcd.h"
#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"

-extern "C" {
+const int kNumCoeffs = 64;
-void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *output, int pitch);
+const double kPi = 3.141592653589793238462643383279502884;
+void reference_8x8_dct_1d(const double in[8], double out[8], int stride) {
+  const double kInvSqrt2 = 0.707106781186547524400844362104;
+  for (int k = 0; k < 8; k++) {
+    out[k] = 0.0;
+    for (int n = 0; n < 8; n++)
+      out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
+    if (k == 0)
+      out[k] = out[k] * kInvSqrt2;
+  }
+}
+
+void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
+                          double output[kNumCoeffs]) {
+  // First transform columns
+  for (int i = 0; i < 8; ++i) {
+    double temp_in[8], temp_out[8];
+    for (int j = 0; j < 8; ++j)
+      temp_in[j] = input[j*8 + i];
+    reference_8x8_dct_1d(temp_in, temp_out, 1);
+    for (int j = 0; j < 8; ++j)
+      output[j * 8 + i] = temp_out[j];
+  }
+  // Then transform rows
+  for (int i = 0; i < 8; ++i) {
+    double temp_in[8], temp_out[8];
+    for (int j = 0; j < 8; ++j)
+      temp_in[j] = output[j + i*8];
+    reference_8x8_dct_1d(temp_in, temp_out, 1);
+    // Scale by some magic number
+    for (int j = 0; j < 8; ++j)
+      output[j + i * 8] = temp_out[j] * 2;
+  }
}

using libvpx_test::ACMRandom;

namespace {
-typedef void (*FdctFunc)(const int16_t *in, int16_t *out, int stride);
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*IdctFunc)(const int16_t *in, uint8_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef void (*FhtFunc)(const int16_t *in, int16_t *out, int stride,
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
-typedef void (*IhtFunc)(const int16_t *in, uint8_t *out, int stride,
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);

-typedef std::tr1::tuple<FdctFunc, IdctFunc, int> Dct8x8Param;
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int> Ht8x8Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;

-void fdct8x8_ref(const int16_t *in, int16_t *out, int stride, int /*tx_type*/) {
+void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fdct8x8_c(in, out, stride);
}

-void fht8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
+void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fht8x8_c(in, out, stride, tx_type);
}

+#if CONFIG_VP9_HIGHBITDEPTH
+void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_high_idct8x8_64_add_c(in, out, stride, 10);
+}
+
+void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_high_idct8x8_64_add_c(in, out, stride, 12);
+}
+
+void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  vp9_high_iht8x8_64_add_c(in, out, stride, tx_type, 10);
+}
+
+void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  vp9_high_iht8x8_64_add_c(in, out, stride, tx_type, 12);
+}
+#endif

class FwdTrans8x8TestBase {
 public:
  virtual ~FwdTrans8x8TestBase() {}

 protected:
- virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
+ virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
- virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
+ virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;

  void RunSignBiasCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-   DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
+   DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_output_block, 64);
    int count_sign_block[64][2];
    const int count_test_block = 100000;

@@ -67,7 +118,8 @@ class FwdTrans8x8TestBase {
    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < 64; ++j)
-       test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+       test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
+                             ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_output_block, pitch_));

@@ -82,7 +134,7 @@ class FwdTrans8x8TestBase {
    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
      const int max_diff = 1125;
-     EXPECT_LT(diff, max_diff)
+     EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-255, 255] at index " << j
@@ -111,7 +163,7 @@ class FwdTrans8x8TestBase {
    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
      const int max_diff = 10000;
-     EXPECT_LT(diff, max_diff)
+     EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
          << "Error: 4x4 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-15, 15] at index " << j
@@ -127,16 +179,28 @@ class FwdTrans8x8TestBase {
    int total_error = 0;
    const int count_test_block = 100000;
    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-   DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
+   DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
+#if CONFIG_VP9_HIGHBITDEPTH
+   DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
+   DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
+#endif

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < 64; ++j) {
-       src[j] = rnd.Rand8();
+       if (bit_depth_ == VPX_BITS_8) {
-       dst[j] = rnd.Rand8();
+         src[j] = rnd.Rand8();
-       test_input_block[j] = src[j] - dst[j];
+         dst[j] = rnd.Rand8();
+         test_input_block[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+       } else {
+         src16[j] = rnd.Rand16() & mask_;
+         dst16[j] = rnd.Rand16() & mask_;
+         test_input_block[j] = src16[j] - dst16[j];
+#endif
+       }
      }

      ASM_REGISTER_STATE_CHECK(
@@ -152,11 +216,23 @@ class FwdTrans8x8TestBase {
          test_temp_block[j] *= 4;
        }
      }
-     ASM_REGISTER_STATE_CHECK(
+     if (bit_depth_ == VPX_BITS_8) {
-         RunInvTxfm(test_temp_block, dst, pitch_));
+       ASM_REGISTER_STATE_CHECK(
+           RunInvTxfm(test_temp_block, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+     } else {
+       ASM_REGISTER_STATE_CHECK(
+           RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+     }

      for (int j = 0; j < 64; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+       const int diff =
+           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
        const int diff = dst[j] - src[j];
+#endif
        const int error = diff * diff;
        if (max_error < error)
          max_error = error;
@@ -164,11 +240,11 @@ class FwdTrans8x8TestBase {
      }
    }

-   EXPECT_GE(1, max_error)
+   EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
        << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
        << " roundtrip error > 1";

-   EXPECT_GE(count_test_block/5, total_error)
+   EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8))/5, total_error)
        << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
        << "error > 1/5 per block";
  }
@@ -180,37 +256,68 @@ class FwdTrans8x8TestBase {
    int total_coeff_error = 0;
    const int count_test_block = 100000;
    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-   DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
+   DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
-   DECLARE_ALIGNED_ARRAY(16, int16_t, ref_temp_block, 64);
+   DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_temp_block, 64);
    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
+#if CONFIG_VP9_HIGHBITDEPTH
+   DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
+   DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
+#endif

    for (int i = 0; i < count_test_block; ++i) {
-     // Initialize a test block with input range [-255, 255].
+     // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < 64; ++j) {
-       if (i == 0) {
+       if (bit_depth_ == VPX_BITS_8) {
-         src[j] = 255;
+         if (i == 0) {
-         dst[j] = 0;
+           src[j] = 255;
-       } else if (i == 1) {
+           dst[j] = 0;
-         src[j] = 0;
+         } else if (i == 1) {
-         dst[j] = 255;
+           src[j] = 0;
+           dst[j] = 255;
+         } else {
+           src[j] = rnd.Rand8() % 2 ? 255 : 0;
+           dst[j] = rnd.Rand8() % 2 ? 255 : 0;
+         }
+         test_input_block[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
        } else {
-         src[j] = rnd.Rand8() % 2 ? 255 : 0;
+         if (i == 0) {
-         dst[j] = rnd.Rand8() % 2 ? 255 : 0;
+           src16[j] = mask_;
+           dst16[j] = 0;
+         } else if (i == 1) {
+           src16[j] = 0;
+           dst16[j] = mask_;
+         } else {
+           src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
+           dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
+         }
+         test_input_block[j] = src16[j] - dst16[j];
+#endif
        }

-       test_input_block[j] = src[j] - dst[j];
      }

      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      ASM_REGISTER_STATE_CHECK(
          fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
-     ASM_REGISTER_STATE_CHECK(
+     if (bit_depth_ == VPX_BITS_8) {
-         RunInvTxfm(test_temp_block, dst, pitch_));
+       ASM_REGISTER_STATE_CHECK(
+           RunInvTxfm(test_temp_block, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+     } else {
+       ASM_REGISTER_STATE_CHECK(
+           RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+     }

      for (int j = 0; j < 64; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+       const int diff =
+           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
        const int diff = dst[j] - src[j];
+#endif
        const int error = diff * diff;
        if (max_error < error)
          max_error = error;
@@ -220,11 +327,11 @@ class FwdTrans8x8TestBase {
        total_coeff_error += abs(coeff_diff);
      }

-     EXPECT_GE(1, max_error)
+     EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
          << "an individual roundtrip error > 1";

-     EXPECT_GE(count_test_block/5, total_error)
+     EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8))/5, total_error)
          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
          << " roundtrip error > 1/5 per block";

@@ -234,9 +341,97 @@ class FwdTrans8x8TestBase {
    }
  }

+ void RunInvAccuracyCheck() {
+   ACMRandom rnd(ACMRandom::DeterministicSeed());
+   const int count_test_block = 1000;
+   DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
+   DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
+   DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+   DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+#if CONFIG_VP9_HIGHBITDEPTH
+   DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+   DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+#endif
+
+   for (int i = 0; i < count_test_block; ++i) {
+     double out_r[kNumCoeffs];
+
+     // Initialize a test block with input range [-255, 255].
+     for (int j = 0; j < kNumCoeffs; ++j) {
+       if (bit_depth_ == VPX_BITS_8) {
+         src[j] = rnd.Rand8() % 2 ? 255 : 0;
+         dst[j] = src[j] > 0 ? 0 : 255;
+         in[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+       } else {
+         src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
+         dst16[j] = src16[j] > 0 ? 0 : mask_;
+         in[j] = src16[j] - dst16[j];
+#endif
+       }
+     }
+
+     reference_8x8_dct_2d(in, out_r);
+     for (int j = 0; j < kNumCoeffs; ++j)
+       coeff[j] = round(out_r[j]);
+
+     if (bit_depth_ == VPX_BITS_8) {
+       ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+     } else {
+       ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
+                                           pitch_));
+#endif
+     }
+
+     for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+       const uint32_t diff =
+           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+       const uint32_t diff = dst[j] - src[j];
+#endif
+       const uint32_t error = diff * diff;
+       EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
+           << "Error: 8x8 IDCT has error " << error
+           << " at index " << j;
+     }
+   }
+ }
+
+ void RunFwdAccuracyCheck() {
+   ACMRandom rnd(ACMRandom::DeterministicSeed());
+   const int count_test_block = 1000;
+   DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
+   DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_r, kNumCoeffs);
+   DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
+
+   for (int i = 0; i < count_test_block; ++i) {
+     double out_r[kNumCoeffs];
+
+     // Initialize a test block with input range [-mask_, mask_].
+     for (int j = 0; j < kNumCoeffs; ++j)
+       in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
+
+     RunFwdTxfm(in, coeff, pitch_);
+     reference_8x8_dct_2d(in, out_r);
+     for (int j = 0; j < kNumCoeffs; ++j)
+       coeff_r[j] = round(out_r[j]);
+
+     for (int j = 0; j < kNumCoeffs; ++j) {
+       const uint32_t diff = coeff[j] - coeff_r[j];
+       const uint32_t error = diff * diff;
+       EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
+           << "Error: 8x8 DCT has error " << error
+           << " at index " << j;
+     }
+   }
+ }
  int pitch_;
  int tx_type_;
  FhtFunc fwd_txfm_ref;
+ vpx_bit_depth_t bit_depth_;
+ int mask_;
};

class FwdTrans8x8DCT
@@ -251,15 +446,17 @@ class FwdTrans8x8DCT
    tx_type_ = GET_PARAM(2);
    pitch_ = 8;
    fwd_txfm_ref = fdct8x8_ref;
+   bit_depth_ = GET_PARAM(3);
+   mask_ = (1 << bit_depth_) - 1;
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
- void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
+ void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
    fwd_txfm_(in, out, stride);
  }
- void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
+ void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

@@ -279,6 +476,14 @@ TEST_P(FwdTrans8x8DCT, ExtremalCheck) {
  RunExtremalCheck();
}

+TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) {
+  RunFwdAccuracyCheck();
+}
+
+TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) {
+  RunInvAccuracyCheck();
+}
+
class FwdTrans8x8HT
    : public FwdTrans8x8TestBase,
      public ::testing::TestWithParam<Ht8x8Param> {
@@ -291,15 +496,17 @@ class FwdTrans8x8HT
    tx_type_ = GET_PARAM(2);
    pitch_ = 8;
    fwd_txfm_ref = fht8x8_ref;
+   bit_depth_ = GET_PARAM(3);
+   mask_ = (1 << bit_depth_) - 1;
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
- void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
+ void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
    fwd_txfm_(in, out, stride, tx_type_);
  }
- void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
+ void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride, tx_type_);
  }

@@ -321,50 +528,81 @@ TEST_P(FwdTrans8x8HT, ExtremalCheck) {

using std::tr1::make_tuple;

+#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
-       make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0)));
+       make_tuple(&vp9_high_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
+       make_tuple(&vp9_high_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12),
+       make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    C, FwdTrans8x8DCT,
+    ::testing::Values(
+        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
+#endif

+#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
-       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0),
+       make_tuple(&vp9_high_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
-       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1),
+       make_tuple(&vp9_high_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
-       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2),
+       make_tuple(&vp9_high_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
-       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3)));
+       make_tuple(&vp9_high_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10),
+       make_tuple(&vp9_high_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12),
+       make_tuple(&vp9_high_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
+       make_tuple(&vp9_high_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
+       make_tuple(&vp9_high_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
+       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
+       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
+       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
+       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    C, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
+#endif

-#if HAVE_NEON_ASM
+#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8DCT,
    ::testing::Values(
-       make_tuple(&vp9_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0)));
+       make_tuple(&vp9_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0,
+                  VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    DISABLED_NEON, FwdTrans8x8HT,
    ::testing::Values(
-       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0),
+       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
-       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1),
+       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
-       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2),
+       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
-       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3)));
+       make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif

-#if HAVE_SSE2
+#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
-       make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0)));
+       make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0,
+                  VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
-       make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0),
+       make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8),
-       make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1),
+       make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
-       make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2),
+       make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
-       make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3)));
+       make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif

-#if HAVE_SSSE3 && ARCH_X86_64
+#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    SSSE3, FwdTrans8x8DCT,
    ::testing::Values(
-       make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0)));
+       make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0,
+                  VPX_BITS_8)));
#endif
}  // namespace
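Editor's note on the 8x8 round-trip checks above: the tolerance now scales with bit depth, since one unit of pixel error at 10 or 12 bits is worth more in squared-error terms than at 8 bits. A small sketch of the bound 1 << 2 * (bit_depth - 8) used by the new assertions; illustrative only, not part of the patch.

    #include <stdio.h>

    int main(void) {
      const int bit_depths[] = { 8, 10, 12 };
      for (int i = 0; i < 3; ++i) {
        const int bd = bit_depths[i];
        /* Prints 1, 16 and 256: the per-pixel squared-error budget. */
        printf("bit depth %d: max squared error = %d\n", bd, 1 << 2 * (bd - 8));
      }
      return 0;
    }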
@@ -109,7 +109,8 @@ TEST(VP9Idct8x8Test, AccuracyCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 10000;
  for (int i = 0; i < count_test_block; ++i) {
-   int16_t input[64], coeff[64];
+   int16_t input[64];
+   tran_low_t coeff[64];
    double output_r[64];
    uint8_t dst[64], src[64];

@@ -26,8 +26,8 @@
using libvpx_test::ACMRandom;

namespace {
-typedef void (*FwdTxfmFunc)(const int16_t *in, int16_t *out, int stride);
+typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*InvTxfmFunc)(const int16_t *in, uint8_t *out, int stride);
+typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef std::tr1::tuple<FwdTxfmFunc,
                        InvTxfmFunc,
                        InvTxfmFunc,
@@ -74,8 +74,8 @@ TEST_P(PartialIDctTest, RunQuantCheck) {
      FAIL() << "Wrong Size!";
      break;
  }
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block1, kMaxNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block2, kMaxNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);

@@ -83,7 +83,7 @@ TEST_P(PartialIDctTest, RunQuantCheck) {
  const int block_size = size * size;

  DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kMaxNumCoeffs);
- DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kMaxNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kMaxNumCoeffs);

  int max_error = 0;
  for (int i = 0; i < count_test_block; ++i) {
@@ -153,8 +153,8 @@ TEST_P(PartialIDctTest, ResultsMatch) {
      FAIL() << "Wrong Size!";
      break;
  }
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block1, kMaxNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block2, kMaxNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
  const int count_test_block = 1000;
@@ -229,6 +229,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct4x4_16_add_c,
                   &vp9_idct4x4_1_add_c,
                   TX_4X4, 1)));

#if HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P(
    NEON, PartialIDctTest,
@@ -259,7 +260,7 @@ INSTANTIATE_TEST_CASE_P(
                   TX_4X4, 1)));
#endif

-#if HAVE_SSE2
+#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    SSE2, PartialIDctTest,
    ::testing::Values(
@@ -293,7 +294,7 @@ INSTANTIATE_TEST_CASE_P(
                   TX_4X4, 1)));
#endif

-#if HAVE_SSSE3 && ARCH_X86_64
+#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    SSSE3_64, PartialIDctTest,
    ::testing::Values(
@@ -303,7 +304,7 @@ INSTANTIATE_TEST_CASE_P(
                   TX_8X8, 12)));
#endif

-#if HAVE_SSSE3
+#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    SSSE3, PartialIDctTest,
    ::testing::Values(
@@ -21,6 +21,7 @@
#include "vp9/common/vp9_common_data.h"
#include "vp9/common/vp9_enums.h"
#include "vp9/common/vp9_filter.h"
+#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mv.h"
#include "vp9/common/vp9_scale.h"
#include "vp9/common/vp9_seg_common.h"
@@ -176,7 +177,7 @@ struct buf_2d {
};

struct macroblockd_plane {
- int16_t *dqcoeff;
+ tran_low_t *dqcoeff;
  PLANE_TYPE plane_type;
  int subsampling_x;
  int subsampling_y;
@@ -223,11 +224,17 @@ typedef struct macroblockd {
  /* mc buffer */
  DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);

+#if CONFIG_VP9_HIGHBITDEPTH
+ /* Bit depth: 8, 10, 12 */
+ int bd;
+ DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]);
+#endif
+
  int lossless;

  int corrupted;

- DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
+ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_MB_PLANE][64 * 64]);

  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
  ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
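Editor's note on the macroblockd change above: switching dqcoeff from int16_t to tran_low_t means each per-plane dequantized-coefficient buffer doubles in size when the high bitdepth flag is on (tran_low_t then maps to int32_t). A quick illustration of the footprint, assuming the 64 * 64 block size shown in the struct; illustrative only, not part of the patch.

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const size_t coeffs = 64 * 64;
      /* 8192 bytes per plane with int16_t vs 16384 bytes with int32_t. */
      printf("%zu bytes vs %zu bytes per plane\n",
             coeffs * sizeof(int16_t), coeffs * sizeof(int32_t));
      return 0;
    }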
File diff suppressed because it is too large
@@ -36,52 +36,69 @@ extern "C" {
#define dual_set_epi16(a, b) \
  _mm_set_epi16(b, b, b, b, a, a, a, a)

+// Note:
+// tran_low_t  is the datatype used for final transform coefficients.
+// tran_high_t is the datatype used for intermediate transform stages.
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef int64_t tran_high_t;
+typedef int32_t tran_low_t;
+#else
+typedef int32_t tran_high_t;
+typedef int16_t tran_low_t;
+#endif
+
// Constants:
//  for (int i = 1; i< 32; ++i)
//    printf("static const int cospi_%d_64 = %.0f;\n", i,
//           round(16384 * cos(i*M_PI/64)));
// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
-static const int cospi_1_64 = 16364;
+static const tran_high_t cospi_1_64 = 16364;
-static const int cospi_2_64 = 16305;
+static const tran_high_t cospi_2_64 = 16305;
-static const int cospi_3_64 = 16207;
+static const tran_high_t cospi_3_64 = 16207;
-static const int cospi_4_64 = 16069;
+static const tran_high_t cospi_4_64 = 16069;
-static const int cospi_5_64 = 15893;
+static const tran_high_t cospi_5_64 = 15893;
-static const int cospi_6_64 = 15679;
+static const tran_high_t cospi_6_64 = 15679;
-static const int cospi_7_64 = 15426;
+static const tran_high_t cospi_7_64 = 15426;
-static const int cospi_8_64 = 15137;
+static const tran_high_t cospi_8_64 = 15137;
-static const int cospi_9_64 = 14811;
+static const tran_high_t cospi_9_64 = 14811;
-static const int cospi_10_64 = 14449;
+static const tran_high_t cospi_10_64 = 14449;
-static const int cospi_11_64 = 14053;
+static const tran_high_t cospi_11_64 = 14053;
-static const int cospi_12_64 = 13623;
+static const tran_high_t cospi_12_64 = 13623;
-static const int cospi_13_64 = 13160;
+static const tran_high_t cospi_13_64 = 13160;
-static const int cospi_14_64 = 12665;
+static const tran_high_t cospi_14_64 = 12665;
-static const int cospi_15_64 = 12140;
+static const tran_high_t cospi_15_64 = 12140;
-static const int cospi_16_64 = 11585;
+static const tran_high_t cospi_16_64 = 11585;
-static const int cospi_17_64 = 11003;
+static const tran_high_t cospi_17_64 = 11003;
-static const int cospi_18_64 = 10394;
+static const tran_high_t cospi_18_64 = 10394;
-static const int cospi_19_64 = 9760;
+static const tran_high_t cospi_19_64 = 9760;
-static const int cospi_20_64 = 9102;
+static const tran_high_t cospi_20_64 = 9102;
-static const int cospi_21_64 = 8423;
+static const tran_high_t cospi_21_64 = 8423;
-static const int cospi_22_64 = 7723;
+static const tran_high_t cospi_22_64 = 7723;
-static const int cospi_23_64 = 7005;
+static const tran_high_t cospi_23_64 = 7005;
-static const int cospi_24_64 = 6270;
+static const tran_high_t cospi_24_64 = 6270;
-static const int cospi_25_64 = 5520;
+static const tran_high_t cospi_25_64 = 5520;
-static const int cospi_26_64 = 4756;
+static const tran_high_t cospi_26_64 = 4756;
-static const int cospi_27_64 = 3981;
+static const tran_high_t cospi_27_64 = 3981;
-static const int cospi_28_64 = 3196;
+static const tran_high_t cospi_28_64 = 3196;
-static const int cospi_29_64 = 2404;
+static const tran_high_t cospi_29_64 = 2404;
-static const int cospi_30_64 = 1606;
+static const tran_high_t cospi_30_64 = 1606;
-static const int cospi_31_64 = 804;
+static const tran_high_t cospi_31_64 = 804;

// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
-static const int sinpi_1_9 = 5283;
+static const tran_high_t sinpi_1_9 = 5283;
-static const int sinpi_2_9 = 9929;
+static const tran_high_t sinpi_2_9 = 9929;
-static const int sinpi_3_9 = 13377;
+static const tran_high_t sinpi_3_9 = 13377;
-static const int sinpi_4_9 = 15212;
+static const tran_high_t sinpi_4_9 = 15212;

-static INLINE int dct_const_round_shift(int input) {
+static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
-  int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
-#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#if CONFIG_VP9_HIGHBITDEPTH
+  // For valid highbitdepth VP9 streams, intermediate stage coefficients will
+  // stay within the ranges:
+  // - 8 bit: signed 16 bit integer
+  // - 10 bit: signed 18 bit integer
+  // - 12 bit: signed 20 bit integer
+#elif CONFIG_COEFFICIENT_RANGE_CHECKING
  // For valid VP9 input streams, intermediate stage coefficients should always
  // stay within the range of a signed 16 bit integer. Coefficients can go out
  // of this range for invalid/corrupt VP9 streams. However, strictly checking
@@ -91,32 +108,59 @@ static INLINE int dct_const_round_shift(int input) {
  assert(INT16_MIN <= rv);
  assert(rv <= INT16_MAX);
#endif
- return (int16_t)rv;
+ return (tran_low_t)rv;
}

-typedef void (*transform_1d)(const int16_t*, int16_t*);
+typedef void (*transform_1d)(const tran_low_t*, tran_low_t*);

typedef struct {
  transform_1d cols, rows;  // vertical and horizontal
} transform_2d;

-void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob);
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*high_transform_1d)(const tran_low_t*, tran_low_t*, int bd);

-void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob);
+typedef struct {
-void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob);
+  high_transform_1d cols, rows;  // vertical and horizontal
-void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, int
+} high_transform_2d;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                     int eob);
+void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                     int eob);
+void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
+                     int eob);
+void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int
                       eob);
-void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
+void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob);

-void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
+void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob);
-void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
+void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob);
-void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
+void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                      int stride, int eob);

+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_high_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                          int eob, int bd);
+void vp9_high_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                          int eob, int bd);
+void vp9_high_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
+                          int eob, int bd);
+void vp9_high_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
+                            int eob, int bd);
+void vp9_high_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
+                            int eob, int bd);
+void vp9_high_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
+                         uint8_t *dest, int stride, int eob, int bd);
+void vp9_high_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
+                         uint8_t *dest, int stride, int eob, int bd);
+void vp9_high_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
+                           uint8_t *dest, int stride, int eob, int bd);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
}  // extern "C"
#endif
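Editor's note on the vp9_idct.h change above: tran_high_t widens to int64_t because an intermediate butterfly value derived from 12-bit input, once multiplied by one of the roughly 14-bit cospi constants, can exceed the 32-bit range. A self-contained sketch of the rounding step follows, with local stand-ins for ROUND_POWER_OF_TWO and DCT_CONST_BITS; every name prefixed with sketch_ or SKETCH_ is an assumption made for illustration, not the library's API.

    #include <stdint.h>
    #include <stdio.h>

    #define SKETCH_DCT_CONST_BITS 14
    #define SKETCH_ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    typedef int64_t sketch_tran_high_t;  /* intermediate stages, high bitdepth build */
    typedef int32_t sketch_tran_low_t;   /* final coefficients, high bitdepth build */

    static sketch_tran_low_t sketch_round_shift(sketch_tran_high_t input) {
      return (sketch_tran_low_t)SKETCH_ROUND_POWER_OF_TWO(input, SKETCH_DCT_CONST_BITS);
    }

    int main(void) {
      const sketch_tran_high_t cospi_16_64 = 11585;     /* value from the table above */
      sketch_tran_high_t stage = 4095 * 64;              /* a 12-bit residual after some transform gain */
      sketch_tran_high_t product = stage * cospi_16_64;  /* about 3.0e9, past INT32_MAX */
      printf("%lld -> %d\n", (long long)product, (int)sketch_round_shift(product));
      return 0;
    }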
|
@ -6,6 +6,7 @@ print <<EOF
|
|||||||
|
|
||||||
#include "vpx/vpx_integer.h"
|
#include "vpx/vpx_integer.h"
|
||||||
#include "vp9/common/vp9_enums.h"
|
#include "vp9/common/vp9_enums.h"
|
||||||
|
#include "vp9/common/vp9_idct.h"
|
||||||
|
|
||||||
struct macroblockd;
|
struct macroblockd;
|
||||||
|
|
||||||
@@ -329,68 +330,177 @@ $vp9_convolve8_avg_vert_neon_asm=vp9_convolve8_avg_vert_neon;
#
# dct
#
-add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/;
-$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon;
-
-add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/;
-$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon;
-
-add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/;
-$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon;
-
-add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
-$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon;
-
-add_proto qw/void vp9_idct8x8_12_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
-$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon;
-
-add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
-$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
-
-add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
-$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
-
-add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
-$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
-
-add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
-$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon;
-
-add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
-$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
-
-add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
-$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
-
-add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
-specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
-$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
-
-add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
-specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/;
-$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon;
-
-add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type";
-specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
-
-# dct and add
-
-add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_iwht4x4_1_add/;
-
-add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_iwht4x4_16_add/;
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct4x4_1_add/;
+
+add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct4x4_16_add/;
+
+add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_1_add/;
+
+add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_64_add/;
+
+add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_12_add/;
+
+add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_1_add/;
+
+add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_256_add/;
+
+add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_10_add/;
+
+add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_1024_add/;
+
+add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_34_add/;
+
+add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_1_add/;
+
+add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+specialize qw/vp9_iht4x4_16_add/;
+
+add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+specialize qw/vp9_iht8x8_64_add/;
+
+add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+specialize qw/vp9_iht16x16_256_add/;
+
+# dct and add
+
+add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_iwht4x4_1_add/;
+
+add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_iwht4x4_16_add/;
+} else {
+add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/;
+$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon;
+
+add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/;
+$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon;
+
+add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/;
+$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon;
+
+add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
+$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon;
+
+add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
+$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon;
+
+add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
+$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
+
+add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
+$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
+
+add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
+$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
+
+add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
+$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon;
+
+add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
+$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
+
+add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
+$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
+
+add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
+$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
+
+add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/;
+$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon;
+
+add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
+
+# dct and add
+
+add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_iwht4x4_1_add/;
+
+add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_iwht4x4_16_add/;
+}
+
+# High bitdepth functions
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+#
+# dct
+#
+add_proto qw/void vp9_high_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct4x4_1_add/;
+
+add_proto qw/void vp9_high_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct4x4_16_add/;
+
+add_proto qw/void vp9_high_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct8x8_1_add/;
+
+add_proto qw/void vp9_high_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct8x8_64_add/;
+
+add_proto qw/void vp9_high_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct8x8_10_add/;
+
+add_proto qw/void vp9_high_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct16x16_1_add/;
+
+add_proto qw/void vp9_high_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct16x16_256_add/;
+
+add_proto qw/void vp9_high_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct16x16_10_add/;
+
+add_proto qw/void vp9_high_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct32x32_1024_add/;
+
+add_proto qw/void vp9_high_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct32x32_34_add/;
+
+add_proto qw/void vp9_high_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_idct32x32_1_add/;
+
+add_proto qw/void vp9_high_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+specialize qw/vp9_high_iht4x4_16_add/;
+
+add_proto qw/void vp9_high_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+specialize qw/vp9_high_iht8x8_64_add/;
+
+add_proto qw/void vp9_high_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
+specialize qw/vp9_high_iht16x16_256_add/;
+
+# dct and add
+
+add_proto qw/void vp9_high_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_iwht4x4_1_add/;
+
+add_proto qw/void vp9_high_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+specialize qw/vp9_high_iwht4x4_16_add/;
+}

#
# Encoder functions below this point.
@@ -706,23 +816,42 @@ add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
# ENCODEMB INVOKE

-add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz";
-specialize qw/vp9_block_error avx2/, "$sse2_x86inc";

add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
specialize qw/vp9_subtract_block neon/, "$sse2_x86inc";

-add_proto qw/void vp9_quantize_fp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-specialize qw/vp9_quantize_fp neon/, "$ssse3_x86_64";
-
-add_proto qw/void vp9_quantize_fp_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64";
-
-add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
-
-add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+# the transform coefficients are held in 32-bit
+# values, so the assembler code for vp9_block_error can no longer be used.
+add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+specialize qw/vp9_block_error/;
+
+add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_fp/;
+
+add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_fp_32x32/;
+
+add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_b/;
+
+add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_b_32x32/;
+} else {
+add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+specialize qw/vp9_block_error avx2/;
+
+add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_fp neon/, "$ssse3_x86_64";
+
+add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64";
+
+add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
+
+add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_b_32x32/;
+}

#
# Structured Similarity (SSIM)
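The comment in the high-bitdepth branch above is the reason the assembly specializations are dropped there: coefficients become 32-bit values. As a plain-C sketch of what a block-error function with the prototype shown above computes (written for this note, not copied from libvpx; the local typedef stands in for the real one):

#include <stdint.h>

typedef int32_t tran_low_t;  /* local stand-in for the libvpx typedef */

/* Sum of squared differences between original and dequantized coefficients;
 * also returns the squared magnitude of the originals through *ssz. */
int64_t block_error_sketch(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                           intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;
  intptr_t i;
  for (i = 0; i < block_size; ++i) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }
  *ssz = sqcoeff;
  return error;
}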
@@ -736,44 +865,86 @@ if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
}

# fdct functions
-add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type";
-specialize qw/vp9_fht4x4 sse2/;
-
-add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int tx_type";
-specialize qw/vp9_fht8x8 sse2/;
-
-add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
-specialize qw/vp9_fht16x16 sse2/;
-
-add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
-
-add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct4x4_1 sse2/;
-
-add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct4x4 sse2/;
-
-add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct8x8_1 sse2 neon/;
-
-add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct8x8 sse2 neon/, "$ssse3_x86_64";
-
-add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct16x16_1 sse2/;
-
-add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct16x16 sse2/;
-
-add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct32x32_1 sse2/;
-
-add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct32x32 sse2 avx2/;
-
-add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct32x32_rd sse2 avx2/;
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+specialize qw/vp9_fht4x4/;
+
+add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+specialize qw/vp9_fht8x8/;
+
+add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+specialize qw/vp9_fht16x16/;
+
+add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fwht4x4/;
+
+add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct4x4_1/;
+
+add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct4x4/;
+
+add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct8x8_1/;
+
+add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct8x8/;
+
+add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct16x16_1/;
+
+add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct16x16/;
+
+add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct32x32_1/;
+
+add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct32x32/;
+
+add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct32x32_rd/;
+} else {
+add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+specialize qw/vp9_fht4x4 sse2/;
+
+add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+specialize qw/vp9_fht8x8 sse2/;
+
+add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+specialize qw/vp9_fht16x16 sse2/;
+
+add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
+
+add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct4x4_1 sse2/;
+
+add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct4x4 sse2/;
+
+add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct8x8_1 sse2 neon/;
+
+add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct8x8 sse2 neon/, "$ssse3_x86_64";
+
+add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct16x16_1 sse2/;
+
+add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct16x16 sse2/;
+
+add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct32x32_1 sse2/;
+
+add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct32x32 sse2 avx2/;
+
+add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_fdct32x32_rd sse2 avx2/;
+}

#
# Motion search
@@ -1369,7 +1540,79 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/unsigned int vp9_high_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_high_12_mse8x8/;

+# ENCODEMB INVOKE
+
+add_proto qw/int64_t vp9_high_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
+specialize qw/vp9_high_block_error/;
+
+add_proto qw/void vp9_high_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
+specialize qw/vp9_high_subtract_block/;
+
+add_proto qw/void vp9_high_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_high_quantize_fp/;
+
+add_proto qw/void vp9_high_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_high_quantize_fp_32x32/;
+
+add_proto qw/void vp9_high_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_high_quantize_b/;
+
+add_proto qw/void vp9_high_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_high_quantize_b_32x32/;
+
+#
+# Structured Similarity (SSIM)
+#
+if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
+add_proto qw/void vp9_high_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+specialize qw/vp9_high_ssim_parms_8x8/;
+
+add_proto qw/void vp9_high_ssim_parms_8x8_shift/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr, unsigned int bd, unsigned int shift";
+specialize qw/vp9_high_ssim_parms_8x8_shift/;
+}

+# fdct functions
+add_proto qw/void vp9_high_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+specialize qw/vp9_high_fht4x4/;
+
+add_proto qw/void vp9_high_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+specialize qw/vp9_high_fht8x8/;
+
+add_proto qw/void vp9_high_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+specialize qw/vp9_high_fht16x16/;
+
+add_proto qw/void vp9_high_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_high_fwht4x4/;
+
+add_proto qw/void vp9_high_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_high_fdct4x4/;
+
+add_proto qw/void vp9_high_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_high_fdct8x8_1/;
+
+add_proto qw/void vp9_high_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_high_fdct8x8/;
+
+add_proto qw/void vp9_high_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_high_fdct16x16_1/;
+
+add_proto qw/void vp9_high_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_high_fdct16x16/;
+
+add_proto qw/void vp9_high_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_high_fdct32x32_1/;
+
+add_proto qw/void vp9_high_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_high_fdct32x32/;
+
+add_proto qw/void vp9_high_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+specialize qw/vp9_high_fdct32x32_rd/;
+
+add_proto qw/void vp9_high_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+specialize qw/vp9_high_temporal_filter_apply/;
+
}
+# End vp9_high encoder functions

}
# end encoder functions
||||||
|
@@ -195,7 +195,7 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
  struct macroblockd_plane *const pd = &xd->plane[plane];
  if (eob > 0) {
    TX_TYPE tx_type = DCT_DCT;
-    int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+    tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    if (xd->lossless) {
      tx_type = DCT_DCT;
      vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
||||||
|
@@ -51,7 +51,7 @@
  } while (0)

static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
-                        int16_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
+                        tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
                        int ctx, const int16_t *scan, const int16_t *nb,
                        vp9_reader *r) {
  const int max_eob = 16 << (tx_size << 1);
||||||
|
@@ -28,8 +28,8 @@ typedef struct {

struct macroblock_plane {
  DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
-  int16_t *qcoeff;
-  int16_t *coeff;
+  tran_low_t *qcoeff;
+  tran_low_t *coeff;
  uint16_t *eobs;
  struct buf_2d src;

|
||||||
@@ -119,8 +119,12 @@ struct macroblock {
  // Used to store sub partition's choices.
  MV pred_mv[MAX_REF_FRAMES];

-  void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride);
-  void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
+  void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
+  void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
+#if CONFIG_VP9_HIGHBITDEPTH
+  void (*high_itxm_add)(const tran_low_t *input, uint8_t *dest, int stride,
+                        int eob, int bd);
+#endif
};

#ifdef __cplusplus
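A sketch of how the transform pointers above could be wired up. The setup function and the use_highbitdepth flag are invented for this illustration; only the member names and the vp9_fdct4x4 / vp9_idct4x4_add / vp9_high_idct4x4_add prototypes come from this diff.

/* Illustrative only: select transform entry points once, then call through
 * the pointers in the hot path. */
static void setup_txfm_ptrs_sketch(struct macroblock *x, int use_highbitdepth) {
  x->fwd_txm4x4 = vp9_fdct4x4;
  x->itxm_add = vp9_idct4x4_add;
#if CONFIG_VP9_HIGHBITDEPTH
  if (use_highbitdepth)
    x->high_itxm_add = vp9_high_idct4x4_add;
#endif
}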
|
||||||
|
@@ -30,13 +30,13 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    for (k = 0; k < 3; ++k) {
      CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(int16_t)));
+                      vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k])));
      CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(int16_t)));
+                      vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k])));
      CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(int16_t)));
+                      vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k])));
      CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
-                      vpx_memalign(16, num_pix * sizeof(uint16_t)));
+                      vpx_memalign(16, num_pix * sizeof(*ctx->eobs[i][k])));
      ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
      ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
      ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
|
||||||
|
@@ -19,15 +19,15 @@ struct VP9_COMP;
typedef struct {
  MODE_INFO mic;
  uint8_t *zcoeff_blk;
-  int16_t *coeff[MAX_MB_PLANE][3];
-  int16_t *qcoeff[MAX_MB_PLANE][3];
-  int16_t *dqcoeff[MAX_MB_PLANE][3];
+  tran_low_t *coeff[MAX_MB_PLANE][3];
+  tran_low_t *qcoeff[MAX_MB_PLANE][3];
+  tran_low_t *dqcoeff[MAX_MB_PLANE][3];
  uint16_t *eobs[MAX_MB_PLANE][3];

  // dual buffer pointers, 0: in use, 1: best in store
-  int16_t *coeff_pbuf[MAX_MB_PLANE][3];
-  int16_t *qcoeff_pbuf[MAX_MB_PLANE][3];
-  int16_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
+  tran_low_t *coeff_pbuf[MAX_MB_PLANE][3];
+  tran_low_t *qcoeff_pbuf[MAX_MB_PLANE][3];
+  tran_low_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
  uint16_t *eobs_pbuf[MAX_MB_PLANE][3];

  int is_coded;
|
||||||
|
@@ -18,15 +18,17 @@
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h"

-static INLINE int fdct_round_shift(int input) {
-  int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
-  assert(INT16_MIN <= rv && rv <= INT16_MAX);
+static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
+  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+  // TODO(debargha, peter.derivaz): Find new bounds for this assert
+  // and make the bounds consts.
+  // assert(INT16_MIN <= rv && rv <= INT16_MAX);
  return rv;
}

-static void fdct4(const int16_t *input, int16_t *output) {
-  int16_t step[4];
-  int temp1, temp2;
+static void fdct4(const tran_low_t *input, tran_low_t *output) {
+  tran_high_t step[4];
+  tran_high_t temp1, temp2;

  step[0] = input[0] + input[3];
  step[1] = input[1] + input[2];
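For reference, the rounding helper changed above shifts out the DCT constant precision with round-to-nearest. A self-contained sketch of the same arithmetic; the macro body and the value of DCT_CONST_BITS are restated here as assumptions so the snippet compiles on its own:

#include <stdint.h>

typedef int64_t tran_high_t;   /* local stand-in for the libvpx typedef */
#define DCT_CONST_BITS 14      /* assumed; matches the cospi_*_64 scaling */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

static tran_high_t fdct_round_shift_sketch(tran_high_t input) {
  /* e.g. input = 3 * 11585 (cospi_16_64): (34755 + 8192) >> 14 == 2 */
  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
}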
|
||||||
@@ -43,9 +45,9 @@ static void fdct4(const int16_t *input, int16_t *output) {
  output[3] = fdct_round_shift(temp2);
}

-void vp9_fdct4x4_1_c(const int16_t *input, int16_t *output, int stride) {
+void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
  int r, c;
-  int16_t sum = 0;
+  tran_low_t sum = 0;
  for (r = 0; r < 4; ++r)
    for (c = 0; c < 4; ++c)
      sum += input[r * stride + c];
|
||||||
@@ -54,7 +56,7 @@ void vp9_fdct4x4_1_c(const int16_t *input, int16_t *output, int stride) {
  output[1] = 0;
}

-void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
+void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
  // The 2D transform is done with two passes which are actually pretty
  // similar. In the first one, we transform the columns and transpose
  // the results. In the second one, we transform the rows. To achieve that,
|
||||||
@@ -63,22 +65,23 @@ void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
  // in normal/row positions).
  int pass;
  // We need an intermediate buffer between passes.
-  int16_t intermediate[4 * 4];
-  const int16_t *in = input;
-  int16_t *out = intermediate;
+  tran_low_t intermediate[4 * 4];
+  const int16_t *in_pass0 = input;
+  const tran_low_t *in = NULL;
+  tran_low_t *out = intermediate;
  // Do the two transform/transpose passes
  for (pass = 0; pass < 2; ++pass) {
-    /*canbe16*/ int input[4];
-    /*canbe16*/ int step[4];
-    /*needs32*/ int temp1, temp2;
+    tran_high_t input[4];      // canbe16
+    tran_high_t step[4];       // canbe16
+    tran_high_t temp1, temp2;  // needs32
    int i;
    for (i = 0; i < 4; ++i) {
      // Load inputs.
      if (0 == pass) {
-        input[0] = in[0 * stride] * 16;
-        input[1] = in[1 * stride] * 16;
-        input[2] = in[2 * stride] * 16;
-        input[3] = in[3 * stride] * 16;
+        input[0] = in_pass0[0 * stride] * 16;
+        input[1] = in_pass0[1 * stride] * 16;
+        input[2] = in_pass0[2 * stride] * 16;
+        input[3] = in_pass0[3 * stride] * 16;
        if (i == 0 && input[0]) {
          input[0] += 1;
        }
|
||||||
@@ -102,6 +105,7 @@ void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
      out[1] = fdct_round_shift(temp1);
      out[3] = fdct_round_shift(temp2);
      // Do next column (which is a transposed row in second/horizontal pass)
+      in_pass0++;
      in++;
      out += 4;
    }
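The two hunks above split the original single read pointer into in_pass0 (the int16_t residual read in pass 0) and in (the tran_low_t intermediate read in pass 1), and advance both per column. A stripped-down sketch of just that pointer handling, with the loads and the 1-D transform elided and a local stand-in typedef so it is self-contained:

#include <stdint.h>

typedef int32_t tran_low_t;  /* local stand-in for the libvpx typedef */

static void two_pass_pointers_sketch(const int16_t *input, tran_low_t *output) {
  tran_low_t intermediate[4 * 4];
  const int16_t *in_pass0 = input;        /* pass-0 source: residual      */
  const tran_low_t *in = intermediate;    /* pass-1 source: intermediate  */
  tran_low_t *out = intermediate;
  int pass, i;
  (void)in_pass0;
  for (pass = 0; pass < 2; ++pass) {
    for (i = 0; i < 4; ++i) {
      /* load column i from in_pass0 (pass 0) or in (pass 1),
         run the 1-D transform, write four results through out */
      in_pass0++;   /* both read pointers step to the next column */
      in++;
      out += 4;
    }
    in = intermediate;  /* second pass reads what the first pass wrote */
    out = output;
  }
}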
|
||||||
@@ -119,9 +123,9 @@ void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
  }
}

-static void fadst4(const int16_t *input, int16_t *output) {
-  int x0, x1, x2, x3;
-  int s0, s1, s2, s3, s4, s5, s6, s7;
+static void fadst4(const tran_low_t *input, tran_low_t *output) {
+  tran_high_t x0, x1, x2, x3;
+  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;

  x0 = input[0];
  x1 = input[1];
|
||||||
@@ -166,15 +170,15 @@ static const transform_2d FHT_4[] = {
  { fadst4, fadst4 }   // ADST_ADST = 3
};

-void vp9_fht4x4_c(const int16_t *input, int16_t *output,
+void vp9_fht4x4_c(const int16_t *input, tran_low_t *output,
                  int stride, int tx_type) {
  if (tx_type == DCT_DCT) {
    vp9_fdct4x4_c(input, output, stride);
  } else {
-    int16_t out[4 * 4];
-    int16_t *outptr = &out[0];
+    tran_low_t out[4 * 4];
+    tran_low_t *outptr = &out[0];
    int i, j;
-    int16_t temp_in[4], temp_out[4];
+    tran_low_t temp_in[4], temp_out[4];
    const transform_2d ht = FHT_4[tx_type];

    // Columns
|
||||||
@@ -199,10 +203,10 @@ void vp9_fht4x4_c(const int16_t *input, int16_t *output,
  }
}

-static void fdct8(const int16_t *input, int16_t *output) {
-  /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
-  /*needs32*/ int t0, t1, t2, t3;
-  /*canbe16*/ int x0, x1, x2, x3;
+static void fdct8(const tran_low_t *input, tran_low_t *output) {
+  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;  // canbe16
+  tran_high_t t0, t1, t2, t3;  // needs32
+  tran_high_t x0, x1, x2, x3;  // canbe16

  // stage 1
  s0 = input[0] + input[7];
|
||||||
@@ -251,9 +255,9 @@ static void fdct8(const int16_t *input, int16_t *output) {
  output[7] = fdct_round_shift(t3);
}

-void vp9_fdct8x8_1_c(const int16_t *input, int16_t *output, int stride) {
+void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
  int r, c;
-  int16_t sum = 0;
+  tran_low_t sum = 0;
  for (r = 0; r < 8; ++r)
    for (c = 0; c < 8; ++c)
      sum += input[r * stride + c];
|
||||||
@@ -262,16 +266,16 @@ void vp9_fdct8x8_1_c(const int16_t *input, int16_t *output, int stride) {
  output[1] = 0;
}

-void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
+void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
  int i, j;
-  int16_t intermediate[64];
+  tran_low_t intermediate[64];

  // Transform columns
  {
-    int16_t *output = intermediate;
-    /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
-    /*needs32*/ int t0, t1, t2, t3;
-    /*canbe16*/ int x0, x1, x2, x3;
+    tran_low_t *output = intermediate;
+    tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;  // canbe16
+    tran_high_t t0, t1, t2, t3;  // needs32
+    tran_high_t x0, x1, x2, x3;  // canbe16

    int i;
    for (i = 0; i < 8; i++) {
|
||||||
@@ -333,9 +337,9 @@ void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
  }
}

-void vp9_fdct16x16_1_c(const int16_t *input, int16_t *output, int stride) {
+void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
  int r, c;
-  int16_t sum = 0;
+  tran_low_t sum = 0;
  for (r = 0; r < 16; ++r)
    for (c = 0; c < 16; ++c)
      sum += input[r * stride + c];
|
||||||
@@ -344,7 +348,7 @@ void vp9_fdct16x16_1_c(const int16_t *input, int16_t *output, int stride) {
  output[1] = 0;
}

-void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
+void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
  // The 2D transform is done with two passes which are actually pretty
  // similar. In the first one, we transform the columns and transpose
  // the results. In the second one, we transform the rows. To achieve that,
|
||||||
@@ -353,37 +357,38 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
  // in normal/row positions).
  int pass;
  // We need an intermediate buffer between passes.
-  int16_t intermediate[256];
-  const int16_t *in = input;
-  int16_t *out = intermediate;
+  tran_low_t intermediate[256];
+  const int16_t *in_pass0 = input;
+  const tran_low_t *in = NULL;
+  tran_low_t *out = intermediate;
  // Do the two transform/transpose passes
  for (pass = 0; pass < 2; ++pass) {
-    /*canbe16*/ int step1[8];
-    /*canbe16*/ int step2[8];
-    /*canbe16*/ int step3[8];
-    /*canbe16*/ int input[8];
-    /*needs32*/ int temp1, temp2;
+    tran_high_t step1[8];      // canbe16
+    tran_high_t step2[8];      // canbe16
+    tran_high_t step3[8];      // canbe16
+    tran_high_t input[8];      // canbe16
+    tran_high_t temp1, temp2;  // needs32
    int i;
    for (i = 0; i < 16; i++) {
      if (0 == pass) {
        // Calculate input for the first 8 results.
-        input[0] = (in[0 * stride] + in[15 * stride]) * 4;
-        input[1] = (in[1 * stride] + in[14 * stride]) * 4;
-        input[2] = (in[2 * stride] + in[13 * stride]) * 4;
-        input[3] = (in[3 * stride] + in[12 * stride]) * 4;
-        input[4] = (in[4 * stride] + in[11 * stride]) * 4;
-        input[5] = (in[5 * stride] + in[10 * stride]) * 4;
-        input[6] = (in[6 * stride] + in[ 9 * stride]) * 4;
-        input[7] = (in[7 * stride] + in[ 8 * stride]) * 4;
+        input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
+        input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
+        input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
+        input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
+        input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
+        input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
+        input[6] = (in_pass0[6 * stride] + in_pass0[ 9 * stride]) * 4;
+        input[7] = (in_pass0[7 * stride] + in_pass0[ 8 * stride]) * 4;
        // Calculate input for the next 8 results.
-        step1[0] = (in[7 * stride] - in[ 8 * stride]) * 4;
-        step1[1] = (in[6 * stride] - in[ 9 * stride]) * 4;
-        step1[2] = (in[5 * stride] - in[10 * stride]) * 4;
-        step1[3] = (in[4 * stride] - in[11 * stride]) * 4;
-        step1[4] = (in[3 * stride] - in[12 * stride]) * 4;
-        step1[5] = (in[2 * stride] - in[13 * stride]) * 4;
-        step1[6] = (in[1 * stride] - in[14 * stride]) * 4;
-        step1[7] = (in[0 * stride] - in[15 * stride]) * 4;
+        step1[0] = (in_pass0[7 * stride] - in_pass0[ 8 * stride]) * 4;
+        step1[1] = (in_pass0[6 * stride] - in_pass0[ 9 * stride]) * 4;
+        step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
+        step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
+        step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
+        step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
+        step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
+        step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
      } else {
        // Calculate input for the first 8 results.
        input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
|
||||||
@@ -406,9 +411,9 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
      }
      // Work on the first eight values; fdct8(input, even_results);
      {
-        /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
-        /*needs32*/ int t0, t1, t2, t3;
-        /*canbe16*/ int x0, x1, x2, x3;
+        tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;  // canbe16
+        tran_high_t t0, t1, t2, t3;  // needs32
+        tran_high_t x0, x1, x2, x3;  // canbe16

        // stage 1
        s0 = input[0] + input[7];
|
||||||
@@ -514,6 +519,7 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
      }
      // Do next column (which is a transposed row in second/horizontal pass)
      in++;
+      in_pass0++;
      out += 16;
    }
    // Setup in/out for next pass.
||||||
@ -522,17 +528,17 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void fadst8(const int16_t *input, int16_t *output) {
|
static void fadst8(const tran_low_t *input, tran_low_t *output) {
|
||||||
int s0, s1, s2, s3, s4, s5, s6, s7;
|
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
|
||||||
|
|
||||||
int x0 = input[7];
|
tran_high_t x0 = input[7];
|
||||||
int x1 = input[0];
|
tran_high_t x1 = input[0];
|
||||||
int x2 = input[5];
|
tran_high_t x2 = input[5];
|
||||||
int x3 = input[2];
|
tran_high_t x3 = input[2];
|
||||||
int x4 = input[3];
|
tran_high_t x4 = input[3];
|
||||||
int x5 = input[4];
|
tran_high_t x5 = input[4];
|
||||||
int x6 = input[1];
|
tran_high_t x6 = input[1];
|
||||||
int x7 = input[6];
|
tran_high_t x7 = input[6];
|
||||||
|
|
||||||
// stage 1
|
// stage 1
|
||||||
s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
|
s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
|
||||||
@@ -600,15 +606,15 @@ static const transform_2d FHT_8[] = {
  { fadst8, fadst8 }   // ADST_ADST = 3
};

-void vp9_fht8x8_c(const int16_t *input, int16_t *output,
+void vp9_fht8x8_c(const int16_t *input, tran_low_t *output,
                  int stride, int tx_type) {
  if (tx_type == DCT_DCT) {
    vp9_fdct8x8_c(input, output, stride);
  } else {
-    int16_t out[64];
-    int16_t *outptr = &out[0];
+    tran_low_t out[64];
+    tran_low_t *outptr = &out[0];
    int i, j;
-    int16_t temp_in[8], temp_out[8];
+    tran_low_t temp_in[8], temp_out[8];
    const transform_2d ht = FHT_8[tx_type];

    // Columns
|
||||||
@@ -633,17 +639,18 @@ void vp9_fht8x8_c(const int16_t *input, int16_t *output,

/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
   pixel. */
-void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) {
+void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
  int i;
-  int a1, b1, c1, d1, e1;
-  const int16_t *ip = input;
-  int16_t *op = output;
+  tran_high_t a1, b1, c1, d1, e1;
+  const int16_t *ip_pass0 = input;
+  const tran_low_t *ip = NULL;
+  tran_low_t *op = output;

  for (i = 0; i < 4; i++) {
-    a1 = ip[0 * stride];
-    b1 = ip[1 * stride];
-    c1 = ip[2 * stride];
-    d1 = ip[3 * stride];
+    a1 = ip_pass0[0 * stride];
+    b1 = ip_pass0[1 * stride];
+    c1 = ip_pass0[2 * stride];
+    d1 = ip_pass0[3 * stride];

    a1 += b1;
    d1 = d1 - c1;
|
||||||
@@ -657,7 +664,7 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) {
    op[8] = d1;
    op[12] = b1;

-    ip++;
+    ip_pass0++;
    op++;
  }
  ip = output;
|
||||||
@@ -687,12 +694,12 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) {
}

// Rewrote to use same algorithm as others.
-static void fdct16(const int16_t in[16], int16_t out[16]) {
-  /*canbe16*/ int step1[8];
-  /*canbe16*/ int step2[8];
-  /*canbe16*/ int step3[8];
-  /*canbe16*/ int input[8];
-  /*needs32*/ int temp1, temp2;
+static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
+  tran_high_t step1[8];      // canbe16
+  tran_high_t step2[8];      // canbe16
+  tran_high_t step3[8];      // canbe16
+  tran_high_t input[8];      // canbe16
+  tran_high_t temp1, temp2;  // needs32

  // step 1
  input[0] = in[0] + in[15];
|
||||||
@@ -715,9 +722,9 @@ static void fdct16(const int16_t in[16], int16_t out[16]) {

  // fdct8(step, step);
  {
-    /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
-    /*needs32*/ int t0, t1, t2, t3;
-    /*canbe16*/ int x0, x1, x2, x3;
+    tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;  // canbe16
+    tran_high_t t0, t1, t2, t3;  // needs32
+    tran_high_t x0, x1, x2, x3;  // canbe16

    // stage 1
    s0 = input[0] + input[7];
|
||||||
@@ -828,25 +835,26 @@ static void fdct16(const int16_t in[16], int16_t out[16]) {
  out[15] = fdct_round_shift(temp2);
}

-static void fadst16(const int16_t *input, int16_t *output) {
-  int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
+static void fadst16(const tran_low_t *input, tran_low_t *output) {
+  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
+  tran_high_t s9, s10, s11, s12, s13, s14, s15;

-  int x0 = input[15];
-  int x1 = input[0];
-  int x2 = input[13];
-  int x3 = input[2];
-  int x4 = input[11];
-  int x5 = input[4];
-  int x6 = input[9];
-  int x7 = input[6];
-  int x8 = input[7];
-  int x9 = input[8];
-  int x10 = input[5];
-  int x11 = input[10];
-  int x12 = input[3];
-  int x13 = input[12];
-  int x14 = input[1];
-  int x15 = input[14];
+  tran_high_t x0 = input[15];
+  tran_high_t x1 = input[0];
+  tran_high_t x2 = input[13];
+  tran_high_t x3 = input[2];
+  tran_high_t x4 = input[11];
+  tran_high_t x5 = input[4];
+  tran_high_t x6 = input[9];
+  tran_high_t x7 = input[6];
+  tran_high_t x8 = input[7];
+  tran_high_t x9 = input[8];
+  tran_high_t x10 = input[5];
+  tran_high_t x11 = input[10];
+  tran_high_t x12 = input[3];
+  tran_high_t x13 = input[12];
+  tran_high_t x14 = input[1];
+  tran_high_t x15 = input[14];

  // stage 1
  s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
|
||||||
@ -997,15 +1005,15 @@ static const transform_2d FHT_16[] = {
|
|||||||
{ fadst16, fadst16 } // ADST_ADST = 3
|
{ fadst16, fadst16 } // ADST_ADST = 3
|
||||||
};
|
};
|
||||||
|
|
||||||
void vp9_fht16x16_c(const int16_t *input, int16_t *output,
|
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output,
|
||||||
int stride, int tx_type) {
|
int stride, int tx_type) {
|
||||||
if (tx_type == DCT_DCT) {
|
if (tx_type == DCT_DCT) {
|
||||||
vp9_fdct16x16_c(input, output, stride);
|
vp9_fdct16x16_c(input, output, stride);
|
||||||
} else {
|
} else {
|
||||||
int16_t out[256];
|
tran_low_t out[256];
|
||||||
int16_t *outptr = &out[0];
|
tran_low_t *outptr = &out[0];
|
||||||
int i, j;
|
int i, j;
|
||||||
int16_t temp_in[16], temp_out[16];
|
tran_low_t temp_in[16], temp_out[16];
|
||||||
const transform_2d ht = FHT_16[tx_type];
|
const transform_2d ht = FHT_16[tx_type];
|
||||||
|
|
||||||
// Columns
|
// Columns
|
||||||
@ -1028,19 +1036,21 @@ void vp9_fht16x16_c(const int16_t *input, int16_t *output,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
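The hybrid transform above applies its 1-D kernels separably: columns first into an intermediate buffer, then rows. A stripped-down illustration of that pattern with a placeholder 1-D kernel; none of these names come from the patch, and the real code also scales and rounds between passes:

#include <stdint.h>

/* Identity kernel stands in for fdct16()/fadst16(). */
static void placeholder_txfm_1d(const int32_t *in, int32_t *out) {
  int k;
  for (k = 0; k < 16; ++k)
    out[k] = in[k];
}

static void separable_16x16_sketch(const int16_t *input, int32_t *output,
                                   int stride) {
  int32_t intermediate[16 * 16];
  int32_t temp_in[16], temp_out[16];
  int i, j;
  /* Columns: walk down the source using the row stride. */
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j)
      temp_in[j] = input[j * stride + i];
    placeholder_txfm_1d(temp_in, temp_out);
    for (j = 0; j < 16; ++j)
      intermediate[j * 16 + i] = temp_out[j];
  }
  /* Rows: transform the intermediate buffer and emit final coefficients. */
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j)
      temp_in[j] = intermediate[j + i * 16];
    placeholder_txfm_1d(temp_in, temp_out);
    for (j = 0; j < 16; ++j)
      output[j + i * 16] = temp_out[j];
  }
}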
-static INLINE int dct_32_round(int input) {
-  int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
-  assert(-131072 <= rv && rv <= 131071);
+static INLINE tran_high_t dct_32_round(tran_high_t input) {
+  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+  // TODO(debargha, peter.derivaz): Find new bounds for this assert,
+  // and make the bounds consts.
+  // assert(-131072 <= rv && rv <= 131071);
   return rv;
 }
 
-static INLINE int half_round_shift(int input) {
-  int rv = (input + 1 + (input < 0)) >> 2;
+static INLINE tran_high_t half_round_shift(tran_high_t input) {
+  tran_high_t rv = (input + 1 + (input < 0)) >> 2;
   return rv;
 }
 
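A worked check of the two rounding helpers. ROUND_POWER_OF_TWO is restated here under the usual add-half-then-shift assumption, 14 stands in for DCT_CONST_BITS, and the product value is invented:

#include <assert.h>
#include <stdint.h>

#define EXAMPLE_ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

static void rounding_examples(void) {
  /* dct_32_round-style rounding: intermediate products carry a 2^14 scale
   * and are rounded back down to coefficient range. */
  const int64_t product = 100 * 16364;  /* example scaled product */
  const int64_t rounded = EXAMPLE_ROUND_POWER_OF_TWO(product, 14);
  assert(rounded == 100);

  /* half_round_shift-style rounding: divide by 4, rounding positive and
   * negative inputs symmetrically. */
  assert(((5 + 1 + (5 < 0)) >> 2) == 1);
  assert(((-5 + 1 + (-5 < 0)) >> 2) == -1);
}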
-static void fdct32(const int *input, int *output, int round) {
-  int step[32];
+static void fdct32(const tran_high_t *input, tran_high_t *output, int round) {
+  tran_high_t step[32];
   // Stage 1
   step[0] = input[0] + input[(32 - 1)];
   step[1] = input[1] + input[(32 - 2)];
@@ -1362,9 +1372,9 @@ static void fdct32(const int *input, int *output, int round) {
   output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
 }
 
-void vp9_fdct32x32_1_c(const int16_t *input, int16_t *output, int stride) {
+void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
   int r, c;
-  int16_t sum = 0;
+  tran_low_t sum = 0;
   for (r = 0; r < 32; ++r)
     for (c = 0; c < 32; ++c)
       sum += input[r * stride + c];
@@ -1373,13 +1383,13 @@ void vp9_fdct32x32_1_c(const int16_t *input, int16_t *output, int stride) {
   output[1] = 0;
 }
 
-void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
+void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
   int i, j;
-  int output[32 * 32];
+  tran_high_t output[32 * 32];
 
   // Columns
   for (i = 0; i < 32; ++i) {
-    int temp_in[32], temp_out[32];
+    tran_high_t temp_in[32], temp_out[32];
     for (j = 0; j < 32; ++j)
       temp_in[j] = input[j * stride + i] * 4;
     fdct32(temp_in, temp_out, 0);
@@ -1389,7 +1399,7 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
 
   // Rows
   for (i = 0; i < 32; ++i) {
-    int temp_in[32], temp_out[32];
+    tran_high_t temp_in[32], temp_out[32];
     for (j = 0; j < 32; ++j)
       temp_in[j] = output[j + i * 32];
     fdct32(temp_in, temp_out, 0);
@@ -1401,13 +1411,13 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
 // Note that although we use dct_32_round in dct32 computation flow,
 // this 2d fdct32x32 for rate-distortion optimization loop is operating
 // within 16 bits precision.
-void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
+void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
   int i, j;
-  int output[32 * 32];
+  tran_high_t output[32 * 32];
 
   // Columns
   for (i = 0; i < 32; ++i) {
-    int temp_in[32], temp_out[32];
+    tran_high_t temp_in[32], temp_out[32];
     for (j = 0; j < 32; ++j)
       temp_in[j] = input[j * stride + i] * 4;
     fdct32(temp_in, temp_out, 0);
@@ -1420,7 +1430,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
 
   // Rows
   for (i = 0; i < 32; ++i) {
-    int temp_in[32], temp_out[32];
+    tran_high_t temp_in[32], temp_out[32];
     for (j = 0; j < 32; ++j)
       temp_in[j] = output[j + i * 32];
     fdct32(temp_in, temp_out, 1);
@@ -1428,3 +1438,61 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
       out[j + i * 32] = temp_out[j];
   }
 }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_high_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
+  vp9_fdct4x4_c(input, output, stride);
+}
+
+void vp9_high_fht4x4_c(const int16_t *input, tran_low_t *output,
+                       int stride, int tx_type) {
+  vp9_fht4x4_c(input, output, stride, tx_type);
+}
+
+void vp9_high_fdct8x8_1_c(const int16_t *input, tran_low_t *final_output,
+                          int stride) {
+  vp9_fdct8x8_1_c(input, final_output, stride);
+}
+
+void vp9_high_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
+                        int stride) {
+  vp9_fdct8x8_c(input, final_output, stride);
+}
+
+void vp9_high_fdct16x16_1_c(const int16_t *input, tran_low_t *output,
+                            int stride) {
+  vp9_fdct16x16_1_c(input, output, stride);
+}
+
+void vp9_high_fdct16x16_c(const int16_t *input, tran_low_t *output,
+                          int stride) {
+  vp9_fdct16x16_c(input, output, stride);
+}
+
+void vp9_high_fht8x8_c(const int16_t *input, tran_low_t *output,
+                       int stride, int tx_type) {
+  vp9_fht8x8_c(input, output, stride, tx_type);
+}
+
+void vp9_high_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
+  vp9_fwht4x4_c(input, output, stride);
+}
+
+void vp9_high_fht16x16_c(const int16_t *input, tran_low_t *output,
+                         int stride, int tx_type) {
+  vp9_fht16x16_c(input, output, stride, tx_type);
+}
+
+void vp9_high_fdct32x32_1_c(const int16_t *input, tran_low_t *out, int stride) {
+  vp9_fdct32x32_1_c(input, out, stride);
+}
+
+void vp9_high_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
+  vp9_fdct32x32_c(input, out, stride);
+}
+
+void vp9_high_fdct32x32_rd_c(const int16_t *input, tran_low_t *out,
                             int stride) {
+  vp9_fdct32x32_rd_c(input, out, stride);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
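The vp9_high_* C entry points above simply forward to the existing 8-bit C transforms for now. A possible caller-side arrangement, shown only as a sketch; forward_txfm_4x4_example and its arguments are made-up names, and the declarations are assumed to be visible (for example via ./vp9_rtcd.h):

static void forward_txfm_4x4_example(const int16_t *residual,
                                     tran_low_t *coeffs, int stride) {
#if CONFIG_VP9_HIGHBITDEPTH
  vp9_high_fdct4x4_c(residual, coeffs, stride);  /* forwards to vp9_fdct4x4_c */
#else
  vp9_fdct4x4_c(residual, coeffs, stride);
#endif
}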
@@ -107,9 +107,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
   vp9_token_state tokens[1025][2];
   unsigned best_index[1025][2];
   uint8_t token_cache[1024];
-  const int16_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
-  int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   const int eob = p->eobs[block];
   const PLANE_TYPE type = pd->plane_type;
   const int default_eob = 16 << (tx_size << 1);
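The default_eob expression in this hunk encodes the coefficient count per transform size, which is also why the trellis arrays above are sized 1025 and 1024. A quick check of the arithmetic, assuming the TX_4X4..TX_32X32 enum values are 0..3:

#include <assert.h>

static void default_eob_examples(void) {
  assert((16 << (0 << 1)) == 16);    /* TX_4X4:    4 *  4 */
  assert((16 << (1 << 1)) == 64);    /* TX_8X8:    8 *  8 */
  assert((16 << (2 << 1)) == 256);   /* TX_16X16: 16 * 16 */
  assert((16 << (3 << 1)) == 1024);  /* TX_32X32: 32 * 32 */
}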
@@ -294,22 +294,33 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
 }
 
 static INLINE void fdct32x32(int rd_transform,
-                             const int16_t *src, int16_t *dst, int src_stride) {
+                             const int16_t *src, tran_low_t *dst,
+                             int src_stride) {
   if (rd_transform)
     vp9_fdct32x32_rd(src, dst, src_stride);
   else
     vp9_fdct32x32(src, dst, src_stride);
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE void high_fdct32x32(int rd_transform, const int16_t *src,
+                                  tran_low_t *dst, int src_stride) {
+  if (rd_transform)
+    vp9_high_fdct32x32_rd(src, dst, src_stride);
+  else
+    vp9_high_fdct32x32(src, dst, src_stride);
+}
+#endif
+
 void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
   MACROBLOCKD *const xd = &x->e_mbd;
   const struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
-  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
-  int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   uint16_t *const eob = &p->eobs[block];
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   int i, j;
@@ -357,9 +368,9 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
   MACROBLOCKD *const xd = &x->e_mbd;
   const struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &xd->plane[plane];
-  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
-  int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   uint16_t *const eob = &p->eobs[block];
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   int i, j;
@@ -405,9 +416,9 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
   const struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
-  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
-  int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   uint16_t *const eob = &p->eobs[block];
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   int i, j;
@@ -458,7 +469,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
   struct optimize_ctx *const ctx = args->ctx;
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   int i, j;
   uint8_t *dst;
   ENTROPY_CONTEXT *a, *l;
@@ -538,7 +549,7 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   int i, j;
   uint8_t *dst;
   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
@@ -587,9 +598,9 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
-  int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   const scan_order *scan_order;
   TX_TYPE tx_type;
   PREDICTION_MODE mode;
@@ -556,6 +556,9 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
 
   cm->profile = oxcf->profile;
   cm->bit_depth = oxcf->bit_depth;
+#if CONFIG_VP9_HIGHBITDEPTH
+  cm->use_highbitdepth = oxcf->use_highbitdepth;
+#endif
   cm->color_space = UNKNOWN;
 
   cm->width = oxcf->width;
@@ -613,6 +616,11 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
   assert(cm->bit_depth > VPX_BITS_8);
 
   cpi->oxcf = *oxcf;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (cpi->oxcf.use_highbitdepth) {
+    cpi->mb.e_mbd.bd = (int)cm->bit_depth;
+  }
+#endif
 
   rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
 
@@ -2768,7 +2776,16 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   if (oxcf->pass == 1 &&
       (!cpi->use_svc || is_two_pass_svc(cpi))) {
     const int lossless = is_lossless_requested(oxcf);
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (cpi->oxcf.use_highbitdepth)
+      cpi->mb.fwd_txm4x4 = lossless ? vp9_high_fwht4x4 : vp9_high_fdct4x4;
+    else
+      cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4;
+    cpi->mb.high_itxm_add = lossless ? vp9_high_iwht4x4_add :
+                                       vp9_high_idct4x4_add;
+#else
     cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4;
+#endif
     cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
     vp9_first_pass(cpi, source);
   } else if (oxcf->pass == 2 &&
@@ -217,6 +217,9 @@ typedef struct VP9EncoderConfig {
 
   vp8e_tuning tuning;
   vp9e_tune_content content;
+#if CONFIG_VP9_HIGHBITDEPTH
+  int use_highbitdepth;
+#endif
 } VP9EncoderConfig;
 
 static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
@@ -19,9 +19,9 @@
 #include "vp9/encoder/vp9_quantize.h"
 #include "vp9/encoder/vp9_rd.h"
 
-void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
+void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
                      const int16_t *round_ptr, const int16_t quant,
-                     int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                      const int16_t dequant_ptr, uint16_t *eob_ptr) {
   const int rc = 0;
   const int coeff = coeff_ptr[rc];
@@ -40,9 +40,9 @@ void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
   *eob_ptr = eob + 1;
 }
 
-void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
+void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                            const int16_t *round_ptr, const int16_t quant,
-                           int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t dequant_ptr, uint16_t *eob_ptr) {
   const int rc = 0;
   const int coeff = coeff_ptr[rc];
@@ -62,11 +62,11 @@ void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
   *eob_ptr = eob + 1;
 }
 
-void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
+void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                        int skip_block,
                        const int16_t *zbin_ptr, const int16_t *round_ptr,
                        const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
-                       int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+                       tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                        const int16_t *dequant_ptr,
                        int zbin_oq_value, uint16_t *eob_ptr,
                        const int16_t *scan, const int16_t *iscan) {
@@ -78,13 +78,13 @@ void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
   (void)zbin_oq_value;
   (void)iscan;
 
-  vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t));
-  vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
 
   if (!skip_block) {
     // Quantization pass: All coefficients with index >= zero_flag are
     // skippable. Note: zero_flag can be zero.
-    for (i = 0; i < count; i++) {
+    for (i = 0; i < n_coeffs; i++) {
       const int rc = scan[i];
       const int coeff = coeff_ptr[rc];
       const int coeff_sign = (coeff >> 31);
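The loop that continues past this hunk works one coefficient at a time. A simplified sketch of that per-coefficient step, not copied from the library; the Q16 quantizer scale and the argument names are assumptions:

#include <stdint.h>

static void quantize_one_coeff_sketch(int32_t coeff, int round, int quant,
                                      int dequant, int32_t *qcoeff,
                                      int32_t *dqcoeff) {
  const int32_t coeff_sign = coeff >> 31;                /* 0 or -1 */
  const int32_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
  const int32_t level =
      (int32_t)(((int64_t)(abs_coeff + round) * quant) >> 16);  /* Q16 scale */
  *qcoeff = (level ^ coeff_sign) - coeff_sign;           /* restore the sign */
  *dqcoeff = *qcoeff * dequant;                          /* reconstruction value */
}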
@@ -105,12 +105,12 @@ void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
 
 // TODO(jingning) Refactor this file and combine functions with similar
 // operations.
-void vp9_quantize_fp_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
+void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                              int skip_block,
                              const int16_t *zbin_ptr, const int16_t *round_ptr,
                              const int16_t *quant_ptr,
                              const int16_t *quant_shift_ptr,
-                             int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr,
                              int zbin_oq_value, uint16_t *eob_ptr,
                              const int16_t *scan, const int16_t *iscan) {
@@ -120,8 +120,8 @@ void vp9_quantize_fp_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
   (void)zbin_oq_value;
   (void)iscan;
 
-  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
-  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
 
   if (!skip_block) {
     for (i = 0; i < n_coeffs; i++) {
@@ -146,27 +146,27 @@ void vp9_quantize_fp_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
   *eob_ptr = eob + 1;
 }
 
-void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
+void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                       int skip_block,
                       const int16_t *zbin_ptr, const int16_t *round_ptr,
                       const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
-                      int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+                      tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                       const int16_t *dequant_ptr,
                       int zbin_oq_value, uint16_t *eob_ptr,
                       const int16_t *scan, const int16_t *iscan) {
-  int i, non_zero_count = (int)count, eob = -1;
+  int i, non_zero_count = (int)n_coeffs, eob = -1;
   const int zbins[2] = { zbin_ptr[0] + zbin_oq_value,
                          zbin_ptr[1] + zbin_oq_value };
   const int nzbins[2] = { zbins[0] * -1,
                           zbins[1] * -1 };
   (void)iscan;
 
-  vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t));
-  vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
 
   if (!skip_block) {
     // Pre-scan pass
-    for (i = (int)count - 1; i >= 0; i--) {
+    for (i = (int)n_coeffs - 1; i >= 0; i--) {
       const int rc = scan[i];
       const int coeff = coeff_ptr[rc];
 
@@ -199,12 +199,12 @@ void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
   *eob_ptr = eob + 1;
 }
 
-void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
+void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                             int skip_block,
                             const int16_t *zbin_ptr, const int16_t *round_ptr,
                             const int16_t *quant_ptr,
                             const int16_t *quant_shift_ptr,
-                            int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                             const int16_t *dequant_ptr,
                             int zbin_oq_value, uint16_t *eob_ptr,
                             const int16_t *scan, const int16_t *iscan) {
@@ -217,8 +217,8 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
   int i, eob = -1;
   (void)iscan;
 
-  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
-  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
 
   if (!skip_block) {
     // Pre-scan pass
@@ -280,13 +280,19 @@ static void invert_quant(int16_t *quant, int16_t *shift, int d) {
   *shift = 1 << (16 - l);
 }
 
+static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) {
+  int quant = vp9_dc_quant(q, 0);
+  (void) bit_depth;
+  return q == 0 ? 64 : (quant < 148 ? 84 : 80);
+}
+
 void vp9_init_quantizer(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   QUANTS *const quants = &cpi->quants;
   int i, q, quant;
 
   for (q = 0; q < QINDEX_RANGE; q++) {
-    const int qzbin_factor = q == 0 ? 64 : (vp9_dc_quant(q, 0) < 148 ? 84 : 80);
+    const int qzbin_factor = get_qzbin_factor(q, cm->bit_depth);
     const int qrounding_factor = q == 0 ? 64 : 48;
 
     for (i = 0; i < 2; ++i) {
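The new get_qzbin_factor() keeps the previous q == 0 and quant < 148 behaviour while threading the bit depth through for later use. A hypothetical check of its branches with vp9_dc_quant stubbed out; the stub values are invented purely to exercise both sides of the comparison:

#include <assert.h>

static int stub_dc_quant(int q) {      /* invented values, two branches only */
  return q < 50 ? 100 : 200;
}

static int stub_get_qzbin_factor(int q) {
  const int quant = stub_dc_quant(q);
  return q == 0 ? 64 : (quant < 148 ? 84 : 80);
}

static void qzbin_factor_examples(void) {
  assert(stub_get_qzbin_factor(0) == 64);   /* q == 0 special case */
  assert(stub_get_qzbin_factor(10) == 84);  /* stub quant 100 < 148 */
  assert(stub_get_qzbin_factor(60) == 80);  /* stub quant 200 >= 148 */
}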
@@ -37,17 +37,29 @@ typedef struct {
   DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]);
 } QUANTS;
 
-void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
+void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
                      const int16_t *round_ptr, const int16_t quant_ptr,
-                     int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                      const int16_t dequant_ptr, uint16_t *eob_ptr);
-void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
+void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                            const int16_t *round_ptr, const int16_t quant_ptr,
-                           int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t dequant_ptr, uint16_t *eob_ptr);
 void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
                                 const int16_t *scan, const int16_t *iscan);
 
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_high_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
+                          const int16_t *round_ptr, const int16_t quant_ptr,
+                          tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                          const int16_t dequant_ptr, uint16_t *eob_ptr);
+void vp9_high_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
+                                const int16_t *round_ptr,
+                                const int16_t quant_ptr, tran_low_t *qcoeff_ptr,
+                                tran_low_t *dqcoeff_ptr,
+                                const int16_t dequant_ptr, uint16_t *eob_ptr);
+#endif
+
 struct VP9_COMP;
 struct VP9Common;
 
@@ -249,7 +249,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
   *out_dist_sum = dist_sum << 4;
 }
 
-int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
+int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                           intptr_t block_size, int64_t *ssz) {
   int i;
   int64_t error = 0, sqcoeff = 0;
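vp9_block_error_c() only changes its coefficient pointer types here. A sketch of the kind of loop behind that signature, under the assumption that it accumulates squared coefficient differences and returns the source energy through *ssz; it is not copied from the library:

#include <stdint.h>

static int64_t block_error_sketch(const int32_t *coeff, const int32_t *dqcoeff,
                                  intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;
  intptr_t i;
  for (i = 0; i < block_size; ++i) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;                     /* distortion vs. dequantized */
    sqcoeff += (int64_t)coeff[i] * coeff[i];  /* energy of the source coeffs */
  }
  *ssz = sqcoeff;
  return error;
}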
@@ -288,7 +288,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
   const PLANE_TYPE type = pd->plane_type;
   const int16_t *band_count = &band_counts[tx_size][1];
   const int eob = p->eobs[block];
-  const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
                    x->token_costs[tx_size][type][is_inter_block(mbmi)];
   uint8_t token_cache[32 * 32];
@@ -358,8 +358,8 @@ static void dist_block(int plane, int block, TX_SIZE tx_size,
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   int64_t this_sse;
   int shift = tx_size == TX_32X32 ? 0 : 2;
-  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
-  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                &this_sse) >> shift;
   args->sse = this_sse >> shift;
@@ -405,8 +405,8 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
     dist_block(plane, block, tx_size, args);
   } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
     // compute DC coefficient
-    int16_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
-    int16_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
+    tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
+    tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
     vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
     args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
     args->dist = args->sse;
@@ -690,7 +690,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
       uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
       int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                           p->src_diff);
-      int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
+      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
       xd->mi[0]->bmi[block].as_mode = mode;
       vp9_predict_intra_block(xd, block, 1,
                               TX_4X4, mode,
@@ -1137,7 +1137,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
   for (idy = 0; idy < height / 4; ++idy) {
     for (idx = 0; idx < width / 4; ++idx) {
       int64_t ssz, rd, rd1, rd2;
-      int16_t* coeff;
+      tran_low_t* coeff;
 
       k += (idy * 2 + idx);
       coeff = BLOCK_OFFSET(p->coeff, k);
@@ -212,7 +212,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
   TOKENEXTRA *t = *tp;  /* store tokens starting here */
   int eob = p->eobs[block];
   const PLANE_TYPE type = pd->plane_type;
-  const int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   const int segment_id = mbmi->segment_id;
   const int16_t *scan, *nb;
   const scan_order *so;
@@ -686,6 +686,10 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
 
   if (res == VPX_CODEC_OK) {
     set_encoder_config(&priv->oxcf, &priv->cfg, &priv->extra_cfg);
+#if CONFIG_VP9_HIGHBITDEPTH
+    priv->oxcf.use_highbitdepth =
+        (ctx->init_flags & VPX_CODEC_USE_HIGHBITDEPTH) ? 1 : 0;
+#endif
    priv->cpi = vp9_create_compressor(&priv->oxcf);
     if (priv->cpi == NULL)
       res = VPX_CODEC_MEM_ERROR;
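With the VPX_CODEC_USE_HIGHBITDEPTH init flag consumed above and the capability advertised in the interface table below, an application might request a high bitdepth encode roughly like this. This is only a sketch: the profile choice and the g_bit_depth field are assumptions about the public config, not part of this change:

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

static int open_highbitdepth_vp9_encoder(vpx_codec_ctx_t *codec,
                                         unsigned int width,
                                         unsigned int height) {
  vpx_codec_enc_cfg_t cfg;
  if (vpx_codec_enc_config_default(vpx_codec_vp9_cx(), &cfg, 0))
    return -1;
  cfg.g_w = width;
  cfg.g_h = height;
  cfg.g_profile = 2;              /* assumption: 10-bit content uses profile 2 */
  cfg.g_bit_depth = VPX_BITS_10;  /* assumption: config field name */
  /* VPX_CODEC_USE_HIGHBITDEPTH is the init flag checked in encoder_init(). */
  return vpx_codec_enc_init(codec, vpx_codec_vp9_cx(), &cfg,
                            VPX_CODEC_USE_HIGHBITDEPTH) ? -1 : 0;
}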
@@ -1333,6 +1337,9 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
 CODEC_INTERFACE(vpx_codec_vp9_cx) = {
   "WebM Project VP9 Encoder" VERSION_STRING,
   VPX_CODEC_INTERNAL_ABI_VERSION,
+#if CONFIG_VP9_HIGHBITDEPTH
+  VPX_CODEC_CAP_HIGHBITDEPTH |
+#endif
   VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR,  // vpx_codec_caps_t
   encoder_init,  // vpx_codec_init_fn_t
   encoder_destroy,  // vpx_codec_destroy_fn_t