Merge "Isolate vp10's inv_txfm from vp9"
This commit is contained in:
commit
b0bfea4f5f
@ -167,6 +167,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
|
||||
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) := test_intra_pred_speed.cc
|
||||
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) += ../md5_utils.h ../md5_utils.c
|
||||
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm_test.cc
|
||||
endif # CONFIG_SHARED
|
||||
|
||||
include $(SRC_PATH_BARE)/test/test-data.mk
|
||||
|
321
test/vp10_inv_txfm_test.cc
Normal file
321
test/vp10_inv_txfm_test.cc
Normal file
@ -0,0 +1,321 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "./vp10_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
#include "vp10/common/blockd.h"
|
||||
#include "vp10/common/scan.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vp10/common/vp10_inv_txfm.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
// Constants for the floating-point reference transform below.
const double PI = 3.141592653589793238462643383279502884;
const double kInvSqrt2 = 0.707106781186547524400844362104;

// Double-precision 1-D inverse DCT used as the reference in accuracy tests.
//
//   out[n] = sum over k of w(k) * in[k] * cos(PI * (2n + 1) * k / (2 * size))
//
// where w(0) = kInvSqrt2 and w(k) = 1 for k > 0. |in| and |out| each hold
// |size| values.
void reference_idct_1d(const double *in, double *out, int size) {
  for (int n = 0; n < size; ++n) {
    double &sum = out[n];
    sum = 0;
    for (int k = 0; k < size; ++k) {
      const double basis = cos(PI * (2 * n + 1) * k / (2 * size));
      // Only the DC term carries the 1/sqrt(2) weight.
      sum += (k == 0) ? kInvSqrt2 * in[k] * basis : in[k] * basis;
    }
  }
}
|
||||
|
||||
typedef void (*IdctFuncRef)(const double *in, double *out, int size);
|
||||
typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
|
||||
|
||||
class TransTestBase {
|
||||
public:
|
||||
virtual ~TransTestBase() {}
|
||||
|
||||
protected:
|
||||
void RunInvAccuracyCheck() {
|
||||
tran_low_t *input = new tran_low_t[txfm_size_];
|
||||
tran_low_t *output = new tran_low_t[txfm_size_];
|
||||
double *ref_input = new double[txfm_size_];
|
||||
double *ref_output = new double[txfm_size_];
|
||||
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 5000;
|
||||
for (int ti = 0; ti < count_test_block; ++ti) {
|
||||
for (int ni = 0; ni < txfm_size_; ++ni) {
|
||||
input[ni] = rnd.Rand8() - rnd.Rand8();
|
||||
ref_input[ni] = static_cast<double>(input[ni]);
|
||||
}
|
||||
|
||||
fwd_txfm_(input, output);
|
||||
fwd_txfm_ref_(ref_input, ref_output, txfm_size_);
|
||||
|
||||
for (int ni = 0; ni < txfm_size_; ++ni) {
|
||||
EXPECT_LE(
|
||||
abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
|
||||
max_error_);
|
||||
}
|
||||
}
|
||||
|
||||
delete[] input;
|
||||
delete[] output;
|
||||
delete[] ref_input;
|
||||
delete[] ref_output;
|
||||
}
|
||||
|
||||
double max_error_;
|
||||
int txfm_size_;
|
||||
IdctFunc fwd_txfm_;
|
||||
IdctFuncRef fwd_txfm_ref_;
|
||||
};
|
||||
|
||||
typedef std::tr1::tuple<IdctFunc, IdctFuncRef, int, int> IdctParam;
|
||||
class Vp10InvTxfm
|
||||
: public TransTestBase,
|
||||
public ::testing::TestWithParam<IdctParam> {
|
||||
public:
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
fwd_txfm_ref_ = GET_PARAM(1);
|
||||
txfm_size_ = GET_PARAM(2);
|
||||
max_error_ = GET_PARAM(3);
|
||||
}
|
||||
virtual void TearDown() {}
|
||||
};
|
||||
|
||||
// Runs the shared accuracy check once for each parameter tuple below.
TEST_P(Vp10InvTxfm, RunInvAccuracyCheck) {
  RunInvAccuracyCheck();
}

// C 1-D inverse DCTs of length 4, 8, 16 and 32, each compared against the
// floating-point reference with a per-length error tolerance.
INSTANTIATE_TEST_CASE_P(
    C, Vp10InvTxfm,
    ::testing::Values(IdctParam(&vp10_idct4_c, &reference_idct_1d, 4, 1),
                      IdctParam(&vp10_idct8_c, &reference_idct_1d, 8, 2),
                      IdctParam(&vp10_idct16_c, &reference_idct_1d, 16, 4),
                      IdctParam(&vp10_idct32_c, &reference_idct_1d, 32, 6)));
|
||||
|
||||
typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
|
||||
typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
|
||||
typedef std::tr1::tuple<FwdTxfmFunc,
|
||||
InvTxfmFunc,
|
||||
InvTxfmFunc,
|
||||
TX_SIZE, int> PartialInvTxfmParam;
|
||||
const int kMaxNumCoeffs = 1024;
|
||||
class Vp10PartialIDctTest
|
||||
: public ::testing::TestWithParam<PartialInvTxfmParam> {
|
||||
public:
|
||||
virtual ~Vp10PartialIDctTest() {}
|
||||
virtual void SetUp() {
|
||||
ftxfm_ = GET_PARAM(0);
|
||||
full_itxfm_ = GET_PARAM(1);
|
||||
partial_itxfm_ = GET_PARAM(2);
|
||||
tx_size_ = GET_PARAM(3);
|
||||
last_nonzero_ = GET_PARAM(4);
|
||||
}
|
||||
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
int last_nonzero_;
|
||||
TX_SIZE tx_size_;
|
||||
FwdTxfmFunc ftxfm_;
|
||||
InvTxfmFunc full_itxfm_;
|
||||
InvTxfmFunc partial_itxfm_;
|
||||
};
|
||||
|
||||
// Checks that the partial inverse transform reproduces the full inverse
// transform on coefficients derived from extreme residual blocks.
//
// Each iteration forward-transforms a block whose samples are all +/-255,
// quantizes the first last_nonzero_ coefficients with large step sizes, and
// feeds the result to both inverse transforms. Iteration 0 uses an all-255
// block, iteration 1 an all-(-255) block, and the rest use random signs.
//
// The earlier version wrapped the block generation in a second
// `for (int i ...)` loop that shadowed the outer index and re-seeded the
// random generator on every outer pass. Every outer iteration therefore
// repeated 1000 forward transforms and then checked the same final block, so
// the two extreme blocks were never actually compared.
TEST_P(Vp10PartialIDctTest, RunQuantCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  int size = 0;
  switch (tx_size_) {
    case TX_4X4:
      size = 4;
      break;
    case TX_8X8:
      size = 8;
      break;
    case TX_16X16:
      size = 16;
      break;
    case TX_32X32:
      size = 32;
      break;
    default:
      FAIL() << "Wrong Size!";
      break;
  }
  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);

  const int count_test_block = 1000;
  const int block_size = size * size;

  int max_error = 0;
  for (int i = 0; i < count_test_block; ++i) {
    // Clear the destination and coefficient buffers.
    memset(dst1, 0, sizeof(*dst1) * block_size);
    memset(dst2, 0, sizeof(*dst2) * block_size);
    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);

    // Build a residual block whose samples are all +255 or -255.
    if (i == 0) {
      for (int j = 0; j < block_size; ++j)
        input_extreme_block[j] = 255;
    } else if (i == 1) {
      for (int j = 0; j < block_size; ++j)
        input_extreme_block[j] = -255;
    } else {
      for (int j = 0; j < block_size; ++j) {
        input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
      }
    }

    ftxfm_(input_extreme_block, output_ref_block, size);

    // Quantization with maximum allowed step sizes.
    test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
    for (int j = 1; j < last_nonzero_; ++j)
      test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]]
          = (output_ref_block[j] / 1828) * 1828;

    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));

    // Track the largest squared pixel difference over all iterations.
    for (int j = 0; j < block_size; ++j) {
      const int diff = dst1[j] - dst2[j];
      const int error = diff * diff;
      if (max_error < error)
        max_error = error;
    }
  }

  EXPECT_EQ(0, max_error)
      << "Error: partial inverse transform produces different results";
}
|
||||
|
||||
// Checks that the partial and full inverse transforms agree on random
// coefficient blocks in which only the first last_nonzero_ scan positions
// are populated.
TEST_P(Vp10PartialIDctTest, ResultsMatch) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  // Edge length in samples for the transform size under test.
  int size;
  switch (tx_size_) {
    case TX_4X4:
      size = 4;
      break;
    case TX_8X8:
      size = 8;
      break;
    case TX_16X16:
      size = 16;
      break;
    case TX_32X32:
      size = 32;
      break;
    default:
      FAIL() << "Wrong Size!";
      break;
  }

  DECLARE_ALIGNED(16, tran_low_t, full_coeffs[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, partial_coeffs[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, full_dst[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, partial_dst[kMaxNumCoeffs]);

  const int kNumTestBlocks = 1000;
  const int kMaxCoeff = 32766 / 4;
  const int num_samples = size * size;

  int max_error = 0;
  for (int block = 0; block < kNumTestBlocks; ++block) {
    // Start every iteration from zeroed pixels and coefficients.
    memset(full_dst, 0, sizeof(*full_dst) * num_samples);
    memset(partial_dst, 0, sizeof(*partial_dst) * num_samples);
    memset(full_coeffs, 0, sizeof(*full_coeffs) * num_samples);
    memset(partial_coeffs, 0, sizeof(*partial_coeffs) * num_samples);

    // Draw coefficients while keeping their total energy within
    // kMaxCoeff^2; a draw that would exceed the budget is zeroed.
    int energy_budget = kMaxCoeff * kMaxCoeff;
    for (int pos = 0; pos < last_nonzero_; ++pos) {
      int16_t value = static_cast<int16_t>(sqrt(1.0 * energy_budget) *
                                           (rnd.Rand16() - 32768) / 65536);
      energy_budget -= value * value;
      if (energy_budget < 0) {
        energy_budget = 0;
        value = 0;
      }
      full_coeffs[vp10_default_scan_orders[tx_size_].scan[pos]] = value;
    }

    // Each inverse transform gets its own copy of the coefficients.
    memcpy(partial_coeffs, full_coeffs,
           sizeof(*partial_coeffs) * num_samples);

    ASM_REGISTER_STATE_CHECK(full_itxfm_(full_coeffs, full_dst, size));
    ASM_REGISTER_STATE_CHECK(partial_itxfm_(partial_coeffs, partial_dst, size));

    for (int px = 0; px < num_samples; ++px) {
      const int delta = full_dst[px] - partial_dst[px];
      const int squared = delta * delta;
      if (max_error < squared)
        max_error = squared;
    }
  }

  EXPECT_EQ(0, max_error)
      << "Error: partial inverse transform produces different results";
}
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Vp10PartialIDctTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_fdct32x32_c,
|
||||
&vp10_idct32x32_1024_add_c,
|
||||
&vp10_idct32x32_34_add_c,
|
||||
TX_32X32, 34),
|
||||
make_tuple(&vpx_fdct32x32_c,
|
||||
&vp10_idct32x32_1024_add_c,
|
||||
&vp10_idct32x32_1_add_c,
|
||||
TX_32X32, 1),
|
||||
make_tuple(&vpx_fdct16x16_c,
|
||||
&vp10_idct16x16_256_add_c,
|
||||
&vp10_idct16x16_10_add_c,
|
||||
TX_16X16, 10),
|
||||
make_tuple(&vpx_fdct16x16_c,
|
||||
&vp10_idct16x16_256_add_c,
|
||||
&vp10_idct16x16_1_add_c,
|
||||
TX_16X16, 1),
|
||||
make_tuple(&vpx_fdct8x8_c,
|
||||
&vp10_idct8x8_64_add_c,
|
||||
&vp10_idct8x8_12_add_c,
|
||||
TX_8X8, 12),
|
||||
make_tuple(&vpx_fdct8x8_c,
|
||||
&vp10_idct8x8_64_add_c,
|
||||
&vp10_idct8x8_1_add_c,
|
||||
TX_8X8, 1),
|
||||
make_tuple(&vpx_fdct4x4_c,
|
||||
&vp10_idct4x4_16_add_c,
|
||||
&vp10_idct4x4_1_add_c,
|
||||
TX_4X4, 1)));
|
||||
} // namespace
|
@ -695,6 +695,13 @@ DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_32x32[1024]) = {
|
||||
1023,
|
||||
};
|
||||
|
||||
const scan_order vp10_default_scan_orders[TX_SIZES] = {
|
||||
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
|
||||
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
|
||||
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
|
||||
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
|
||||
};
|
||||
|
||||
const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES] = {
|
||||
{ // TX_4X4
|
||||
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
|
||||
|
@ -29,6 +29,7 @@ typedef struct {
|
||||
const int16_t *neighbors;
|
||||
} scan_order;
|
||||
|
||||
extern const scan_order vp10_default_scan_orders[TX_SIZES];
|
||||
extern const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES];
|
||||
|
||||
static INLINE int get_coef_context(const int16_t *neighbors,
|
||||
|
2499
vp10/common/vp10_inv_txfm.c
Normal file
2499
vp10/common/vp10_inv_txfm.c
Normal file
File diff suppressed because it is too large
Load Diff
122
vp10/common/vp10_inv_txfm.h
Normal file
122
vp10/common/vp10_inv_txfm.h
Normal file
@ -0,0 +1,122 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// The include guard is named after this file's own path. The previous
// VPX_DSP_INV_TXFM_H_ name would clash with any vpx_dsp header using the same
// guard, silently dropping whichever header was included second.
#ifndef VP10_COMMON_VP10_INV_TXFM_H_
#define VP10_COMMON_VP10_INV_TXFM_H_

#include <assert.h>

#include "./vpx_config.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_ports/mem.h"

#ifdef __cplusplus
extern "C" {
#endif

// Narrows an intermediate transform value to tran_low_t. When
// CONFIG_COEFFICIENT_RANGE_CHECKING is enabled it also asserts that the value
// fits in a signed 16-bit integer.
static INLINE tran_low_t check_range(tran_high_t input) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  // For valid VP9 input streams, intermediate stage coefficients should always
  // stay within the range of a signed 16 bit integer. Coefficients can go out
  // of this range for invalid/corrupt VP9 streams. However, strictly checking
  // this range for every intermediate coefficient can be burdensome for a
  // decoder, therefore the following assertion is only enabled when configured
  // with --enable-coefficient-range-checking.
  assert(INT16_MIN <= input);
  assert(input <= INT16_MAX);
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  return (tran_low_t)input;
}

// Rounds |input| down by DCT_CONST_BITS and range-checks the result.
static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  return check_range(rv);
}

#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth counterpart of check_range(): the asserted range grows with
// the bit depth |bd|.
static INLINE tran_low_t highbd_check_range(tran_high_t input,
                                            int bd) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  // For valid highbitdepth VP9 streams, intermediate stage coefficients will
  // stay within the ranges:
  // - 8 bit: signed 16 bit integer
  // - 10 bit: signed 18 bit integer
  // - 12 bit: signed 20 bit integer
  const int32_t int_max = (1 << (7 + bd)) - 1;
  const int32_t int_min = -int_max - 1;
  assert(int_min <= input);
  assert(input <= int_max);
  // Both bounds are otherwise unused when assert() compiles to nothing.
  (void) int_min;
  (void) int_max;
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  (void) bd;
  return (tran_low_t)input;
}

// Rounds |input| down by DCT_CONST_BITS and range-checks it for depth |bd|.
static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
                                                      int bd) {
  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  return highbd_check_range(rv, bd);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_EMULATE_HARDWARE
// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
// non-normative method to handle overflows. A stream that causes
// overflows in the inverse transform is considered invalid in VP9,
// and a hardware implementer is free to choose any reasonable
// method to handle overflows. However to aid in hardware
// verification they can use a specific implementation of the
// WRAPLOW() macro below that is identical to their intended
// hardware implementation (and also use configure options to trigger
// the C-implementation of the transform).
//
// The particular WRAPLOW implementation below performs strict
// overflow wrapping to match common hardware implementations.
// bd of 8 uses trans_low with 16bits, need to remove 16bits
// bd of 10 uses trans_low with 18bits, need to remove 14bits
// bd of 12 uses trans_low with 20bits, need to remove 12bits
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
//
// The left shift is done on an unsigned value because left-shifting a
// negative signed integer is undefined behaviour in C. |bd| is parenthesized
// so that expression arguments expand correctly.
#define WRAPLOW(x, bd) \
  ((int32_t)((uint32_t)(x) << (24 - (bd))) >> (24 - (bd)))
#else
#define WRAPLOW(x, bd) ((int32_t)(x))
#endif  // CONFIG_EMULATE_HARDWARE

// 1-D inverse transforms (C reference implementations).
void vp10_idct4_c(const tran_low_t *input, tran_low_t *output);
void vp10_idct8_c(const tran_low_t *input, tran_low_t *output);
void vp10_idct16_c(const tran_low_t *input, tran_low_t *output);
void vp10_idct32_c(const tran_low_t *input, tran_low_t *output);
void vp10_iadst4_c(const tran_low_t *input, tran_low_t *output);
void vp10_iadst8_c(const tran_low_t *input, tran_low_t *output);
void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output);

#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
void vp10_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
void vp10_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);

void vp10_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
void vp10_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
void vp10_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);

// Adds residual |trans| to pixel |dest| and clips the sum for bit depth
// |bd|, applying WRAPLOW to the residual and to the sum.
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
                                             int bd) {
  trans = WRAPLOW(trans, bd);
  return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Adds residual |trans| to 8-bit pixel |dest| and clips the sum, applying
// WRAPLOW to the residual and to the sum.
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
  trans = WRAPLOW(trans, 8);
  return clip_pixel(WRAPLOW(dest + trans, 8));
}
#ifdef __cplusplus
}  // extern "C"
#endif
#endif  // VP10_COMMON_VP10_INV_TXFM_H_
|
@ -289,6 +289,188 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
|
||||
}
|
||||
|
||||
# Inverse transform
#
# Every function below is registered with add_proto and then specialize, in
# the same order for each configuration. Only the list of optimized variants
# passed to specialize differs between configurations.
{
  # Argument lists shared by the prototypes.
  my $args = "const tran_low_t *input, uint8_t *dest, int dest_stride";
  my $highbd_args = "$args, int bd";

  # 8-bit inverse DCT "add" functions.
  my @idct_adds = qw/
    vp10_idct4x4_1_add
    vp10_idct4x4_16_add
    vp10_idct8x8_1_add
    vp10_idct8x8_64_add
    vp10_idct8x8_12_add
    vp10_idct16x16_1_add
    vp10_idct16x16_256_add
    vp10_idct16x16_10_add
    vp10_idct32x32_1024_add
    vp10_idct32x32_34_add
    vp10_idct32x32_1_add
  /;

  # 8-bit inverse WHT "add" functions; these are always C only.
  my @iwht_adds = qw/
    vp10_iwht4x4_1_add
    vp10_iwht4x4_16_add
  /;

  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    # Note as optimized versions of these functions are added we need to add a check to ensure
    # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
    foreach my $fn (@idct_adds, @iwht_adds) {
      add_proto("void", $fn, $args);
      specialize($fn);
    }

    # High-bitdepth functions that are C only.
    my @highbd_c_only = qw/
      vp10_highbd_idct4x4_1_add
      vp10_highbd_idct8x8_1_add
      vp10_highbd_idct16x16_1_add
      vp10_highbd_idct32x32_1024_add
      vp10_highbd_idct32x32_34_add
      vp10_highbd_idct32x32_1_add
      vp10_highbd_iwht4x4_1_add
      vp10_highbd_iwht4x4_16_add
    /;
    foreach my $fn (@highbd_c_only) {
      add_proto("void", $fn, $highbd_args);
      specialize($fn);
    }

    # Force C versions if CONFIG_EMULATE_HARDWARE is 1
    my @opts = (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") ? () : ("sse2");
    my @highbd_optimized = qw/
      vp10_highbd_idct4x4_16_add
      vp10_highbd_idct8x8_64_add
      vp10_highbd_idct8x8_10_add
      vp10_highbd_idct16x16_256_add
      vp10_highbd_idct16x16_10_add
    /;
    foreach my $fn (@highbd_optimized) {
      add_proto("void", $fn, $highbd_args);
      specialize($fn, @opts);
    }
  } else {
    # Force C versions if CONFIG_EMULATE_HARDWARE is 1
    my @opts = (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") ? () : ("sse2");
    foreach my $fn (@idct_adds) {
      add_proto("void", $fn, $args);
      specialize($fn, @opts);
    }
    foreach my $fn (@iwht_adds) {
      add_proto("void", $fn, $args);
      specialize($fn);
    }
  }  # CONFIG_VP9_HIGHBITDEPTH
}
|
||||
|
||||
#
|
||||
# Motion search
|
||||
#
|
||||
|
4058
vp10/common/x86/vp10_inv_txfm_sse2.c
Normal file
4058
vp10/common/x86/vp10_inv_txfm_sse2.c
Normal file
File diff suppressed because it is too large
Load Diff
184
vp10/common/x86/vp10_inv_txfm_sse2.h
Normal file
184
vp10/common/x86/vp10_inv_txfm_sse2.h
Normal file
@ -0,0 +1,184 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Include guard named after this file's own path
// (vp10/common/x86/vp10_inv_txfm_sse2.h). The previous
// VPX_DSP_X86_INV_TXFM_SSE2_H_ name would clash with any vpx_dsp header using
// the same guard, silently dropping whichever header was included second.
#ifndef VP10_COMMON_X86_VP10_INV_TXFM_SSE2_H_
#define VP10_COMMON_X86_VP10_INV_TXFM_SSE2_H_
|
||||
|
||||
#include <emmintrin.h> // SSE2
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vp10/common/vp10_inv_txfm.h"
|
||||
|
||||
// perform 8x8 transpose
|
||||
static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
|
||||
const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
|
||||
const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
|
||||
const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
|
||||
const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
|
||||
const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
|
||||
const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
|
||||
const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
|
||||
const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
|
||||
|
||||
const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
|
||||
const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
|
||||
const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
|
||||
const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
|
||||
const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
|
||||
const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
|
||||
const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
|
||||
const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
|
||||
|
||||
res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);
|
||||
res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
|
||||
res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
|
||||
res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
|
||||
res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
|
||||
res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
|
||||
res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
|
||||
res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
|
||||
}
|
||||
|
||||
// Interleaves the low four 16-bit lanes of in0..in3 and writes the two
// resulting registers back into in0 and in1, so in0 and in1 must be
// assignable lvalues. The out0 and out1 parameters are accepted but never
// referenced by the expansion. The temporaries tr0_0 and tr0_1 are declared
// inside the macro's own brace scope.
#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \
|
||||
{ \
|
||||
const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
|
||||
const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
|
||||
\
|
||||
in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \
|
||||
in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \
|
||||
}
|
||||
|
||||
// Transposes the low four 16-bit lanes of in[0..7] into out[0..3]; the high
// four lanes of each input register are ignored. All inputs are read before
// |out| is written.
static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
  // Interleave 16-bit lanes of neighbouring rows (low halves only).
  const __m128i r01 = _mm_unpacklo_epi16(in[0], in[1]);
  const __m128i r23 = _mm_unpacklo_epi16(in[2], in[3]);
  const __m128i r45 = _mm_unpacklo_epi16(in[4], in[5]);
  const __m128i r67 = _mm_unpacklo_epi16(in[6], in[7]);

  // Interleave 32-bit pairs: "top" from rows 0-3, "bot" from rows 4-7.
  const __m128i top_lo = _mm_unpacklo_epi32(r01, r23);
  const __m128i top_hi = _mm_unpackhi_epi32(r01, r23);
  const __m128i bot_lo = _mm_unpacklo_epi32(r45, r67);
  const __m128i bot_hi = _mm_unpackhi_epi32(r45, r67);

  // Join 64-bit halves into the four output rows.
  out[0] = _mm_unpacklo_epi64(top_lo, bot_lo);
  out[1] = _mm_unpackhi_epi64(top_lo, bot_lo);
  out[2] = _mm_unpacklo_epi64(top_hi, bot_hi);
  out[3] = _mm_unpackhi_epi64(top_hi, bot_hi);
}
|
||||
|
||||
// Transposes, in place, the data held in the two 16-vector arrays res0 and
// res1 by treating res0[0..7], res0[8..15], res1[0..7] and res1[8..15] as
// four 8x8 tiles: the first and last tiles are transposed where they sit,
// while the transposed res1[0..7] and res0[8..15] tiles trade places.
// Each tile transpose is delegated to array_transpose_8x8().
static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
  __m128i held[8];
  int i;

  array_transpose_8x8(res0, res0);
  // res1[0..7] is about to be overwritten, so park its transpose first.
  array_transpose_8x8(res1, held);
  array_transpose_8x8(&res0[8], res1);
  array_transpose_8x8(&res1[8], &res1[8]);

  // Drop the parked tile into the slot vacated by res0[8..15].
  for (i = 0; i < 8; ++i) {
    res0[8 + i] = held[i];
  }
}
|
||||
|
||||
// Loads one 128-bit vector (eight int16_t values) from the start of each of
// 16 consecutive rows of `input` into in[0..15]. Rows are 16 int16_t apart.
// _mm_load_si128 is the aligned load, so `input` must be 16-byte aligned;
// the 32-byte row pitch then keeps every row aligned as well.
static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
  int row;

  for (row = 0; row < 16; ++row) {
    in[row] = _mm_load_si128((const __m128i *)(input + row * 16));
  }
}
|
||||
|
||||
// Reconstructs eight pixels: reads 8 bytes at `dest`, widens them to 16 bits,
// adds the eight 16-bit values in `in_x`, and writes the sums back to `dest`
// as 8 bytes using unsigned saturation (results clamp to 0..255).
//
// Requirements at the expansion site:
//   - a __m128i named `zero` must be in scope; it is interleaved with the
//     pixel bytes, so it has to be all-zero for the widening to zero-extend.
//   - `dest` is expanded twice (load and store), so it must not have side
//     effects.
#define RECON_AND_STORE(dest, in_x)                                        \
  {                                                                        \
    const __m128i recon_px8_ = _mm_loadl_epi64((__m128i *)(dest));         \
    const __m128i recon_px16_ = _mm_unpacklo_epi8(recon_px8_, zero);       \
    const __m128i recon_sum_ = _mm_add_epi16(in_x, recon_px16_);           \
    _mm_storel_epi64((__m128i *)(dest),                                    \
                     _mm_packus_epi16(recon_sum_, recon_sum_));            \
  }
|
||||
|
||||
// Rounds the 16 residual rows in in[0..15] and adds them onto an 8-pixel-wide,
// 16-row area of `dest`, whose rows are `stride` bytes apart.
//
// Each 16-bit lane becomes (x + 32) >> 6, using a saturating add and an
// arithmetic shift. Note that in[] is overwritten with the rounded values.
// All of in[] is updated before the first store to `dest`.
static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
  const __m128i zero = _mm_setzero_si128();  // read by RECON_AND_STORE
  const __m128i final_rounding = _mm_set1_epi16(1 << 5);
  int i;

  // Final rounding and shift
  for (i = 0; i < 16; ++i) {
    in[i] = _mm_srai_epi16(_mm_adds_epi16(in[i], final_rounding), 6);
  }

  // Add each rounded row to the destination pixels with clamping to 8 bits.
  for (i = 0; i < 16; ++i) {
    RECON_AND_STORE(dest + i * stride, in[i]);
  }
}
|
||||
|
||||
// Prototypes for the SSE2 transform routines; their definitions are not part
// of this header. Each takes one or two arrays of __m128i vectors through
// non-const pointers.
// NOTE(review): the names suggest 4-, 8- and 16-point inverse DCT / inverse
// ADST passes that transform the vector arrays in place -- confirm against
// the implementation file before relying on that.
void idct4_sse2(__m128i *in);
|
||||
void idct8_sse2(__m128i *in);
|
||||
void idct16_sse2(__m128i *in0, __m128i *in1);
|
||||
void iadst4_sse2(__m128i *in);
|
||||
void iadst8_sse2(__m128i *in);
|
||||
void iadst16_sse2(__m128i *in0, __m128i *in1);
|
||||
|
||||
#endif // VPX_DSP_X86_INV_TXFM_SSE2_H_
|
@ -19,7 +19,6 @@ VP10_COMMON_SRCS-yes += common/entropymode.c
|
||||
VP10_COMMON_SRCS-yes += common/entropymv.c
|
||||
VP10_COMMON_SRCS-yes += common/frame_buffers.c
|
||||
VP10_COMMON_SRCS-yes += common/frame_buffers.h
|
||||
VP10_COMMON_SRCS-yes += common/idct.c
|
||||
VP10_COMMON_SRCS-yes += common/alloccommon.h
|
||||
VP10_COMMON_SRCS-yes += common/blockd.h
|
||||
VP10_COMMON_SRCS-yes += common/common.h
|
||||
@ -30,6 +29,9 @@ VP10_COMMON_SRCS-yes += common/enums.h
|
||||
VP10_COMMON_SRCS-yes += common/filter.h
|
||||
VP10_COMMON_SRCS-yes += common/filter.c
|
||||
VP10_COMMON_SRCS-yes += common/idct.h
|
||||
VP10_COMMON_SRCS-yes += common/idct.c
|
||||
VP10_COMMON_SRCS-yes += common/vp10_inv_txfm.h
|
||||
VP10_COMMON_SRCS-yes += common/vp10_inv_txfm.c
|
||||
VP10_COMMON_SRCS-yes += common/loopfilter.h
|
||||
VP10_COMMON_SRCS-yes += common/thread_common.h
|
||||
VP10_COMMON_SRCS-yes += common/mv.h
|
||||
@ -91,4 +93,7 @@ VP10_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iht4x4_add_neon.c
|
||||
VP10_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iht8x8_add_neon.c
|
||||
endif
|
||||
|
||||
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_inv_txfm_sse2.c
|
||||
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_inv_txfm_sse2.h
|
||||
|
||||
$(eval $(call rtcd_h_template,vp10_rtcd,vp10/common/vp10_rtcd_defs.pl))
|
||||
|
Loading…
Reference in New Issue
Block a user