Merge "Factor inverse transform functions into vpx_dsp"
This commit is contained in:
commit
b4c7d0523a
@ -14,8 +14,7 @@
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -14,63 +14,16 @@
|
||||
#include <assert.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_dsp/txfm_common.h"
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
#include "vpx_dsp/vpx_dsp_common.h"
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_enums.h"
|
||||
#include "vpx_dsp/inv_txfm.h"
|
||||
#include "vpx_dsp/txfm_common.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Narrows an intermediate transform value to tran_low_t.
//
// For valid VP9 input streams every intermediate stage coefficient fits in a
// signed 16 bit integer; only invalid/corrupt streams can exceed that range.
// Checking each intermediate coefficient can be burdensome for a decoder, so
// the assertions below are compiled in only when the build is configured
// with --enable-coefficient-range-checking.
static INLINE tran_low_t check_range(tran_high_t input) {
  tran_low_t narrowed;
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  assert(input >= INT16_MIN);
  assert(input <= INT16_MAX);
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  narrowed = (tran_low_t)input;
  return narrowed;
}
|
||||
|
||||
// Applies ROUND_POWER_OF_TWO with DCT_CONST_BITS to the input and passes the
// result through check_range() to obtain a tran_low_t.
static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
  const tran_high_t rounded = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  return check_range(rounded);
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// High bit depth counterpart of check_range(): narrows an intermediate
// transform value to tran_low_t.
//
// When coefficient range checking is enabled, asserts that the value lies in
// a signed (8 + bd) bit range, which for valid high bit depth VP9 streams
// means:
//   -  8 bit: signed 16 bit integer
//   - 10 bit: signed 18 bit integer
//   - 12 bit: signed 20 bit integer
static INLINE tran_low_t highbd_check_range(tran_high_t input,
                                            int bd) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  const int32_t range_max = (1 << (bd + 7)) - 1;
  const int32_t range_min = -range_max - 1;
  assert(input >= range_min);
  assert(input <= range_max);
  // Keeps range_min referenced when assert() compiles to nothing.
  (void) range_min;
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  // bd is otherwise unused when range checking is disabled.
  (void) bd;
  return (tran_low_t)input;
}
|
||||
|
||||
// High bit depth counterpart of dct_const_round_shift(): applies
// ROUND_POWER_OF_TWO with DCT_CONST_BITS and passes the result through
// highbd_check_range() for the given bit depth.
static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
                                                      int bd) {
  const tran_high_t rounded = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  return highbd_check_range(rounded, bd);
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
typedef void (*transform_1d)(const tran_low_t*, tran_low_t*);
|
||||
|
||||
typedef struct {
|
||||
@ -85,28 +38,6 @@ typedef struct {
|
||||
} highbd_transform_2d;
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#if CONFIG_EMULATE_HARDWARE
|
||||
// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
|
||||
// non-normative method to handle overflows. A stream that causes
|
||||
// overflows in the inverse transform is considered invalid in VP9,
|
||||
// and a hardware implementer is free to choose any reasonable
|
||||
// method to handle overflows. However to aid in hardware
|
||||
// verification they can use a specific implementation of the
|
||||
// WRAPLOW() macro below that is identical to their intended
|
||||
// hardware implementation (and also use configure options to trigger
|
||||
// the C-implementation of the transform).
|
||||
//
|
||||
// The particular WRAPLOW implementation below performs strict
|
||||
// overflow wrapping to match common hardware implementations.
|
||||
// bd of 8 uses trans_low with 16bits, need to remove 16bits
|
||||
// bd of 10 uses trans_low with 18bits, need to remove 14bits
|
||||
// bd of 12 uses trans_low with 20bits, need to remove 12bits
|
||||
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
|
||||
// Keeps the low (8 + bd) bits of x and sign-extends them to 32 bits.
// The left shift is performed on an unsigned value so that discarding the
// high bits is well defined (a signed left shift that overflows is undefined
// behaviour), and bd is parenthesized so expression arguments expand safely.
#define WRAPLOW(x, bd) \
  (((int32_t)((uint32_t)(x) << (24 - (bd)))) >> (24 - (bd)))
|
||||
#else
|
||||
#define WRAPLOW(x, bd) ((int32_t)(x))
|
||||
#endif // CONFIG_EMULATE_HARDWARE
|
||||
|
||||
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int eob);
|
||||
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
@ -126,9 +57,6 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||
int stride, int eob);
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void vp9_highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int eob, int bd);
|
||||
void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
@ -145,11 +73,6 @@ void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
|
||||
uint8_t *dest, int stride, int eob, int bd);
|
||||
void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
|
||||
uint8_t *dest, int stride, int eob, int bd);
|
||||
// Adds a residual to a high bit depth pixel: the residual is first passed
// through WRAPLOW, the sum with dest is passed through WRAPLOW again, and the
// result is clipped by clip_pixel_highbd() for the given bit depth.
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
                                             int bd) {
  const tran_high_t wrapped = WRAPLOW(trans, bd);
  return clip_pixel_highbd(WRAPLOW(dest + wrapped, bd), bd);
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -87,39 +87,6 @@ specialize qw/vp9_filter_by_weight8x8 sse2 msa/;
|
||||
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
# Note as optimized versions of these functions are added we need to add a check to ensure
|
||||
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
|
||||
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_16_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_64_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_12_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_34_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht4x4_16_add/;
|
||||
|
||||
@ -128,51 +95,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
|
||||
specialize qw/vp9_iht16x16_256_add/;
|
||||
|
||||
# dct and add
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_16_add/;
|
||||
|
||||
} else {
|
||||
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
|
||||
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
|
||||
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_16_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_64_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_12_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_34_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht4x4_16_add/;
|
||||
|
||||
@ -181,50 +106,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
|
||||
specialize qw/vp9_iht16x16_256_add/;
|
||||
|
||||
# dct and add
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_16_add/;
|
||||
} else {
|
||||
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_1_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_16_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_1_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
|
||||
|
||||
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
|
||||
|
||||
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_1_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2 msa/;
|
||||
#is this a typo?
|
||||
$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht4x4_16_add sse2 neon dspr2 msa/;
|
||||
|
||||
@ -233,14 +115,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
|
||||
specialize qw/vp9_iht16x16_256_add sse2 dspr2 msa/;
|
||||
|
||||
# dct and add
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_1_add msa/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_16_add msa/, "$sse2_x86inc";
|
||||
}
|
||||
}
|
||||
|
||||
@ -295,24 +169,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
#
|
||||
# Note as optimized versions of these functions are added we need to add a check to ensure
|
||||
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
|
||||
add_proto qw/void vp9_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct8x8_1_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct16x16_1_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct32x32_1024_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct32x32_34_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct32x32_1_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
|
||||
specialize qw/vp9_highbd_iht4x4_16_add/;
|
||||
|
||||
@ -321,50 +177,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
|
||||
specialize qw/vp9_highbd_iht16x16_256_add/;
|
||||
|
||||
# dct and add
|
||||
|
||||
add_proto qw/void vp9_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_iwht4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_iwht4x4_16_add/;
|
||||
|
||||
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
|
||||
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct4x4_16_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct8x8_64_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct8x8_10_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct16x16_256_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct16x16_10_add/;
|
||||
|
||||
} else {
|
||||
|
||||
add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct4x4_16_add sse2/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct8x8_64_add sse2/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct8x8_10_add sse2/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct16x16_256_add sse2/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct16x16_10_add sse2/;
|
||||
}
|
||||
}
|
||||
|
||||
#
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -17,7 +17,7 @@
|
||||
#include <tmmintrin.h> // SSSE3
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
|
||||
#include "vpx_dsp/x86/inv_txfm_sse2.h"
|
||||
#include "vpx_dsp/x86/txfm_common_sse2.h"
|
||||
|
||||
void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
|
||||
|
@ -66,7 +66,6 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h
|
||||
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
|
||||
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
|
||||
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_sse2.asm
|
||||
ifeq ($(CONFIG_VP9_POSTPROC),yes)
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_mfqe_sse2.asm
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
|
||||
@ -96,13 +95,6 @@ VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
|
||||
endif
|
||||
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.h
|
||||
|
||||
ifeq ($(ARCH_X86_64), yes)
|
||||
ifeq ($(CONFIG_USE_X86INC),yes)
|
||||
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3_x86_64.asm
|
||||
endif
|
||||
endif
|
||||
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM)
|
||||
|
||||
@ -111,30 +103,4 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
|
||||
endif
|
||||
|
||||
# neon with assembly and intrinsics implementations. If both are available
|
||||
# prefer assembly.
|
||||
ifeq ($(HAVE_NEON_ASM), yes)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct32x32_1_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct32x32_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon_asm$(ASM)
|
||||
else
|
||||
ifeq ($(HAVE_NEON), yes)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct32x32_1_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct32x32_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon.c
|
||||
endif # HAVE_NEON
|
||||
endif # HAVE_NEON_ASM
|
||||
|
||||
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
#include "vpx_dsp/inv_txfm.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
void vp9_idct16x16_1_add_neon(
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
|
||||
void vp9_idct16x16_256_add_neon_pass1(const int16_t *input,
|
@ -12,7 +12,7 @@
|
||||
|
||||
#include "./vpx_config.h"
|
||||
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
#include "vpx_dsp/inv_txfm.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
static INLINE void LD_16x8(
|
@ -10,7 +10,7 @@
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
#include "vpx_dsp/inv_txfm.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
void vp9_idct4x4_1_add_neon(
|
@ -10,7 +10,7 @@
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
#include "vpx_dsp/inv_txfm.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
void vp9_idct8x8_1_add_neon(
|
2476
vpx_dsp/inv_txfm.c
Normal file
2476
vpx_dsp/inv_txfm.c
Normal file
File diff suppressed because it is too large
Load Diff
124
vpx_dsp/inv_txfm.h
Normal file
124
vpx_dsp/inv_txfm.h
Normal file
@ -0,0 +1,124 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VPX_DSP_INV_TXFM_H_
|
||||
#define VPX_DSP_INV_TXFM_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_dsp/txfm_common.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Narrows an intermediate transform value to tran_low_t.
//
// For valid VP9 input streams every intermediate stage coefficient fits in a
// signed 16 bit integer; only invalid/corrupt streams can exceed that range.
// Checking each intermediate coefficient can be burdensome for a decoder, so
// the assertions below are compiled in only when the build is configured
// with --enable-coefficient-range-checking.
static INLINE tran_low_t check_range(tran_high_t input) {
  tran_low_t narrowed;
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  assert(input >= INT16_MIN);
  assert(input <= INT16_MAX);
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  narrowed = (tran_low_t)input;
  return narrowed;
}
|
||||
|
||||
// Applies ROUND_POWER_OF_TWO with DCT_CONST_BITS to the input and passes the
// result through check_range() to obtain a tran_low_t.
static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
  const tran_high_t rounded = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  return check_range(rounded);
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// High bit depth counterpart of check_range(): narrows an intermediate
// transform value to tran_low_t.
//
// When coefficient range checking is enabled, asserts that the value lies in
// a signed (8 + bd) bit range, which for valid high bit depth VP9 streams
// means:
//   -  8 bit: signed 16 bit integer
//   - 10 bit: signed 18 bit integer
//   - 12 bit: signed 20 bit integer
static INLINE tran_low_t highbd_check_range(tran_high_t input,
                                            int bd) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  const int32_t range_max = (1 << (bd + 7)) - 1;
  const int32_t range_min = -range_max - 1;
  assert(input >= range_min);
  assert(input <= range_max);
  // Keeps range_min referenced when assert() compiles to nothing.
  (void) range_min;
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  // bd is otherwise unused when range checking is disabled.
  (void) bd;
  return (tran_low_t)input;
}
|
||||
|
||||
// High bit depth counterpart of dct_const_round_shift(): applies
// ROUND_POWER_OF_TWO with DCT_CONST_BITS and passes the result through
// highbd_check_range() for the given bit depth.
static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
                                                      int bd) {
  const tran_high_t rounded = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  return highbd_check_range(rounded, bd);
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#if CONFIG_EMULATE_HARDWARE
|
||||
// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
|
||||
// non-normative method to handle overflows. A stream that causes
|
||||
// overflows in the inverse transform is considered invalid in VP9,
|
||||
// and a hardware implementer is free to choose any reasonable
|
||||
// method to handle overflows. However to aid in hardware
|
||||
// verification they can use a specific implementation of the
|
||||
// WRAPLOW() macro below that is identical to their intended
|
||||
// hardware implementation (and also use configure options to trigger
|
||||
// the C-implementation of the transform).
|
||||
//
|
||||
// The particular WRAPLOW implementation below performs strict
|
||||
// overflow wrapping to match common hardware implementations.
|
||||
// bd of 8 uses trans_low with 16bits, need to remove 16bits
|
||||
// bd of 10 uses trans_low with 18bits, need to remove 14bits
|
||||
// bd of 12 uses trans_low with 20bits, need to remove 12bits
|
||||
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
|
||||
// Keeps the low (8 + bd) bits of x and sign-extends them to 32 bits.
// The left shift is performed on an unsigned value so that discarding the
// high bits is well defined (a signed left shift that overflows is undefined
// behaviour), and bd is parenthesized so expression arguments expand safely.
#define WRAPLOW(x, bd) \
  (((int32_t)((uint32_t)(x) << (24 - (bd)))) >> (24 - (bd)))
|
||||
#else
|
||||
#define WRAPLOW(x, bd) ((int32_t)(x))
|
||||
#endif // CONFIG_EMULATE_HARDWARE
|
||||
|
||||
void idct4_c(const tran_low_t *input, tran_low_t *output);
|
||||
void idct8_c(const tran_low_t *input, tran_low_t *output);
|
||||
void idct16_c(const tran_low_t *input, tran_low_t *output);
|
||||
void idct32_c(const tran_low_t *input, tran_low_t *output);
|
||||
void iadst4_c(const tran_low_t *input, tran_low_t *output);
|
||||
void iadst8_c(const tran_low_t *input, tran_low_t *output);
|
||||
void iadst16_c(const tran_low_t *input, tran_low_t *output);
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
void vp9_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void vp9_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
|
||||
void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
|
||||
// Adds a residual to a high bit depth pixel: the residual is first passed
// through WRAPLOW, the sum with dest is passed through WRAPLOW again, and the
// result is clipped by clip_pixel_highbd() for the given bit depth.
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
                                             int bd) {
  const tran_high_t wrapped = WRAPLOW(trans, bd);
  return clip_pixel_highbd(WRAPLOW(dest + wrapped, bd), bd);
}
|
||||
#endif
|
||||
|
||||
// Adds a residual to an 8 bit pixel: the residual is first passed through
// WRAPLOW with a bit depth of 8, the sum with dest is passed through WRAPLOW
// again, and the result is clipped by clip_pixel().
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
  const tran_high_t wrapped = WRAPLOW(trans, 8);
  return clip_pixel(WRAPLOW(dest + wrapped, 8));
}
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VPX_DSP_INV_TXFM_H_
|
@ -169,6 +169,43 @@ DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.c
|
||||
DSP_SRCS-$(HAVE_MSA) += mips/fwd_dct32x32_msa.c
|
||||
endif # CONFIG_VP9_ENCODER
|
||||
|
||||
# inverse transform
|
||||
ifeq ($(CONFIG_VP9),yes)
|
||||
DSP_SRCS-yes += inv_txfm.h
|
||||
DSP_SRCS-yes += inv_txfm.c
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.h
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.c
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.asm
|
||||
ifeq ($(ARCH_X86_64),yes)
|
||||
ifeq ($(CONFIG_USE_X86INC),yes)
|
||||
DSP_SRCS-$(HAVE_SSSE3) += x86/inv_txfm_ssse3_x86_64.asm
|
||||
endif # CONFIG_USE_X86INC
|
||||
endif # ARCH_X86_64
|
||||
|
||||
ifeq ($(HAVE_NEON_ASM),yes)
|
||||
DSP_SRCS-yes += arm/idct4x4_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct4x4_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct8x8_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct8x8_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct16x16_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct16x16_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct32x32_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct32x32_add_neon$(ASM)
|
||||
else
|
||||
ifeq ($(HAVE_NEON),yes)
|
||||
DSP_SRCS-yes += arm/idct4x4_1_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct4x4_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct8x8_1_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct8x8_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct16x16_1_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct16x16_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct32x32_1_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct32x32_add_neon.c
|
||||
endif # HAVE_NEON
|
||||
endif # HAVE_NEON_ASM
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/idct16x16_neon.c
|
||||
endif # CONFIG_VP9
|
||||
|
||||
# quantization
|
||||
ifeq ($(CONFIG_VP9_ENCODER),yes)
|
||||
DSP_SRCS-yes += quantize.c
|
||||
|
@ -592,6 +592,193 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
} # CONFIG_VP9_HIGHBITDEPTH
|
||||
} # CONFIG_VP9_ENCODER
|
||||
|
||||
#
|
||||
# Inverse transform
|
||||
if (vpx_config("CONFIG_VP9") eq "yes") {
|
||||
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
# Note: as optimized versions of these functions are added, we need to add a check to ensure
|
||||
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
|
||||
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_16_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_64_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_12_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_34_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_16_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct8x8_1_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct16x16_1_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct32x32_1024_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct32x32_34_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct32x32_1_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_iwht4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_iwht4x4_16_add/;
|
||||
|
||||
# Force C versions if CONFIG_EMULATE_HARDWARE is enabled ("yes")
|
||||
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
|
||||
add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct4x4_16_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct8x8_64_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct8x8_10_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct16x16_256_add/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct16x16_10_add/;
|
||||
} else {
|
||||
add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct4x4_16_add sse2/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct8x8_64_add sse2/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct8x8_10_add sse2/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct16x16_256_add sse2/;
|
||||
|
||||
add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct16x16_10_add sse2/;
|
||||
} # CONFIG_EMULATE_HARDWARE
|
||||
} else {
|
||||
# Force C versions if CONFIG_EMULATE_HARDWARE is enabled ("yes")
|
||||
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
|
||||
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_16_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_64_add/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_12_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_1_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_34_add/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_1_add/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_16_add/;
|
||||
} else {
|
||||
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_1_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct4x4_16_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_1_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
|
||||
|
||||
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
|
||||
|
||||
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_1_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2 msa/;
|
||||
# TODO: add a dedicated NEON implementation of the 34-eob idct32x32; until then the 1024 version is used in its place.
|
||||
$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_1_add msa/;
|
||||
|
||||
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_iwht4x4_16_add msa/, "$sse2_x86inc";
|
||||
} # CONFIG_EMULATE_HARDWARE
|
||||
} # CONFIG_VP9_HIGHBITDEPTH
|
||||
} # CONFIG_VP9
|
||||
|
||||
#
|
||||
# Quantization
|
||||
#
|
||||
|
4053
vpx_dsp/x86/inv_txfm_sse2.c
Normal file
4053
vpx_dsp/x86/inv_txfm_sse2.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@ -8,12 +8,13 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#ifndef VPX_DSP_X86_INV_TXFM_SSE2_H_
|
||||
#define VPX_DSP_X86_INV_TXFM_SSE2_H_
|
||||
|
||||
#include <emmintrin.h> // SSE2
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
#include "vpx_dsp/inv_txfm.h"
|
||||
|
||||
// perform 8x8 transpose
|
||||
static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
|
||||
@ -172,3 +173,12 @@ static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
|
||||
RECON_AND_STORE(dest + 14 * stride, in[14]);
|
||||
RECON_AND_STORE(dest + 15 * stride, in[15]);
|
||||
}
|
||||
|
||||
// Prototypes for the SSE2 transform kernels declared by this header.
// Judging by their names, these are the inverse DCT (idct*) and inverse ADST
// (iadst*) routines for sizes 4, 8 and 16. The 4 and 8 variants take a single
// __m128i array; the 16 variants take two (in0, in1).
// NOTE(review): required array lengths, and whether the kernels operate in
// place, are not visible here -- confirm against the definitions.
void idct4_sse2(__m128i *in);
|
||||
void idct8_sse2(__m128i *in);
|
||||
void idct16_sse2(__m128i *in0, __m128i *in1);
|
||||
void iadst4_sse2(__m128i *in);
|
||||
void iadst8_sse2(__m128i *in);
|
||||
void iadst16_sse2(__m128i *in0, __m128i *in1);
|
||||
|
||||
#endif // VPX_DSP_X86_INV_TXFM_SSE2_H_
|
Loading…
x
Reference in New Issue
Block a user