SSE2 high precision 32x32 forward DCT
Enable the SSE2 implementation of the high-precision 32x32 forward DCT. The intermediate stacks are 32 bits wide. The run time goes down from 32126 cycles to 13442 cycles. Change-Id: Ib5ccafe3176c65bd6f2dbdef790bd47bbc880e56
This commit is contained in:
parent
b89eef8f82
commit
78136edcdc
@ -143,7 +143,7 @@ typedef struct {
|
||||
unsigned char mb_skip_coeff; /* does this mb have coefficients at all, 1=no coefficients, 0=need decode tokens */
|
||||
unsigned char segment_id; // Segment id for current frame
|
||||
|
||||
// Flags used for prediction status of various bistream signals
|
||||
// Flags used for prediction status of various bit-stream signals
|
||||
unsigned char seg_id_predicted;
|
||||
|
||||
// Indicates if the mb is part of the image (1) vs border (0)
|
||||
|
@ -27,6 +27,9 @@
|
||||
#define pair_set_epi16(a, b) \
|
||||
_mm_set1_epi32(((uint16_t)(a)) + (((uint16_t)(b)) << 16))
|
||||
|
||||
#define pair_set_epi32(a, b) \
|
||||
_mm_set_epi32(b, a, b, a)
|
||||
|
||||
// Constants:
|
||||
// for (int i = 1; i< 32; ++i)
|
||||
// printf("static const int cospi_%d_64 = %.0f;\n", i,
|
||||
|
@ -740,7 +740,7 @@ prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int p
|
||||
specialize vp9_short_fdct8x4 sse2
|
||||
|
||||
prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch"
|
||||
specialize vp9_short_fdct32x32
|
||||
specialize vp9_short_fdct32x32 sse2
|
||||
|
||||
prototype void vp9_short_fdct32x32_rd "int16_t *InputData, int16_t *OutputData, int pitch"
|
||||
specialize vp9_short_fdct32x32_rd sse2
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2573,13 +2573,13 @@ void vp9_short_fht16x16_sse2(int16_t *input, int16_t *output,
|
||||
}
|
||||
|
||||
#define FDCT32x32_2D vp9_short_fdct32x32_rd_sse2
|
||||
#define FDCT32x32_LOW_PRECISION 1
|
||||
#define FDCT32x32_HIGH_PRECISION 0
|
||||
#include "vp9/encoder/x86/vp9_dct32x32_sse2.c"
|
||||
#undef FDCT32x32_2D
|
||||
#undef FDCT32x32_LOW_PRECISION
|
||||
#undef FDCT32x32_HIGH_PRECISION
|
||||
|
||||
#define FDCT32x32_2D vp9_short_fdct32x32_sse2
|
||||
#define FDCT32x32_LOW_PRECISION 0
|
||||
#define FDCT32x32_HIGH_PRECISION 1
|
||||
#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT
|
||||
#undef FDCT32x32_2D
|
||||
#undef FDCT32x32_LOW_PRECISION
|
||||
#undef FDCT32x32_HIGH_PRECISION
|
||||
|
Loading…
Reference in New Issue
Block a user