Optimize 16x16 idct10 function
Wrote an SSE2 version of the vp9_short_idct10_16x16 function. Compared to the C version, the SSE2 version is 2.3x faster. Change-Id: I314c4f09369648721798321eeed6f58e38857f26
This commit is contained in:
parent
8a3233b54d
commit
869d6c0534
@ -298,7 +298,7 @@ prototype void vp9_short_idct16x16 "int16_t *input, int16_t *output, int pitch"
|
||||
specialize vp9_short_idct16x16 sse2
|
||||
|
||||
prototype void vp9_short_idct10_16x16 "int16_t *input, int16_t *output, int pitch"
|
||||
specialize vp9_short_idct10_16x16
|
||||
specialize vp9_short_idct10_16x16 sse2
|
||||
|
||||
prototype void vp9_short_idct1_16x16 "int16_t *input, int16_t *output"
|
||||
specialize vp9_short_idct1_16x16
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -315,7 +315,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
|
||||
input[48] *= dq[1];
|
||||
|
||||
// the idct halves ( >> 1) the pitch
|
||||
vp9_short_idct10_16x16_c(input, output, 32);
|
||||
vp9_short_idct10_16x16(input, output, 32);
|
||||
|
||||
input[0] = input[1] = input[2] = input[3] = 0;
|
||||
input[16] = input[17] = input[18] = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user