Add 32x32 idct function for eob<=34 case
When only the upper-left 8x8 area has non-zero DCT coefficients, we can skip the 1-D IDCT for the 9th to 32nd rows to save operations. This function is called when eob <= 34. Change-Id: I9684b75947bdde346cfe3720f08a953aa7a13fb5
This commit is contained in:
parent
f6d870f7ae
commit
f88315cb29
@ -1280,6 +1280,31 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
||||
}
|
||||
}
|
||||
|
||||
// 32x32 inverse transform + add for the case where only the upper-left 8x8
// corner of the coefficient block holds non-zero values.
//
// input:  32x32 coefficient block, row-major (32 values per row); only the
//         first 8 rows are read.
// dest:   destination pixels; each transformed value is added to the pixel
//         already stored there and the sum is passed through clip_pixel().
// stride: distance, in elements, between vertically adjacent dest pixels.
void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
  // Zero-initialised so that rows 8..31, which the row pass never writes,
  // are read as zeros by the column pass.
  int16_t intermediate[32 * 32] = {0};
  int16_t column_in[32], column_out[32];
  int row, col;

  // Row pass: only the first 8 rows can contain non-zero coefficients, so
  // the 1-D transform is skipped for the remaining 24 rows.
  for (row = 0; row < 8; ++row) {
    idct32_1d(input + row * 32, intermediate + row * 32);
  }

  // Column pass: every one of the 32 columns is transformed.
  for (col = 0; col < 32; ++col) {
    // Gather the column into a contiguous buffer for the 1-D transform.
    for (row = 0; row < 32; ++row) {
      column_in[row] = intermediate[row * 32 + col];
    }

    idct32_1d(column_in, column_out);

    // Scale the result with ROUND_POWER_OF_TWO(..., 6), add it to the
    // existing pixel and store the clipped sum back.
    for (row = 0; row < 32; ++row) {
      uint8_t *const pixel = &dest[row * stride + col];
      *pixel = clip_pixel(ROUND_POWER_OF_TWO(column_out[row], 6) + *pixel);
    }
  }
}
|
||||
|
||||
void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
||||
int i, j;
|
||||
int a1;
|
||||
@ -1350,6 +1375,9 @@ void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
|
||||
if (eob) {
|
||||
if (eob == 1)
|
||||
vp9_idct32x32_1_add(input, dest, stride);
|
||||
else if (eob <= 34)
|
||||
// non-zero coeff only in upper-left 8x8
|
||||
vp9_idct32x32_34_add(input, dest, stride);
|
||||
else
|
||||
vp9_idct32x32_1024_add(input, dest, stride);
|
||||
}
|
||||
|
@ -294,6 +294,9 @@ specialize vp9_idct16x16_10_add sse2 neon
|
||||
prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_idct32x32_1024_add sse2 neon
|
||||
|
||||
prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_idct32x32_34_add sse2
|
||||
|
||||
prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_idct32x32_1_add sse2
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user