Add 32x32 idct function for eob<=34 case

When only the upper-left 8x8 area has non-zero DCT coefficients, we
can skip the 1-D IDCT for the 9th through 32nd rows to save operations.
This function is called when eob <= 34.

Change-Id: I9684b75947bdde346cfe3720f08a953aa7a13fb5
This commit is contained in:
Yunqing Wang 2013-10-23 14:25:16 -07:00
parent f6d870f7ae
commit f88315cb29
3 changed files with 731 additions and 337 deletions

View File

@ -1280,6 +1280,31 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
}
}
// 32x32 inverse DCT + add-to-destination for the sparse case where the
// caller guarantees that only the upper-left 8x8 coefficients are non-zero.
//
// input:  32x32 coefficient block, row-major (only the first 8 rows are read).
// dest:   prediction/reconstruction buffer that the residual is added into.
// stride: distance, in bytes, between vertically adjacent pixels of dest.
void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
  // Intermediate row-transform result. Zero-initialised so that rows 8..31,
  // which are never written by the row pass, contribute zeros to the
  // column pass.
  int16_t rows[32 * 32] = {0};
  int16_t col_in[32], col_out[32];
  int r, c;

  // Row pass: run the 1-D transform on the first 8 rows only.
  for (r = 0; r < 8; ++r) {
    idct32_1d(input + r * 32, rows + r * 32);
  }

  // Column pass: all 32 columns must be transformed.
  for (c = 0; c < 32; ++c) {
    // Gather column c of the intermediate block into a contiguous vector.
    for (r = 0; r < 32; ++r) {
      col_in[r] = rows[r * 32 + c];
    }

    idct32_1d(col_in, col_out);

    // Round the result (shift by 6), add it to the existing pixel and clip.
    for (r = 0; r < 32; ++r) {
      uint8_t *const px = &dest[r * stride + c];
      *px = clip_pixel(ROUND_POWER_OF_TWO(col_out[r], 6) + *px);
    }
  }
}
void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
int i, j;
int a1;
@ -1350,6 +1375,9 @@ void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
if (eob) {
if (eob == 1)
vp9_idct32x32_1_add(input, dest, stride);
else if (eob <= 34)
// non-zero coeff only in upper-left 8x8
vp9_idct32x32_34_add(input, dest, stride);
else
vp9_idct32x32_1024_add(input, dest, stride);
}

View File

@ -294,6 +294,9 @@ specialize vp9_idct16x16_10_add sse2 neon
prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_1024_add sse2 neon
# 32x32 inverse DCT variant for eob <= 34 (non-zero coefficients confined to
# the upper-left 8x8); only an sse2 specialization is listed for it here.
prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_34_add sse2
prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_1_add sse2

File diff suppressed because it is too large Load Diff