Redo the forward 4x4 dct

The new fdct lowers the round trip sum squared error for a
4x4 block ~0.12. or ~0.008/pixel. For reference, the old
matrix multiply version has average round trip error 1.46
for a 4x4 block.

Thanks to "derf" for his suggestions and references.

Change-Id: I5559d1e81d333b319404ab16b336b739f87afc79
This commit is contained in:
Yaowu Xu
2010-06-16 12:52:18 -07:00
parent a5906668a3
commit d0dd01b8ce
14 changed files with 118 additions and 562 deletions

View File

@@ -137,8 +137,6 @@ extern unsigned int inter_b_modes[15];
extern void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
extern void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
extern void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
extern void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);
extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
@@ -1136,15 +1134,11 @@ void vp8_set_speed_features(VP8_COMP *cpi)
{
cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
cpi->mb.short_fdct8x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
}
else
{
cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4);
cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
cpi->mb.short_fdct8x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4);
cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
}
cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4);