Improve the accuracy of the forward Walsh-Hadamard transform

Besides slightly improving the round-trip error, this
also fixes a sign bias in the forward transform, so the
round-trip errors are evenly distributed between +1s and
-1s. The old bias seemed to work well with the DC sign bias
in the old fdct, which no longer exists in the improved fdct.

Change-Id: I8635e7be16c69e69a8669eca5438550d23089cef
This commit is contained in:
Yaowu Xu 2010-06-28 22:03:43 -07:00
parent f1a3b1e0d9
commit b62d093efa
2 changed files with 26 additions and 25 deletions

View File

@ -69,17 +69,18 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
short *ip = input;
short *op = output;
for (i = 0; i < 4; i++)
{
a1 = ip[0] + ip[3];
b1 = ip[1] + ip[2];
c1 = ip[1] - ip[2];
d1 = ip[0] - ip[3];
a1 = ((ip[0] + ip[2])<<2);
d1 = ((ip[1] + ip[3])<<2);
c1 = ((ip[1] - ip[3])<<2);
b1 = ((ip[0] - ip[2])<<2);
op[0] = a1 + b1;
op[1] = c1 + d1;
op[2] = a1 - b1;
op[3] = d1 - c1;
op[0] = a1 + d1 + (a1!=0);
op[1] = b1 + c1;
op[2] = b1 - c1;
op[3] = a1 - d1;
ip += pitch / 2;
op += 4;
}
@ -89,25 +90,25 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
for (i = 0; i < 4; i++)
{
a1 = ip[0] + ip[12];
b1 = ip[4] + ip[8];
c1 = ip[4] - ip[8];
d1 = ip[0] - ip[12];
a1 = ip[0] + ip[8];
d1 = ip[4] + ip[12];
c1 = ip[4] - ip[12];
b1 = ip[0] - ip[8];
a2 = a1 + b1;
b2 = c1 + d1;
c2 = a1 - b1;
d2 = d1 - c1;
a2 = a1 + d1;
b2 = b1 + c1;
c2 = b1 - c1;
d2 = a1 - d1;
a2 += (a2 > 0);
b2 += (b2 > 0);
c2 += (c2 > 0);
d2 += (d2 > 0);
a2 += a2<0;
b2 += b2<0;
c2 += c2<0;
d2 += d2<0;
op[0] = (a2) >> 1;
op[4] = (b2) >> 1;
op[8] = (c2) >> 1;
op[12] = (d2) >> 1;
op[0] = (a2+3) >> 3;
op[4] = (b2+3) >> 3;
op[8] = (c2+3) >> 3;
op[12]= (d2+3) >> 3;
ip++;
op++;

View File

@ -278,7 +278,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_sse2;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_sse2;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c ;
cpi->rtcd.encodemb.berr = vp8_block_error_xmm;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm;