Improve the accuracy of the forward Walsh-Hadamard transform
Besides slightly improving the round-trip error, this change also fixes a sign bias in the forward transform, so the round-trip errors are evenly distributed between +1s and -1s. The old bias seemed to work well with the DC sign bias in the old fdct, which no longer exists in the improved fdct. Change-Id: I8635e7be16c69e69a8669eca5438550d23089cef
This commit is contained in:
parent
f1a3b1e0d9
commit
b62d093efa
@ -69,17 +69,18 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[3];
|
||||
b1 = ip[1] + ip[2];
|
||||
c1 = ip[1] - ip[2];
|
||||
d1 = ip[0] - ip[3];
|
||||
a1 = ((ip[0] + ip[2])<<2);
|
||||
d1 = ((ip[1] + ip[3])<<2);
|
||||
c1 = ((ip[1] - ip[3])<<2);
|
||||
b1 = ((ip[0] - ip[2])<<2);
|
||||
|
||||
op[0] = a1 + b1;
|
||||
op[1] = c1 + d1;
|
||||
op[2] = a1 - b1;
|
||||
op[3] = d1 - c1;
|
||||
op[0] = a1 + d1 + (a1!=0);
|
||||
op[1] = b1 + c1;
|
||||
op[2] = b1 - c1;
|
||||
op[3] = a1 - d1;
|
||||
ip += pitch / 2;
|
||||
op += 4;
|
||||
}
|
||||
@ -89,25 +90,25 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[12];
|
||||
b1 = ip[4] + ip[8];
|
||||
c1 = ip[4] - ip[8];
|
||||
d1 = ip[0] - ip[12];
|
||||
a1 = ip[0] + ip[8];
|
||||
d1 = ip[4] + ip[12];
|
||||
c1 = ip[4] - ip[12];
|
||||
b1 = ip[0] - ip[8];
|
||||
|
||||
a2 = a1 + b1;
|
||||
b2 = c1 + d1;
|
||||
c2 = a1 - b1;
|
||||
d2 = d1 - c1;
|
||||
a2 = a1 + d1;
|
||||
b2 = b1 + c1;
|
||||
c2 = b1 - c1;
|
||||
d2 = a1 - d1;
|
||||
|
||||
a2 += (a2 > 0);
|
||||
b2 += (b2 > 0);
|
||||
c2 += (c2 > 0);
|
||||
d2 += (d2 > 0);
|
||||
a2 += a2<0;
|
||||
b2 += b2<0;
|
||||
c2 += c2<0;
|
||||
d2 += d2<0;
|
||||
|
||||
op[0] = (a2) >> 1;
|
||||
op[4] = (b2) >> 1;
|
||||
op[8] = (c2) >> 1;
|
||||
op[12] = (d2) >> 1;
|
||||
op[0] = (a2+3) >> 3;
|
||||
op[4] = (b2+3) >> 3;
|
||||
op[8] = (c2+3) >> 3;
|
||||
op[12]= (d2+3) >> 3;
|
||||
|
||||
ip++;
|
||||
op++;
|
||||
|
@ -278,7 +278,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_sse2;
|
||||
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_sse2;
|
||||
|
||||
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2;
|
||||
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c ;
|
||||
|
||||
cpi->rtcd.encodemb.berr = vp8_block_error_xmm;
|
||||
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm;
|
||||
|
Loading…
x
Reference in New Issue
Block a user