Add 0 offsets handling in SSSE3 sixtap_predict functions

This patch fixed issue 458 by calling copy function when both
offsets are 0, which guarantees the SSSE3 functions output
same result as the c function for all possible offsets.

Change-Id: I209aec7a4c6b3362db2646a8887c1038493b6496
This commit is contained in:
Yunqing Wang 2012-07-02 16:50:48 -07:00
parent b293698561
commit 147e864629

View File

@ -438,19 +438,35 @@ void vp8_sixtap_predict16x16_ssse3
{
if (yoffset)
{
vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
src_pixels_per_line, FData2,
16, 21, xoffset);
vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch,
16, yoffset);
}
else
{
/* First-pass only */
vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
dst_ptr, dst_pitch, 16, xoffset);
}
}
else
{
/* Second-pass only */
vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
if (yoffset)
{
/* Second-pass only */
vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
src_pixels_per_line,
dst_ptr, dst_pitch, 16, yoffset);
}
else
{
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
* yoffset==0) case correctly. Add copy function here to guarantee
* six-tap function handles all possible offsets. */
vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
}
}
}
@ -470,18 +486,34 @@ void vp8_sixtap_predict8x8_ssse3
{
if (yoffset)
{
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
src_pixels_per_line, FData2,
8, 13, xoffset);
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
8, yoffset);
}
else
{
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
dst_ptr, dst_pitch, 8, xoffset);
}
}
else
{
/* Second-pass only */
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
if (yoffset)
{
/* Second-pass only */
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
src_pixels_per_line,
dst_ptr, dst_pitch, 8, yoffset);
}
else
{
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
* yoffset==0) case correctly. Add copy function here to guarantee
* six-tap function handles all possible offsets. */
vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
}
}
}
@ -502,19 +534,35 @@ void vp8_sixtap_predict8x4_ssse3
{
if (yoffset)
{
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
src_pixels_per_line, FData2,
8, 9, xoffset);
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
4, yoffset);
}
else
{
/* First-pass only */
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
dst_ptr, dst_pitch, 4, xoffset);
}
}
else
{
/* Second-pass only */
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
if (yoffset)
{
/* Second-pass only */
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
src_pixels_per_line,
dst_ptr, dst_pitch, 4, yoffset);
}
else
{
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
* yoffset==0) case correctly. Add copy function here to guarantee
* six-tap function handles all possible offsets. */
vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
}
}
}
@ -534,19 +582,48 @@ void vp8_sixtap_predict4x4_ssse3
{
if (yoffset)
{
vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
src_pixels_per_line,
FData2, 4, 9, xoffset);
vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch,
4, yoffset);
}
else
{
vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
dst_ptr, dst_pitch, 4, xoffset);
}
}
else
{
vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
}
if (yoffset)
{
vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
src_pixels_per_line,
dst_ptr, dst_pitch, 4, yoffset);
}
else
{
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
* yoffset==0) case correctly. Add copy function here to guarantee
* six-tap function handles all possible offsets. */
int r;
for (r = 0; r < 4; r++)
{
#if !(CONFIG_FAST_UNALIGNED)
dst_ptr[0] = src_ptr[0];
dst_ptr[1] = src_ptr[1];
dst_ptr[2] = src_ptr[2];
dst_ptr[3] = src_ptr[3];
#else
*(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ;
#endif
dst_ptr += dst_pitch;
src_ptr += src_pixels_per_line;
}
}
}
}
#endif