Add zero-offset handling to the SSSE3 sixtap_predict functions
This patch fixes issue 458 by calling the copy function when both offsets are 0, which guarantees that the SSSE3 functions produce the same result as the C functions for all possible offsets. Change-Id: I209aec7a4c6b3362db2646a8887c1038493b6496
This commit is contained in:
parent
b293698561
commit
147e864629
@ -438,19 +438,35 @@ void vp8_sixtap_predict16x16_ssse3
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
|
||||
vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
|
||||
vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line, FData2,
|
||||
16, 21, xoffset);
|
||||
vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch,
|
||||
16, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First-pass only */
|
||||
vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
|
||||
vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 16, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Second-pass only */
|
||||
vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
|
||||
if (yoffset)
|
||||
{
|
||||
/* Second-pass only */
|
||||
vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 16, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
|
||||
* yoffset==0) case correctly. Add copy function here to guarantee
|
||||
* six-tap function handles all possible offsets. */
|
||||
vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -470,18 +486,34 @@ void vp8_sixtap_predict8x8_ssse3
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
|
||||
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line, FData2,
|
||||
8, 13, xoffset);
|
||||
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
|
||||
8, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 8, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Second-pass only */
|
||||
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
|
||||
if (yoffset)
|
||||
{
|
||||
/* Second-pass only */
|
||||
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 8, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
|
||||
* yoffset==0) case correctly. Add copy function here to guarantee
|
||||
* six-tap function handles all possible offsets. */
|
||||
vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -502,19 +534,35 @@ void vp8_sixtap_predict8x4_ssse3
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
|
||||
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line, FData2,
|
||||
8, 9, xoffset);
|
||||
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
|
||||
4, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First-pass only */
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 4, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Second-pass only */
|
||||
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
|
||||
if (yoffset)
|
||||
{
|
||||
/* Second-pass only */
|
||||
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 4, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
|
||||
* yoffset==0) case correctly. Add copy function here to guarantee
|
||||
* six-tap function handles all possible offsets. */
|
||||
vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -534,19 +582,48 @@ void vp8_sixtap_predict4x4_ssse3
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
|
||||
vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
|
||||
vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line,
|
||||
FData2, 4, 9, xoffset);
|
||||
vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch,
|
||||
4, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
|
||||
vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 4, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
|
||||
}
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 4, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
|
||||
* yoffset==0) case correctly. Add copy function here to guarantee
|
||||
* six-tap function handles all possible offsets. */
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst_ptr[0] = src_ptr[0];
|
||||
dst_ptr[1] = src_ptr[1];
|
||||
dst_ptr[2] = src_ptr[2];
|
||||
dst_ptr[3] = src_ptr[3];
|
||||
#else
|
||||
*(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ;
|
||||
#endif
|
||||
dst_ptr += dst_pitch;
|
||||
src_ptr += src_pixels_per_line;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user