Use correct size load in vpx_avg_4x4_sse2.

The old version used 64-bit loads and then ignored the top half
of the result. This can cause ASan failures if we read past the end
of a buffer. Switched to using 32-bit loads instead.

Change-Id: I57da127a26f869fb4b4f700b55408f6dc2fbbc1a
This commit is contained in:
Geza Lore 2016-06-16 11:08:14 +01:00
parent 94e84bbc07
commit ffa9173378
2 changed files with 9 additions and 4 deletions

View File

@ -13,6 +13,8 @@ DSP_SRCS-yes += vpx_dsp_common.h
DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h
DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/synonyms.h
# bit reader # bit reader
DSP_SRCS-yes += prob.h DSP_SRCS-yes += prob.h
DSP_SRCS-yes += prob.c DSP_SRCS-yes += prob.c

View File

@ -10,6 +10,8 @@
#include <emmintrin.h> #include <emmintrin.h>
#include "vpx_dsp/x86/synonyms.h"
#include "./vpx_dsp_rtcd.h" #include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h" #include "vpx_ports/mem.h"
@ -121,13 +123,14 @@ unsigned int vpx_avg_8x8_sse2(const uint8_t *s, int p) {
unsigned int vpx_avg_4x4_sse2(const uint8_t *s, int p) { unsigned int vpx_avg_4x4_sse2(const uint8_t *s, int p) {
__m128i s0, s1, u0; __m128i s0, s1, u0;
unsigned int avg = 0; unsigned int avg = 0;
u0 = _mm_setzero_si128(); u0 = _mm_setzero_si128();
s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0); s0 = _mm_unpacklo_epi8(xx_loadl_32(s), u0);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0); s1 = _mm_unpacklo_epi8(xx_loadl_32(s + p), u0);
s0 = _mm_adds_epu16(s0, s1); s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0); s1 = _mm_unpacklo_epi8(xx_loadl_32(s + 2 * p), u0);
s0 = _mm_adds_epu16(s0, s1); s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0); s1 = _mm_unpacklo_epi8(xx_loadl_32(s + 3 * p), u0);
s0 = _mm_adds_epu16(s0, s1); s0 = _mm_adds_epu16(s0, s1);
s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4)); s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4));