Use correct size load in vpx_avg_4x4_sse2.
The old version used 64-bit loads and then ignored the top half of each result. Each row of the 4x4 block only needs 4 bytes, so the extra 4 bytes could be read past the end of the buffer, which can cause ASan failures. Switched to using 32-bit loads instead.
Change-Id: I57da127a26f869fb4b4f700b55408f6dc2fbbc1a
This commit is contained in:
parent
94e84bbc07
commit
ffa9173378
@ -13,6 +13,8 @@ DSP_SRCS-yes += vpx_dsp_common.h
|
|||||||
|
|
||||||
DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h
|
DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h
|
||||||
|
|
||||||
|
DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/synonyms.h
|
||||||
|
|
||||||
# bit reader
|
# bit reader
|
||||||
DSP_SRCS-yes += prob.h
|
DSP_SRCS-yes += prob.h
|
||||||
DSP_SRCS-yes += prob.c
|
DSP_SRCS-yes += prob.c
|
||||||
|
@ -10,6 +10,8 @@
|
|||||||
|
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
|
|
||||||
|
#include "vpx_dsp/x86/synonyms.h"
|
||||||
|
|
||||||
#include "./vpx_dsp_rtcd.h"
|
#include "./vpx_dsp_rtcd.h"
|
||||||
#include "vpx_ports/mem.h"
|
#include "vpx_ports/mem.h"
|
||||||
|
|
||||||
@ -121,13 +123,14 @@ unsigned int vpx_avg_8x8_sse2(const uint8_t *s, int p) {
|
|||||||
unsigned int vpx_avg_4x4_sse2(const uint8_t *s, int p) {
|
unsigned int vpx_avg_4x4_sse2(const uint8_t *s, int p) {
|
||||||
__m128i s0, s1, u0;
|
__m128i s0, s1, u0;
|
||||||
unsigned int avg = 0;
|
unsigned int avg = 0;
|
||||||
|
|
||||||
u0 = _mm_setzero_si128();
|
u0 = _mm_setzero_si128();
|
||||||
s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
|
s0 = _mm_unpacklo_epi8(xx_loadl_32(s), u0);
|
||||||
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
|
s1 = _mm_unpacklo_epi8(xx_loadl_32(s + p), u0);
|
||||||
s0 = _mm_adds_epu16(s0, s1);
|
s0 = _mm_adds_epu16(s0, s1);
|
||||||
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
|
s1 = _mm_unpacklo_epi8(xx_loadl_32(s + 2 * p), u0);
|
||||||
s0 = _mm_adds_epu16(s0, s1);
|
s0 = _mm_adds_epu16(s0, s1);
|
||||||
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
|
s1 = _mm_unpacklo_epi8(xx_loadl_32(s + 3 * p), u0);
|
||||||
s0 = _mm_adds_epu16(s0, s1);
|
s0 = _mm_adds_epu16(s0, s1);
|
||||||
|
|
||||||
s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4));
|
s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4));
|
||||||
|
Loading…
Reference in New Issue
Block a user