Merge "Fix bug 806"
This commit is contained in:
commit
efa82922e4
@ -640,19 +640,9 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
|
||||
|
||||
#if HAVE_AVX2
|
||||
#if CONFIG_VP9_ENCODER
|
||||
// TODO(jzern): these prototypes can be removed after the avx2 versions are
|
||||
// reenabled in vp9_rtcd_defs.pl.
|
||||
extern "C" {
|
||||
void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_ptr[], int ref_stride,
|
||||
unsigned int *sad_array);
|
||||
void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_ptr[], int ref_stride,
|
||||
unsigned int *sad_array);
|
||||
}
|
||||
const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
|
||||
const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values(
|
||||
INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values(
|
||||
make_tuple(32, 32, sad_32x32x4d_avx2),
|
||||
make_tuple(64, 64, sad_64x64x4d_avx2)));
|
||||
#endif // CONFIG_VP9_ENCODER
|
||||
|
@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const
|
||||
specialize qw/vp9_sad4x4x8 sse4/;
|
||||
|
||||
add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp9_sad64x64x4d sse2/;
|
||||
specialize qw/vp9_sad64x64x4d sse2 avx2/;
|
||||
|
||||
add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp9_sad32x64x4d sse2/;
|
||||
@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co
|
||||
specialize qw/vp9_sad16x32x4d sse2/;
|
||||
|
||||
add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp9_sad32x32x4d sse2/;
|
||||
specialize qw/vp9_sad32x32x4d sse2 avx2/;
|
||||
|
||||
add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp9_sad16x16x4d sse2/;
|
||||
|
@ -31,7 +31,7 @@ void vp9_sad32x32x4d_avx2(uint8_t *src,
|
||||
sum_ref3 = _mm256_set1_epi16(0);
|
||||
for (i = 0; i < 32 ; i++) {
|
||||
// load src and all refs
|
||||
src_reg = _mm256_load_si256((__m256i *)(src));
|
||||
src_reg = _mm256_loadu_si256((__m256i *)(src));
|
||||
ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
|
||||
ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
|
||||
ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
|
||||
@ -103,8 +103,8 @@ void vp9_sad64x64x4d_avx2(uint8_t *src,
|
||||
sum_ref3 = _mm256_set1_epi16(0);
|
||||
for (i = 0; i < 64 ; i++) {
|
||||
// load 64 bytes from src and all refs
|
||||
src_reg = _mm256_load_si256((__m256i *)(src));
|
||||
srcnext_reg = _mm256_load_si256((__m256i *)(src + 32));
|
||||
src_reg = _mm256_loadu_si256((__m256i *)(src));
|
||||
srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32));
|
||||
ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
|
||||
ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32));
|
||||
ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
|
||||
|
Loading…
x
Reference in New Issue
Block a user