block error avx2: use tran_low_t
Change-Id: Ic5f3a1f569d6f82afeaf4fcd7235374bb460db3c
This commit is contained in:
parent
0bf6b51572
commit
2104454607
@ -125,6 +125,7 @@ if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
|
||||
|
||||
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
|
||||
specialize qw/vp9_block_error avx2/;
|
||||
|
||||
add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
|
||||
specialize qw/vp9_highbd_block_error sse2/;
|
||||
|
@ -12,8 +12,10 @@
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_dsp/vpx_dsp_common.h"
|
||||
#include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
|
||||
|
||||
int64_t vp9_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
|
||||
int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff,
|
||||
intptr_t block_size, int64_t *ssz) {
|
||||
__m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg;
|
||||
__m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
|
||||
@ -29,8 +31,8 @@ int64_t vp9_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
|
||||
|
||||
for (i = 0; i < block_size; i += 16) {
|
||||
// load 32 bytes from coeff and dqcoeff
|
||||
coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i));
|
||||
dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i));
|
||||
coeff_reg = load_tran_low(coeff + i);
|
||||
dqcoeff_reg = load_tran_low(dqcoeff + i);
|
||||
// dqcoeff - coeff
|
||||
dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg);
|
||||
// madd (dqcoeff - coeff)
|
||||
|
@ -13,6 +13,7 @@ DSP_SRCS-yes += vpx_dsp_common.h
|
||||
|
||||
DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h
|
||||
|
||||
DSP_SRCS-$(HAVE_AVX2) += x86/bitdepth_conversion_avx2.h
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/bitdepth_conversion_sse2.h
|
||||
# This file is included in libs.mk. Including it here would cause it to be
|
||||
# compiled into an object. Even as an empty file, this would create an
|
||||
|
30
vpx_dsp/x86/bitdepth_conversion_avx2.h
Normal file
30
vpx_dsp/x86/bitdepth_conversion_avx2.h
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
|
||||
#define VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_dsp/vpx_dsp_common.h"
|
||||
|
||||
// Load 16 16 bit values. If the source is 32 bits then pack down with
|
||||
// saturation.
|
||||
static INLINE __m256i load_tran_low(const tran_low_t *a) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const __m256i a_low = _mm256_loadu_si256((const __m256i *)a);
|
||||
return _mm256_packs_epi32(a_low, *(const __m256i *)(a + 8));
|
||||
#else
|
||||
return _mm256_loadu_si256((const __m256i *)a);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
|
Loading…
x
Reference in New Issue
Block a user