
This minimizes code differences between AOM master and nextgenv2. Change-Id: If144865bdf3ef0818e7aac11018b9e786444c550
109 lines
6.6 KiB
C
109 lines
6.6 KiB
C
/*
|
|
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
|
*
|
|
* This source code is subject to the terms of the BSD 2 Clause License and
|
|
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
|
* was not distributed with this source code in the LICENSE file, you can
|
|
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
|
* Media Patent License 1.0 was not distributed with this source code in the
|
|
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
|
*/
|
|
|
|
#include "./av1_rtcd.h"
|
|
#include "aom_dsp/mips/macros_msa.h"
|
|
|
|
/*
 * BLOCK_ERROR_BLOCKSIZE_MSA(BSize) expands to the definition of
 *
 *   static int64_t block_error_<BSize>size_msa(const int16_t *coeff_ptr,
 *                                              const int16_t *dq_coeff_ptr,
 *                                              int64_t *ssz);
 *
 * The generated function walks BSize int16_t values from each input, 16 per
 * step (two vector loads, at element offsets 0 and 8). It maintains two pairs
 * of 64-bit vector accumulators:
 *   - one pair fed from the widened coeff values multiplied by themselves;
 *     the total over all lanes is stored to *ssz;
 *   - one pair fed from the coeff/dq_coeff differences multiplied by
 *     themselves; the total over all lanes is the return value.
 *
 * BSize must be a literal multiple of 16 that is at least 16: the first 16
 * values are always consumed, and the remaining step count (BSize >> 4) - 1
 * is held in a uint32_t, so a smaller BSize would make it wrap around.
 *
 * NOTE(review): LD_SH, UNPCK_SH_SW, ILVRL_H2_SH, HSUB_UH2_SW, DOTP_SW2_SD and
 * DPADD_SD2_SD are defined in aom_dsp/mips/macros_msa.h, which is not visible
 * here. The description above assumes they load, widen, interleave,
 * pairwise-subtract, dot-product and dot-product-accumulate respectively;
 * confirm against that header.
 */
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize)                                      \
  static int64_t block_error_##BSize##size_msa(                               \
      const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) {  \
    /* 16-value steps still pending once the first step is done. */           \
    uint32_t steps_left = (BSize >> 4) - 1;                                   \
    v8i16 src, deq, pair_r, pair_l;                                           \
    v4i32 delta_r, delta_l, src_r, src_l;                                     \
    v2i64 ssz_r, ssz_l;                                                       \
    v2i64 err_r, err_l;                                                       \
                                                                              \
    /* First step, values 0..7: DOTP_* gives the accumulators their */        \
    /* initial contents, so no separate zeroing is needed.          */        \
    src = LD_SH(coeff_ptr);                                                   \
    deq = LD_SH(dq_coeff_ptr);                                                \
    UNPCK_SH_SW(src, src_r, src_l);                                           \
    ILVRL_H2_SH(src, deq, pair_r, pair_l);                                    \
    HSUB_UH2_SW(pair_r, pair_l, delta_r, delta_l);                            \
    DOTP_SW2_SD(src_r, src_l, src_r, src_l, ssz_r, ssz_l);                    \
    DOTP_SW2_SD(delta_r, delta_l, delta_r, delta_l, err_r, err_l);            \
                                                                              \
    /* First step, values 8..15: from here on DPADD_* accumulates. */         \
    src = LD_SH(coeff_ptr + 8);                                               \
    deq = LD_SH(dq_coeff_ptr + 8);                                            \
    UNPCK_SH_SW(src, src_r, src_l);                                           \
    ILVRL_H2_SH(src, deq, pair_r, pair_l);                                    \
    HSUB_UH2_SW(pair_r, pair_l, delta_r, delta_l);                            \
    DPADD_SD2_SD(src_r, src_l, ssz_r, ssz_l);                                 \
    DPADD_SD2_SD(delta_r, delta_l, err_r, err_l);                             \
                                                                              \
    coeff_ptr += 16;                                                          \
    dq_coeff_ptr += 16;                                                       \
                                                                              \
    /* Remaining steps; the body does not run at all when BSize is 16. */     \
    while (steps_left--) {                                                    \
      src = LD_SH(coeff_ptr);                                                 \
      deq = LD_SH(dq_coeff_ptr);                                              \
      UNPCK_SH_SW(src, src_r, src_l);                                         \
      ILVRL_H2_SH(src, deq, pair_r, pair_l);                                  \
      HSUB_UH2_SW(pair_r, pair_l, delta_r, delta_l);                          \
      DPADD_SD2_SD(src_r, src_l, ssz_r, ssz_l);                               \
      DPADD_SD2_SD(delta_r, delta_l, err_r, err_l);                           \
                                                                              \
      src = LD_SH(coeff_ptr + 8);                                             \
      deq = LD_SH(dq_coeff_ptr + 8);                                          \
      UNPCK_SH_SW(src, src_r, src_l);                                         \
      ILVRL_H2_SH(src, deq, pair_r, pair_l);                                  \
      HSUB_UH2_SW(pair_r, pair_l, delta_r, delta_l);                          \
      DPADD_SD2_SD(src_r, src_l, ssz_r, ssz_l);                               \
      DPADD_SD2_SD(delta_r, delta_l, err_r, err_l);                           \
                                                                              \
      coeff_ptr += 16;                                                        \
      dq_coeff_ptr += 16;                                                     \
    }                                                                         \
                                                                              \
    /* Fold lane 1 into lane 0 of each accumulator, then add both lane 0s. */ \
    ssz_r += __msa_splati_d(ssz_r, 1);                                        \
    ssz_l += __msa_splati_d(ssz_l, 1);                                        \
    *ssz = __msa_copy_s_d(ssz_r, 0) + __msa_copy_s_d(ssz_l, 0);               \
                                                                              \
    err_r += __msa_splati_d(err_r, 1);                                        \
    err_l += __msa_splati_d(err_l, 1);                                        \
                                                                              \
    return __msa_copy_s_d(err_r, 0) + __msa_copy_s_d(err_l, 0);               \
  }
|
|
|
|
/* clang-format off */
|
|
BLOCK_ERROR_BLOCKSIZE_MSA(16)
|
|
BLOCK_ERROR_BLOCKSIZE_MSA(64)
|
|
BLOCK_ERROR_BLOCKSIZE_MSA(256)
|
|
BLOCK_ERROR_BLOCKSIZE_MSA(1024)
|
|
/* clang-format on */
|
|
|
|
/*
 * MSA entry point for the block error computation.
 *
 * coeff_ptr    - first coefficient array.
 * dq_coeff_ptr - second coefficient array, compared against coeff_ptr.
 * blk_size     - number of coefficients to process.
 * ssz          - output; written by whichever implementation is selected.
 *
 * Returns the value produced by the selected implementation. Sizes 16, 64,
 * 256 and 1024 go to the matching block_error_<N>size_msa kernel; every other
 * size is forwarded unchanged to av1_block_error_c().
 *
 * NOTE(review): the MSA kernels read their inputs as int16_t, so both
 * pointers are cast from tran_low_t. That is only correct if tran_low_t is a
 * 16-bit type in the builds where this function is used - confirm against
 * the tran_low_t definition and the rtcd configuration.
 */
int64_t av1_block_error_msa(const tran_low_t *coeff_ptr,
                            const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
                            int64_t *ssz) {
  const int16_t *const coeff16 = (const int16_t *)coeff_ptr;
  const int16_t *const dq_coeff16 = (const int16_t *)dq_coeff_ptr;

  switch (blk_size) {
    case 16: return block_error_16size_msa(coeff16, dq_coeff16, ssz);
    case 64: return block_error_64size_msa(coeff16, dq_coeff16, ssz);
    case 256: return block_error_256size_msa(coeff16, dq_coeff16, ssz);
    case 1024: return block_error_1024size_msa(coeff16, dq_coeff16, ssz);
    default:
      /* No specialised kernel for this size: use the C implementation with */
      /* the original, uncast pointers.                                     */
      return av1_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
  }
}
|