vp8: [loongson] optimize idctllm with mmi
1. vp8_short_idct4x4llm_mmi 2. vp8_short_inv_walsh4x4_mmi 3. vp8_dc_only_idct_add_mmi Change-Id: I616923681e79d78607a4988608fc39df77b093f4
This commit is contained in:
parent
0726dd97d3
commit
5b558592f5
@ -169,4 +169,9 @@ INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
|
||||
INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
|
||||
::testing::Values(vp8_short_idct4x4llm_msa));
|
||||
#endif // HAVE_MSA
|
||||
|
||||
#if HAVE_MMI
|
||||
INSTANTIATE_TEST_CASE_P(MMI, IDCTTest,
|
||||
::testing::Values(vp8_short_idct4x4llm_mmi));
|
||||
#endif // HAVE_MMI
|
||||
}
|
||||
|
328
vp8/common/mips/mmi/idctllm_mmi.c
Normal file
328
vp8/common/mips/mmi/idctllm_mmi.c
Normal file
@ -0,0 +1,328 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_ports/asmdefs_mmi.h"
|
||||
|
||||
/*
 * TRANSPOSE_4H: inline-asm fragment that transposes the 4x4 block of
 * halfwords held in %[ftmp1]..%[ftmp4] (one row per register) and leaves
 * the result in the same four registers.
 *
 * The enclosing asm statement must provide the operands %[ftmp0]..%[ftmp10]
 * and %[tmp0]. On exit %[ftmp0] is zero, %[ftmp10] holds the pshufh
 * selector 0x93, and %[ftmp5]..%[ftmp9] and %[tmp0] are clobbered.
 */
#define TRANSPOSE_4H                                                  \
  /* ftmp0 = 0, ftmp10 = shuffle selector 0x93 */                     \
  "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"       \
  MMI_LI(%[tmp0], 0x93)                                               \
  "mtc1       %[tmp0],    %[ftmp10]                       \n\t"       \
  /* ftmp5 = low halves of ftmp1 and ftmp2 combined: each is */       \
  /* widened against zero, the second is shuffled, then OR-ed */      \
  "punpcklhw  %[ftmp5],   %[ftmp1],       %[ftmp0]        \n\t"       \
  "punpcklhw  %[ftmp9],   %[ftmp2],       %[ftmp0]        \n\t"       \
  "pshufh     %[ftmp9],   %[ftmp9],       %[ftmp10]       \n\t"       \
  "or         %[ftmp5],   %[ftmp5],       %[ftmp9]        \n\t"       \
  /* ftmp6 = same combination for the high halves of ftmp1/ftmp2 */   \
  "punpckhhw  %[ftmp6],   %[ftmp1],       %[ftmp0]        \n\t"       \
  "punpckhhw  %[ftmp9],   %[ftmp2],       %[ftmp0]        \n\t"       \
  "pshufh     %[ftmp9],   %[ftmp9],       %[ftmp10]       \n\t"       \
  "or         %[ftmp6],   %[ftmp6],       %[ftmp9]        \n\t"       \
  /* ftmp7 = low halves of ftmp3 and ftmp4 combined */                \
  "punpcklhw  %[ftmp7],   %[ftmp3],       %[ftmp0]        \n\t"       \
  "punpcklhw  %[ftmp9],   %[ftmp4],       %[ftmp0]        \n\t"       \
  "pshufh     %[ftmp9],   %[ftmp9],       %[ftmp10]       \n\t"       \
  "or         %[ftmp7],   %[ftmp7],       %[ftmp9]        \n\t"       \
  /* ftmp8 = high halves of ftmp3 and ftmp4 combined */               \
  "punpckhhw  %[ftmp8],   %[ftmp3],       %[ftmp0]        \n\t"       \
  "punpckhhw  %[ftmp9],   %[ftmp4],       %[ftmp0]        \n\t"       \
  "pshufh     %[ftmp9],   %[ftmp9],       %[ftmp10]       \n\t"       \
  "or         %[ftmp8],   %[ftmp8],       %[ftmp9]        \n\t"       \
  /* word-level unpack of the pairs yields the transposed rows */     \
  "punpcklwd  %[ftmp1],   %[ftmp5],       %[ftmp7]        \n\t"       \
  "punpckhwd  %[ftmp2],   %[ftmp5],       %[ftmp7]        \n\t"       \
  "punpcklwd  %[ftmp3],   %[ftmp6],       %[ftmp8]        \n\t"       \
  "punpckhwd  %[ftmp4],   %[ftmp6],       %[ftmp8]        \n\t"
|
||||
|
||||
void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride) {
|
||||
double ftmp[12];
|
||||
uint32_t tmp[0];
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_ph_04) = { 0x0004000400040004ULL };
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = { 0x4e7b4e7b4e7b4e7bULL };
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = { 0x22a322a322a322a3ULL };
|
||||
|
||||
__asm__ volatile (
|
||||
MMI_LI(%[tmp0], 0x02)
|
||||
"mtc1 %[tmp0], %[ftmp11] \n\t"
|
||||
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
|
||||
|
||||
"gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
|
||||
"gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
|
||||
"gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t"
|
||||
"gsldrc1 %[ftmp2], 0x08(%[ip]) \n\t"
|
||||
"gsldlc1 %[ftmp3], 0x17(%[ip]) \n\t"
|
||||
"gsldrc1 %[ftmp3], 0x10(%[ip]) \n\t"
|
||||
"gsldlc1 %[ftmp4], 0x1f(%[ip]) \n\t"
|
||||
"gsldrc1 %[ftmp4], 0x18(%[ip]) \n\t"
|
||||
|
||||
// ip[0...3] + ip[8...11]
|
||||
"paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
|
||||
// ip[0...3] - ip[8...11]
|
||||
"psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
|
||||
// (ip[12...15] * sinpi8sqrt2) >> 16
|
||||
"psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
|
||||
"pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t"
|
||||
// (ip[ 4... 7] * sinpi8sqrt2) >> 16
|
||||
"psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
|
||||
"pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t"
|
||||
// ip[ 4... 7] + ((ip[ 4... 7] * cospi8sqrt2minus1) >> 16)
|
||||
"pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t"
|
||||
"paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
|
||||
// ip[12...15] + ((ip[12...15] * cospi8sqrt2minus1) >> 16)
|
||||
"pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
|
||||
"paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t"
|
||||
|
||||
"paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
|
||||
"paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
|
||||
"paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t"
|
||||
"psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
|
||||
"psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t"
|
||||
"paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
|
||||
"psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t"
|
||||
"psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
|
||||
|
||||
TRANSPOSE_4H
|
||||
// a
|
||||
"paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
|
||||
// b
|
||||
"psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
|
||||
// c
|
||||
"psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
|
||||
"pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
|
||||
"psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t"
|
||||
"pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
|
||||
"psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
|
||||
// d
|
||||
"psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
|
||||
"pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
|
||||
"paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t"
|
||||
"pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t"
|
||||
"paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
|
||||
|
||||
MMI_LI(%[tmp0], 0x03)
|
||||
"mtc1 %[tmp0], %[ftmp11] \n\t"
|
||||
// a + d
|
||||
"paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t"
|
||||
"paddh %[ftmp1], %[ftmp1], %[ff_ph_04] \n\t"
|
||||
"psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
|
||||
// b + c
|
||||
"paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t"
|
||||
"paddh %[ftmp2], %[ftmp2], %[ff_ph_04] \n\t"
|
||||
"psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
|
||||
// b - c
|
||||
"psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t"
|
||||
"paddh %[ftmp3], %[ftmp3], %[ff_ph_04] \n\t"
|
||||
"psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
|
||||
// a - d
|
||||
"psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
|
||||
"paddh %[ftmp4], %[ftmp4], %[ff_ph_04] \n\t"
|
||||
"psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
|
||||
|
||||
TRANSPOSE_4H
|
||||
#if _MIPS_SIM == _ABIO32
|
||||
"ulw %[tmp0], 0x00(%[pred_prt]) \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp5] \n\t"
|
||||
#else
|
||||
"gslwlc1 %[ftmp5], 0x03(%[pred_ptr]) \n\t"
|
||||
"gslwrc1 %[ftmp5], 0x00(%[pred_ptr]) \n\t"
|
||||
#endif
|
||||
"punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
|
||||
"paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
|
||||
"packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
|
||||
"gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
|
||||
MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
|
||||
MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
|
||||
|
||||
#if _MIPS_SIM == _ABIO32
|
||||
"ulw %[tmp0], 0x00(%[pred_prt]) \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp6] \n\t"
|
||||
#else
|
||||
"gslwlc1 %[ftmp6], 0x03(%[pred_ptr]) \n\t"
|
||||
"gslwrc1 %[ftmp6], 0x00(%[pred_ptr]) \n\t"
|
||||
#endif
|
||||
"punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
|
||||
"paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
|
||||
"packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
|
||||
"gsswlc1 %[ftmp2], 0x03(%[dst_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp2], 0x00(%[dst_ptr]) \n\t"
|
||||
MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
|
||||
MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
|
||||
|
||||
#if _MIPS_SIM == _ABIO32
|
||||
"ulw %[tmp0], 0x00(%[pred_prt]) \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp7] \n\t"
|
||||
#else
|
||||
"gslwlc1 %[ftmp7], 0x03(%[pred_ptr]) \n\t"
|
||||
"gslwrc1 %[ftmp7], 0x00(%[pred_ptr]) \n\t"
|
||||
#endif
|
||||
"punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
|
||||
"paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
|
||||
"packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
|
||||
"gsswlc1 %[ftmp3], 0x03(%[dst_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp3], 0x00(%[dst_ptr]) \n\t"
|
||||
MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
|
||||
MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
|
||||
|
||||
#if _MIPS_SIM == _ABIO32
|
||||
"ulw %[tmp0], 0x00(%[pred_prt]) \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp8] \n\t"
|
||||
#else
|
||||
"gslwlc1 %[ftmp8], 0x03(%[pred_ptr]) \n\t"
|
||||
"gslwrc1 %[ftmp8], 0x00(%[pred_ptr]) \n\t"
|
||||
#endif
|
||||
"punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
|
||||
"paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
|
||||
"packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
|
||||
"gsswlc1 %[ftmp4], 0x03(%[dst_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp4], 0x00(%[dst_ptr]) \n\t"
|
||||
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
|
||||
[ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
|
||||
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
|
||||
[ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]),
|
||||
[ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]),
|
||||
[pred_ptr]"+&r"(pred_ptr), [dst_ptr]"+&r"(dst_ptr)
|
||||
: [ip]"r"(input), [ff_ph_22a3]"f"(ff_ph_22a3),
|
||||
[ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_04]"f"(ff_ph_04),
|
||||
[pred_stride]"r"((mips_reg)pred_stride),
|
||||
[dst_stride]"r"((mips_reg)dst_stride)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride) {
|
||||
int a1 = ((input_dc + 4) >> 3);
|
||||
double ftmp[5];
|
||||
int low32;
|
||||
|
||||
__asm__ volatile (
|
||||
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
|
||||
"pshufh %[a1], %[a1], %[ftmp0] \n\t"
|
||||
"ulw %[low32], 0x00(%[pred_ptr]) \n\t"
|
||||
"mtc1 %[low32], %[ftmp1] \n\t"
|
||||
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
|
||||
"paddsh %[ftmp2], %[ftmp2], %[a1] \n\t"
|
||||
"packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t"
|
||||
"gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
|
||||
|
||||
MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
|
||||
MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
|
||||
"ulw %[low32], 0x00(%[pred_ptr]) \n\t"
|
||||
"mtc1 %[low32], %[ftmp1] \n\t"
|
||||
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
|
||||
"paddsh %[ftmp2], %[ftmp2], %[a1] \n\t"
|
||||
"packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t"
|
||||
"gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
|
||||
|
||||
MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
|
||||
MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
|
||||
"ulw %[low32], 0x00(%[pred_ptr]) \n\t"
|
||||
"mtc1 %[low32], %[ftmp1] \n\t"
|
||||
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
|
||||
"paddsh %[ftmp2], %[ftmp2], %[a1] \n\t"
|
||||
"packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t"
|
||||
"gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
|
||||
|
||||
MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
|
||||
MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
|
||||
"ulw %[low32], 0x00(%[pred_ptr]) \n\t"
|
||||
"mtc1 %[low32], %[ftmp1] \n\t"
|
||||
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
|
||||
"paddsh %[ftmp2], %[ftmp2], %[a1] \n\t"
|
||||
"packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t"
|
||||
"gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
|
||||
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
|
||||
[ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [low32]"=&r"(low32),
|
||||
[dst_ptr]"+&r"(dst_ptr), [pred_ptr]"+&r"(pred_ptr)
|
||||
: [dst_stride]"r"((mips_reg)dst_stride),
|
||||
[pred_stride]"r"((mips_reg)pred_stride), [a1]"f"(a1)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) {
|
||||
int i;
|
||||
int16_t output[16];
|
||||
double ftmp[12];
|
||||
uint32_t tmp[1];
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_ph_03) = { 0x0003000300030003ULL };
|
||||
|
||||
__asm__ volatile (
|
||||
MMI_LI(%[tmp0], 0x03)
|
||||
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp11] \n\t"
|
||||
"gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
|
||||
"gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
|
||||
"gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t"
|
||||
"gsldrc1 %[ftmp2], 0x08(%[ip]) \n\t"
|
||||
"gsldlc1 %[ftmp3], 0x17(%[ip]) \n\t"
|
||||
"gsldrc1 %[ftmp3], 0x10(%[ip]) \n\t"
|
||||
"gsldlc1 %[ftmp4], 0x1f(%[ip]) \n\t"
|
||||
"gsldrc1 %[ftmp4], 0x18(%[ip]) \n\t"
|
||||
"paddh %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
|
||||
"psubh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
|
||||
"paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
|
||||
"psubh %[ftmp8], %[ftmp3], %[ftmp4] \n\t"
|
||||
|
||||
"paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
|
||||
"psubh %[ftmp2], %[ftmp5], %[ftmp7] \n\t"
|
||||
"psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t"
|
||||
"paddh %[ftmp4], %[ftmp6], %[ftmp8] \n\t"
|
||||
|
||||
TRANSPOSE_4H
|
||||
// a
|
||||
"paddh %[ftmp5], %[ftmp1], %[ftmp4] \n\t"
|
||||
// d
|
||||
"psubh %[ftmp6], %[ftmp1], %[ftmp4] \n\t"
|
||||
// b
|
||||
"paddh %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
|
||||
// c
|
||||
"psubh %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
|
||||
|
||||
"paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
|
||||
"paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t"
|
||||
"psubh %[ftmp3], %[ftmp5], %[ftmp7] \n\t"
|
||||
"psubh %[ftmp4], %[ftmp6], %[ftmp8] \n\t"
|
||||
|
||||
"paddh %[ftmp1], %[ftmp1], %[ff_ph_03] \n\t"
|
||||
"psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
|
||||
"paddh %[ftmp2], %[ftmp2], %[ff_ph_03] \n\t"
|
||||
"psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
|
||||
"paddh %[ftmp3], %[ftmp3], %[ff_ph_03] \n\t"
|
||||
"psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
|
||||
"paddh %[ftmp4], %[ftmp4], %[ff_ph_03] \n\t"
|
||||
"psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
|
||||
|
||||
TRANSPOSE_4H
|
||||
"gssdlc1 %[ftmp1], 0x07(%[op]) \n\t"
|
||||
"gssdrc1 %[ftmp1], 0x00(%[op]) \n\t"
|
||||
"gssdlc1 %[ftmp2], 0x0f(%[op]) \n\t"
|
||||
"gssdrc1 %[ftmp2], 0x08(%[op]) \n\t"
|
||||
"gssdlc1 %[ftmp3], 0x17(%[op]) \n\t"
|
||||
"gssdrc1 %[ftmp3], 0x10(%[op]) \n\t"
|
||||
"gssdlc1 %[ftmp4], 0x1f(%[op]) \n\t"
|
||||
"gssdrc1 %[ftmp4], 0x18(%[op]) \n\t"
|
||||
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
|
||||
[ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
|
||||
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
|
||||
[ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]),
|
||||
[ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0])
|
||||
: [ip]"r"(input), [op]"r"(output), [ff_ph_03]"f"(ff_ph_03)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
mb_dqcoeff[i * 16] = output[i];
|
||||
}
|
||||
}
|
@ -86,7 +86,7 @@ $vp8_loop_filter_simple_bh_mmi=vp8_loop_filter_bhs_mmi;
|
||||
#
|
||||
#idct16
|
||||
add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
|
||||
specialize qw/vp8_short_idct4x4llm mmx neon dspr2 msa/;
|
||||
specialize qw/vp8_short_idct4x4llm mmx neon dspr2 msa mmi/;
|
||||
|
||||
#iwalsh1
|
||||
add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
|
||||
@ -94,11 +94,11 @@ specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;
|
||||
|
||||
#iwalsh16
|
||||
add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
|
||||
specialize qw/vp8_short_inv_walsh4x4 sse2 neon dspr2 msa/;
|
||||
specialize qw/vp8_short_inv_walsh4x4 sse2 neon dspr2 msa mmi/;
|
||||
|
||||
#idct1_scalar_add
|
||||
add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
|
||||
specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa/;
|
||||
specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa mmi/;
|
||||
|
||||
#
|
||||
# RECON
|
||||
|
@ -119,6 +119,7 @@ VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp8_macros_msa.h
|
||||
# common (c)
|
||||
VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/sixtap_filter_mmi.c
|
||||
VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/loopfilter_filters_mmi.c
|
||||
VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/idctllm_mmi.c
|
||||
|
||||
ifeq ($(CONFIG_POSTPROC),yes)
|
||||
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c
|
||||
|
Loading…
x
Reference in New Issue
Block a user