diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 3ea0b0602..3e4e25114 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -26,9 +26,6 @@ forward_decls vp9_common_forward_decls # # Dequant # -prototype void vp9_dequantize_b "struct blockd *x" -specialize vp9_dequantize_b - prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block_8x8 diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 1da87a3e1..c2d42ea4a 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -43,14 +43,6 @@ static void add_constant_residual(const int16_t diff, const uint8_t *pred, } } -void vp9_dequantize_b_c(BLOCKD *b) { - int i; - - for (i = 0; i < 16; i++) - b->dqcoeff[i] = b->qcoeff[i] * b->dequant[i]; -} - - void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, diff --git a/vp9/decoder/x86/vp9_dequantize_mmx.asm b/vp9/decoder/x86/vp9_dequantize_mmx.asm deleted file mode 100644 index 23080bfee..000000000 --- a/vp9/decoder/x86/vp9_dequantize_mmx.asm +++ /dev/null @@ -1,406 +0,0 @@ -; -; Copyright (c) 2012 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA -align 16 -x_s1sqr2: times 4 dw 0x8A8C -align 16 -x_c1sqr2less1: times 4 dw 0x4E7B -align 16 -pw_16: times 4 dw 16 - -SECTION .text - -INIT_MMX - - -;void dequantize_b_impl_mmx(short *sq, short *dq, short *q) -cglobal dequantize_b_impl_mmx, 3,3,0,sq,dq,arg3 - mova m1, [sqq] - pmullw m1, [arg3q+0] ; mm4 *= kernel 0 modifiers. - mova [dqq+ 0], m1 - - mova m1, [sqq+8] - pmullw m1, [arg3q+8] ; mm4 *= kernel 0 modifiers. - mova [dqq+ 8], m1 - - mova m1, [sqq+16] - pmullw m1, [arg3q+16] ; mm4 *= kernel 0 modifiers. - mova [dqq+16], m1 - - mova m1, [sqq+24] - pmullw m1, [arg3q+24] ; mm4 *= kernel 0 modifiers. - mova [dqq+24], m1 - RET - - -;void dequant_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride) -cglobal dequant_idct_add_mmx, 4,6,0,inp,dq,pred,dest,pit,stride - -%if ARCH_X86_64 - movsxd strideq, dword stridem - movsxd pitq, dword pitm -%else - mov strideq, stridem - mov pitq, pitm -%endif - - mova m0, [inpq+ 0] - pmullw m0, [dqq] - - mova m1, [inpq+ 8] - pmullw m1, [dqq+ 8] - - mova m2, [inpq+16] - pmullw m2, [dqq+16] - - mova m3, [inpq+24] - pmullw m3, [dqq+24] - - pxor m7, m7 - mova [inpq], m7 - mova [inpq+8], m7 - mova [inpq+16], m7 - mova [inpq+24], m7 - - - psubw m0, m2 ; b1= 0-2 - paddw m2, m2 ; - - mova m5, m1 - paddw m2, m0 ; a1 =0+2 - - pmulhw m5, [x_s1sqr2]; - paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2) - - mova m7, m3 ; - pmulhw m7, [x_c1sqr2less1]; - - paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2) - psubw m7, m5 ; c1 - - mova m5, m1 - mova m4, m3 - - pmulhw m5, [x_c1sqr2less1] - paddw m5, m1 - - pmulhw m3, [x_s1sqr2] - paddw m3, m4 - - paddw m3, m5 ; d1 - mova m6, m2 ; a1 - - mova m4, m0 ; b1 - paddw m2, m3 ;0 - - paddw m4, m7 ;1 - psubw m0, m7 ;2 - - psubw m6, m3 ;3 - - mova m1, m2 ; 03 02 01 00 - mova m3, m4 ; 23 22 21 20 - - punpcklwd m1, m0 ; 11 01 10 00 - punpckhwd m2, m0 ; 13 03 12 02 - - punpcklwd m3, m6 ; 31 21 30 20 - punpckhwd m4, m6 ; 33 23 32 22 - - mova m0, m1 ; 11 01 10 00 - mova m5, m2 ; 13 03 12 02 - - punpckldq m0, m3 ; 30 20 10 00 - punpckhdq m1, m3 ; 31 21 11 01 - - punpckldq m2, m4 ; 32 22 12 02 - punpckhdq m5, m4 ; 33 23 13 03 - - mova m3, m5 ; 33 23 13 03 - - psubw m0, m2 ; b1= 0-2 - paddw m2, m2 ; - - mova m5, m1 - paddw m2, m0 ; a1 =0+2 - - pmulhw m5, [x_s1sqr2]; - paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2) - - mova m7, m3 ; - pmulhw m7, [x_c1sqr2less1]; - - paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2) - psubw m7, m5 ; c1 - - mova m5, m1 - mova m4, m3 - - pmulhw m5, [x_c1sqr2less1] - paddw m5, m1 - - pmulhw m3, [x_s1sqr2] - paddw m3, m4 - - paddw m3, m5 ; d1 - paddw m0, [pw_16] - - paddw m2, [pw_16] - mova m6, m2 ; a1 - - mova m4, m0 ; b1 - paddw m2, m3 ;0 - - paddw m4, m7 ;1 - psubw m0, m7 ;2 - - psubw m6, m3 ;3 - psraw m2, 5 - - psraw m0, 5 - psraw m4, 5 - - psraw m6, 5 - - mova m1, m2 ; 03 02 01 00 - mova m3, m4 ; 23 22 21 20 - - punpcklwd m1, m0 ; 11 01 10 00 - punpckhwd m2, m0 ; 13 03 12 02 - - punpcklwd m3, m6 ; 31 21 30 20 - punpckhwd m4, m6 ; 33 23 32 22 - - mova m0, m1 ; 11 01 10 00 - mova m5, m2 ; 13 03 12 02 - - punpckldq m0, m3 ; 30 20 10 00 - punpckhdq m1, m3 ; 31 21 11 01 - - punpckldq m2, m4 ; 32 22 12 02 - punpckhdq m5, m4 ; 33 23 13 03 - - pxor m7, m7 - - movh m4, [predq] - punpcklbw m4, m7 - paddsw m0, m4 - packuswb m0, m7 - movh [destq], m0 - - movh m4, [predq+pitq] - punpcklbw m4, m7 - paddsw m1, m4 - packuswb m1, m7 - movh [destq+strideq], m1 - - movh m4, [predq+2*pitq] - punpcklbw m4, m7 - paddsw m2, m4 - packuswb m2, m7 - movh [destq+strideq*2], m2 - - add destq, strideq - add predq, pitq - - movh m4, [predq+2*pitq] - punpcklbw m4, m7 - paddsw m5, m4 - packuswb m5, m7 - movh [destq+strideq*2], m5 - RET - - -;void dequant_dc_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc) -cglobal dequant_dc_idct_add_mmx, 4,7,0,inp,dq,pred,dest,pit,stride,Dc - -%if ARCH_X86_64 - movsxd strideq, dword stridem - movsxd pitq, dword pitm -%else - mov strideq, stridem - mov pitq, pitm -%endif - - mov Dcq, Dcm - mova m0, [inpq+ 0] - pmullw m0, [dqq+ 0] - - mova m1, [inpq+ 8] - pmullw m1, [dqq+ 8] - - mova m2, [inpq+16] - pmullw m2, [dqq+16] - - mova m3, [inpq+24] - pmullw m3, [dqq+24] - - pxor m7, m7 - mova [inpq+ 0], m7 - mova [inpq+ 8], m7 - mova [inpq+16], m7 - mova [inpq+24], m7 - - ; move lower word of Dc to lower word of m0 - psrlq m0, 16 - psllq m0, 16 - and Dcq, 0xFFFF ; If Dc < 0, we don't want the full dword precision. - movh m7, Dcq - por m0, m7 - psubw m0, m2 ; b1= 0-2 - paddw m2, m2 ; - - mova m5, m1 - paddw m2, m0 ; a1 =0+2 - - pmulhw m5, [x_s1sqr2]; - paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2) - - mova m7, m3 ; - pmulhw m7, [x_c1sqr2less1]; - - paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2) - psubw m7, m5 ; c1 - - mova m5, m1 - mova m4, m3 - - pmulhw m5, [x_c1sqr2less1] - paddw m5, m1 - - pmulhw m3, [x_s1sqr2] - paddw m3, m4 - - paddw m3, m5 ; d1 - mova m6, m2 ; a1 - - mova m4, m0 ; b1 - paddw m2, m3 ;0 - - paddw m4, m7 ;1 - psubw m0, m7 ;2 - - psubw m6, m3 ;3 - - mova m1, m2 ; 03 02 01 00 - mova m3, m4 ; 23 22 21 20 - - punpcklwd m1, m0 ; 11 01 10 00 - punpckhwd m2, m0 ; 13 03 12 02 - - punpcklwd m3, m6 ; 31 21 30 20 - punpckhwd m4, m6 ; 33 23 32 22 - - mova m0, m1 ; 11 01 10 00 - mova m5, m2 ; 13 03 12 02 - - punpckldq m0, m3 ; 30 20 10 00 - punpckhdq m1, m3 ; 31 21 11 01 - - punpckldq m2, m4 ; 32 22 12 02 - punpckhdq m5, m4 ; 33 23 13 03 - - mova m3, m5 ; 33 23 13 03 - - psubw m0, m2 ; b1= 0-2 - paddw m2, m2 ; - - mova m5, m1 - paddw m2, m0 ; a1 =0+2 - - pmulhw m5, [x_s1sqr2]; - paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2) - - mova m7, m3 ; - pmulhw m7, [x_c1sqr2less1]; - - paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2) - psubw m7, m5 ; c1 - - mova m5, m1 - mova m4, m3 - - pmulhw m5, [x_c1sqr2less1] - paddw m5, m1 - - pmulhw m3, [x_s1sqr2] - paddw m3, m4 - - paddw m3, m5 ; d1 - paddw m0, [pw_16] - - paddw m2, [pw_16] - mova m6, m2 ; a1 - - mova m4, m0 ; b1 - paddw m2, m3 ;0 - - paddw m4, m7 ;1 - psubw m0, m7 ;2 - - psubw m6, m3 ;3 - psraw m2, 5 - - psraw m0, 5 - psraw m4, 5 - - psraw m6, 5 - - mova m1, m2 ; 03 02 01 00 - mova m3, m4 ; 23 22 21 20 - - punpcklwd m1, m0 ; 11 01 10 00 - punpckhwd m2, m0 ; 13 03 12 02 - - punpcklwd m3, m6 ; 31 21 30 20 - punpckhwd m4, m6 ; 33 23 32 22 - - mova m0, m1 ; 11 01 10 00 - mova m5, m2 ; 13 03 12 02 - - punpckldq m0, m3 ; 30 20 10 00 - punpckhdq m1, m3 ; 31 21 11 01 - - punpckldq m2, m4 ; 32 22 12 02 - punpckhdq m5, m4 ; 33 23 13 03 - - pxor m7, m7 - - movh m4, [predq] - punpcklbw m4, m7 - paddsw m0, m4 - packuswb m0, m7 - movh [destq], m0 - - movh m4, [predq+pitq] - punpcklbw m4, m7 - paddsw m1, m4 - packuswb m1, m7 - movh [destq+strideq], m1 - - movh m4, [predq+2*pitq] - punpcklbw m4, m7 - paddsw m2, m4 - packuswb m2, m7 - movh [destq+strideq*2], m2 - - add destq, strideq - add predq, pitq - - movh m4, [predq+2*pitq] - punpcklbw m4, m7 - paddsw m5, m4 - packuswb m5, m7 - movh [destq+strideq*2], m5 - RET - diff --git a/vp9/decoder/x86/vp9_x86_dsystemdependent.c b/vp9/decoder/x86/vp9_x86_dsystemdependent.c deleted file mode 100644 index 51ee8ec31..000000000 --- a/vp9/decoder/x86/vp9_x86_dsystemdependent.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "vpx_ports/x86.h" -#include "vp9/decoder/vp9_onyxd_int.h" - -#if HAVE_MMX -void vp9_dequantize_b_impl_mmx(short *sq, short *dq, short *q); - -void vp9_dequantize_b_mmx(BLOCKD *d) { - short *sq = (short *) d->qcoeff; - short *dq = (short *) d->dqcoeff; - short *q = (short *) d->dequant; - vp9_dequantize_b_impl_mmx(sq, dq, q); -} -#endif - -