Merge "Remove unused file" into experimental
This commit is contained in:
commit
d6ff6fe2ed
@ -1,241 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%include "third_party/x86inc/x86inc.asm"
|
||||
|
||||
SECTION_RODATA

; Multiplier constants for the 4x4 inverse transform. Each label is one
; 8-byte MMX memory operand holding the same 16-bit word in all four lanes.

; 35468 (0x8A8C): 16-bit fixed-point sqrt(2) * sin(pi/8). The value is
; negative when read as a signed word, so users add the multiplicand back
; after pmulhw.
align 16
x_s1sqr2:
    dw 35468, 35468, 35468, 35468

; 20091 (0x4E7B): 16-bit fixed-point sqrt(2) * cos(pi/8) - 1. Users add the
; multiplicand back after pmulhw to restore the "+ 1".
align 16
x_c1sqr2less1:
    dw 20091, 20091, 20091, 20091

; Rounding bias added before the final arithmetic shift right by 5.
align 16
pw_16:
    dw 16, 16, 16, 16
|
||||
|
||||
SECTION .text
|
||||
|
||||
|
||||
; /****************************************************************************
; * Notes:
; *
; * This implementation uses 16-bit fixed-point versions of two multiplier
; * constants:
; *        1.   sqrt(2) * cos (pi/8)
; *        2.   sqrt(2) * sin (pi/8)
; * Because the first constant is bigger than 1, to maintain the same 16-bit
; * fixed-point precision as the second one, we use the identity
; *        x * a = x + x*(a-1)
; * so
; *        x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) * cos(pi/8) - 1).
; *
; * For the second constant, the 16-bit value is 35468, which does not fit in
; * a signed 16-bit word, so in a signed 16-bit multiply it is treated as a
; * negative number (35468 - 65536). Adding x back compensates for this:
; *        (x * (unsigned)35468) >> 16 = ((x * (signed)35468) >> 16) + x
; *
; **************************************************************************/
|
||||
|
||||
INIT_MMX
|
||||
|
||||
;void short_idct4x4llm_mmx(short *input, short *output, int pitch)
;
; 4x4 inverse transform using MMX registers.
;   input  : 16 words read as four 8-byte rows at input+0/8/16/24.
;   output : four 8-byte rows written at output, output+pitch,
;            output+2*pitch and output+3*pitch (pitch is a byte offset).
; The same 1-D butterfly is run twice; a 4x4 word transpose follows each
; pass. The second pass adds 16 and arithmetic-shifts right by 5.
cglobal short_idct4x4llm_mmx, 3,3,0, inp, out, pit
%if ARCH_X86_64
    ; pitch is a 32-bit int but is used below as a 64-bit address offset.
    ; The upper half of the argument register is not guaranteed to be
    ; sign-extended by the caller, so extend it explicitly.
    movsxd      pitq, pitd
%endif

    ; ---- load the four input rows -------------------------------------
    mova        m0, [inpq +0]
    mova        m1, [inpq +8]
    mova        m2, [inpq+16]
    mova        m3, [inpq+24]

    ; ---- first pass ---------------------------------------------------
    psubw       m0, m2                  ; b1 = ip0 - ip2
    paddw       m2, m2                  ; 2 * ip2
    mova        m5, m1
    paddw       m2, m0                  ; a1 = ip0 + ip2

    pmulhw      m5, [x_s1sqr2]
    paddw       m5, m1                  ; ip1 * sin(pi/8) * sqrt(2)

    mova        m7, m3
    pmulhw      m7, [x_c1sqr2less1]
    paddw       m7, m3                  ; ip3 * cos(pi/8) * sqrt(2)
    psubw       m7, m5                  ; c1 = ip3*cos - ip1*sin

    mova        m5, m1
    mova        m4, m3

    pmulhw      m5, [x_c1sqr2less1]
    paddw       m5, m1                  ; ip1 * cos(pi/8) * sqrt(2)

    pmulhw      m3, [x_s1sqr2]
    paddw       m3, m4                  ; ip3 * sin(pi/8) * sqrt(2)

    paddw       m3, m5                  ; d1 = ip1*cos + ip3*sin
    mova        m6, m2                  ; a1
    mova        m4, m0                  ; b1

    paddw       m2, m3                  ; a1 + d1
    paddw       m4, m7                  ; b1 + c1
    psubw       m0, m7                  ; b1 - c1
    psubw       m6, m3                  ; a1 - d1

    ; ---- transpose (m2, m0, m4, m6 are taken as rows 0..3) -------------
    mova        m1, m2                  ; 03 02 01 00
    mova        m3, m4                  ; 23 22 21 20

    punpcklwd   m1, m0                  ; 11 01 10 00
    punpckhwd   m2, m0                  ; 13 03 12 02

    punpcklwd   m3, m6                  ; 31 21 30 20
    punpckhwd   m4, m6                  ; 33 23 32 22

    mova        m0, m1                  ; 11 01 10 00
    mova        m5, m2                  ; 13 03 12 02

    punpckldq   m0, m3                  ; 30 20 10 00
    punpckhdq   m1, m3                  ; 31 21 11 01

    punpckldq   m2, m4                  ; 32 22 12 02
    punpckhdq   m5, m4                  ; 33 23 13 03

    mova        m3, m5                  ; 33 23 13 03

    ; ---- second pass (same butterfly on the transposed data) ----------
    psubw       m0, m2                  ; b1 = ip0 - ip2
    paddw       m2, m2                  ; 2 * ip2
    mova        m5, m1
    paddw       m2, m0                  ; a1 = ip0 + ip2

    pmulhw      m5, [x_s1sqr2]
    paddw       m5, m1                  ; ip1 * sin(pi/8) * sqrt(2)

    mova        m7, m3
    pmulhw      m7, [x_c1sqr2less1]
    paddw       m7, m3                  ; ip3 * cos(pi/8) * sqrt(2)
    psubw       m7, m5                  ; c1 = ip3*cos - ip1*sin

    mova        m5, m1
    mova        m4, m3

    pmulhw      m5, [x_c1sqr2less1]
    paddw       m5, m1                  ; ip1 * cos(pi/8) * sqrt(2)

    pmulhw      m3, [x_s1sqr2]
    paddw       m3, m4                  ; ip3 * sin(pi/8) * sqrt(2)

    paddw       m3, m5                  ; d1 = ip1*cos + ip3*sin

    ; Bias a1 and b1 once; every output below is a1 +/- d1 or b1 +/- c1,
    ; so all four rows receive the +16 rounding term.
    paddw       m0, [pw_16]
    paddw       m2, [pw_16]
    mova        m6, m2                  ; a1
    mova        m4, m0                  ; b1

    paddw       m2, m3                  ; a1 + d1
    paddw       m4, m7                  ; b1 + c1
    psubw       m0, m7                  ; b1 - c1
    psubw       m6, m3                  ; a1 - d1

    psraw       m2, 5
    psraw       m0, 5
    psraw       m4, 5
    psraw       m6, 5

    ; ---- transpose back ------------------------------------------------
    mova        m1, m2                  ; 03 02 01 00
    mova        m3, m4                  ; 23 22 21 20

    punpcklwd   m1, m0                  ; 11 01 10 00
    punpckhwd   m2, m0                  ; 13 03 12 02

    punpcklwd   m3, m6                  ; 31 21 30 20
    punpckhwd   m4, m6                  ; 33 23 32 22

    mova        m0, m1                  ; 11 01 10 00
    mova        m5, m2                  ; 13 03 12 02

    punpckldq   m0, m3                  ; 30 20 10 00
    punpckhdq   m1, m3                  ; 31 21 11 01

    punpckldq   m2, m4                  ; 32 22 12 02
    punpckhdq   m5, m4                  ; 33 23 13 03

    ; ---- store the four output rows -----------------------------------
    mova        [outq], m0
    mova        [outq+pitq], m1
    mova        [outq+pitq*2], m2

    add         outq, pitq              ; so that outq + 2*pitq is row 3
    mova        [outq+pitq*2], m5
    RET
|
||||
|
||||
;void short_idct4x4llm_1_mmx(short *input, short *output, int pitch)
;
; DC-only inverse transform: computes (input[0] + 16) >> 5 and writes that
; word to all four lanes of four 8-byte rows at output, output+pitch,
; output+2*pitch and output+3*pitch (pitch is a byte offset).
cglobal short_idct4x4llm_1_mmx,3,3,0,inp,out,pit
%if ARCH_X86_64
    ; pitch is a 32-bit int but is used below as a 64-bit address offset.
    ; The upper half of the argument register is not guaranteed to be
    ; sign-extended by the caller, so extend it explicitly.
    movsxd      pitq, pitd
%endif
    movh        m0, [inpq]              ; word 0 = input[0] (DC coefficient)
    paddw       m0, [pw_16]             ; rounding bias
    psraw       m0, 5
    punpcklwd   m0, m0                  ; w0 w0 w1 w1
    punpckldq   m0, m0                  ; w0 w0 w0 w0: DC in every lane

    mova        [outq], m0
    mova        [outq+pitq], m0
    mova        [outq+pitq*2], m0

    add         outq, pitq              ; so that outq + 2*pitq is row 3
    mova        [outq+pitq*2], m0
    RET
|
||||
|
||||
|
||||
;void dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
;
; Adds the rounded DC value (input_dc + 16) >> 5 to a 4x4 block of predictor
; bytes and stores the result, clamped to 0..255, in the destination.
;   pred_ptr rows are pitch bytes apart; dst_ptr rows are stride bytes apart.
;   Four bytes are read and written per row, for four rows.
cglobal dc_only_idct_add_mmx, 4,5,0,in_dc,pred,dst,pit,stride
%if ARCH_X86_64
    ; pitch and stride are 32-bit ints used as 64-bit address offsets; the
    ; upper halves are not guaranteed to be sign-extended by the caller.
    movsxd      pitq, pitd
    movsxd      strideq, dword stridem
%else
    mov         strideq, stridem
%endif
    pxor        m0, m0                  ; zero, for byte<->word conversion

    movh        m5, in_dcq              ; dc
    paddw       m5, [pw_16]             ; rounding bias
    psraw       m5, 5

    punpcklwd   m5, m5                  ; w0 w0 w1 w1
    punpckldq   m5, m5                  ; w0 w0 w0 w0: DC in every lane

    ; row 0
    movh        m1, [predq]
    punpcklbw   m1, m0                  ; bytes -> words (zero-extended)
    paddsw      m1, m5
    packuswb    m1, m0                  ; words -> bytes, saturated to 0..255
    movh        [dstq], m1

    ; row 1
    movh        m2, [predq+pitq]
    punpcklbw   m2, m0
    paddsw      m2, m5
    packuswb    m2, m0                  ; words -> bytes, saturated to 0..255
    movh        [dstq+strideq], m2

    ; row 2
    movh        m3, [predq+2*pitq]
    punpcklbw   m3, m0
    paddsw      m3, m5
    packuswb    m3, m0                  ; words -> bytes, saturated to 0..255
    movh        [dstq+2*strideq], m3

    ; row 3: advance both pointers by one row so base + 2*step is row 3
    add         dstq, strideq
    add         predq, pitq
    movh        m4, [predq+2*pitq]
    punpcklbw   m4, m0
    paddsw      m4, m5
    packuswb    m4, m0                  ; words -> bytes, saturated to 0..255
    movh        [dstq+2*strideq], m4
    RET
|
||||
|
Loading…
Reference in New Issue
Block a user