Merge pull request #162 from Vproject/yasm

Allow yasm to be used instead of nasm.
This commit is contained in:
Ethan Hugg 2014-01-18 08:55:49 -08:00
commit a50d54f8e8
5 changed files with 42 additions and 42 deletions

View File

@ -392,7 +392,7 @@ UpdateMbMv_sse2:
;mov eax, [esp+4] ; mv_buffer
;movd xmm0, [esp+8] ; _mv
movd xmm0, r1d ; _mv
pshufd xmm1, xmm0, $0
pshufd xmm1, xmm0, $00
movdqa [r0 ], xmm1
movdqa [r0+0x10], xmm1
movdqa [r0+0x20], xmm1

View File

@ -101,7 +101,7 @@
punpcklwd %1, %2
punpckhwd %3, %4
punpcklwd %1, %3
psraw %1, $4
psraw %1, $04
%endmacro
%macro VAA_AVG_BLOCK_SSSE3 6 ; dst, t0, t1, t2, t3, t4
@ -129,7 +129,7 @@
paddw %2, %4 ; block 2, 3
phaddw %1, %2 ; block[0]: 0-15, 16-31; block[1]: 32-47, 48-63; ..
phaddw %1, xmm7 ; block[0]: 0-15; block[1]: 16-31; block[2]: 32-47; block[3]: 48-63; ....
psraw %1, $4
psraw %1, $04
%endmacro
@ -178,12 +178,12 @@ AnalysisVaaInfoIntra_sse2:
mov r2,r1
sal r2,$1 ;r2 = 2*iLineSize
sal r2,$01 ;r2 = 2*iLineSize
mov r3,r2
add r3,r1 ;r3 = 3*iLineSize
mov r4,r2
sal r4,$1 ;r4 = 4*iLineSize
sal r4,$01 ;r4 = 4*iLineSize
pxor xmm7, xmm7
@ -231,7 +231,7 @@ AnalysisVaaInfoIntra_sse2:
and r2, 0ffffh ; effective low word truncated
mov r3, r2
imul r2, r3
sar r2, $4
sar r2, $04
movd retrd, xmm1
sub retrd, r2d
@ -273,12 +273,12 @@ AnalysisVaaInfoIntra_ssse3:
mov r2,r1
sal r2,$1 ;r2 = 2*iLineSize
sal r2,$01 ;r2 = 2*iLineSize
mov r3,r2
add r3,r1 ;r3 = 3*iLineSize
mov r4,r2
sal r4,$1 ;r4 = 4*iLineSize
sal r4,$01 ;r4 = 4*iLineSize
pxor xmm7, xmm7
@ -327,7 +327,7 @@ AnalysisVaaInfoIntra_ssse3:
and r2, 0ffffh ; effective low word truncated
mov r3, r2
imul r2, r3
sar r2, $4
sar r2, $04
movd retrd, xmm1
sub retrd, r2d

View File

@ -86,17 +86,17 @@ SSE2_DeQuant8 dw 10, 13, 10, 13, 13, 16, 13, 16,
%macro MMX_SumSubMul2 3
movq %3, %1
psllw %1, $1
psllw %1, $01
paddw %1, %2
psllw %2, $1
psllw %2, $01
psubw %3, %2
%endmacro
%macro MMX_SumSubDiv2 3
movq %3, %2
psraw %3, $1
psraw %3, $01
paddw %3, %1
psraw %1, $1
psraw %1, $01
psubw %1, %2
%endmacro
@ -124,7 +124,7 @@ SSE2_DeQuant8 dw 10, 13, 10, 13, 13, 16, 13, 16,
movd %2, %6
punpcklbw %2, %4
paddw %1, %3
psraw %1, $6
psraw %1, $06
paddsw %1, %2
packuswb %1, %2
movd %5, %1
@ -255,15 +255,15 @@ WelsIDctT4Rec_mmx:
%macro SSE2_SumSubDiv2 4
movdqa %4, %1
movdqa %3, %2
psraw %2, $1
psraw %4, $1
psraw %2, $01
psraw %4, $01
paddw %1, %2
psubw %4, %3
%endmacro
%macro SSE2_StoreDiff8p 6
paddw %1, %3
psraw %1, $6
psraw %1, $06
movq %2, %6
punpcklbw %2, %4
paddsw %2, %1
@ -282,7 +282,7 @@ WelsIDctT4Rec_mmx:
%macro SSE2_Load8DC 6
movdqa %1, %6 ; %1 = dc0 dc1
paddw %1, %5
psraw %1, $6 ; (dc + 32) >> 6
psraw %1, $06 ; (dc + 32) >> 6
movdqa %2, %1
psrldq %2, 4

View File

@ -94,13 +94,13 @@ DyadicBilinearDownsamplerWidthx32_sse:
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
sar ebp, $1 ; iSrcHeight >> 1
sar ebp, $01 ; iSrcHeight >> 1
.yloops:
mov eax, [esp+40] ; iSrcWidth
sar eax, $1 ; iSrcWidth >> 1
sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
sar eax, $4 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
sar eax, $04 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 32 bytes
.xloops:
@ -247,13 +247,13 @@ DyadicBilinearDownsamplerWidthx16_sse:
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
sar ebp, $1 ; iSrcHeight >> 1
sar ebp, $01 ; iSrcHeight >> 1
.yloops:
mov eax, [esp+40] ; iSrcWidth
sar eax, $1 ; iSrcWidth >> 1
sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
sar eax, $3 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
sar eax, $03 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 16 bytes
.xloops:
@ -351,13 +351,13 @@ DyadicBilinearDownsamplerWidthx8_sse:
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
sar ebp, $1 ; iSrcHeight >> 1
sar ebp, $01 ; iSrcHeight >> 1
.yloops:
mov eax, [esp+40] ; iSrcWidth
sar eax, $1 ; iSrcWidth >> 1
sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
sar eax, $2 ; (iSrcWidth >> 1) / 4 ; loop count = num_of_mb
sar eax, $02 ; (iSrcWidth >> 1) / 4 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 8 bytes
.xloops:
@ -442,16 +442,16 @@ DyadicBilinearDownsamplerWidthx32_ssse3:
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
sar ebp, $1 ; iSrcHeight >> 1
sar ebp, $01 ; iSrcHeight >> 1
movdqa xmm7, [shufb_mask_low] ; mask low
movdqa xmm6, [shufb_mask_high] ; mask high
.yloops:
mov eax, [esp+40] ; iSrcWidth
sar eax, $1 ; iSrcWidth >> 1
sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
sar eax, $4 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
sar eax, $04 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 32 bytes
.xloops:
@ -553,15 +553,15 @@ DyadicBilinearDownsamplerWidthx16_ssse3:
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
sar ebp, $1 ; iSrcHeight >> 1
sar ebp, $01 ; iSrcHeight >> 1
movdqa xmm7, [shufb_mask_low] ; mask low
movdqa xmm6, [shufb_mask_high] ; mask high
.yloops:
mov eax, [esp+40] ; iSrcWidth
sar eax, $1 ; iSrcWidth >> 1
sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
sar eax, $3 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
sar eax, $03 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 16 bytes
.xloops:
@ -643,16 +643,16 @@ DyadicBilinearDownsamplerWidthx32_sse4:
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
sar ebp, $1 ; iSrcHeight >> 1
sar ebp, $01 ; iSrcHeight >> 1
movdqa xmm7, [shufb_mask_low] ; mask low
movdqa xmm6, [shufb_mask_high] ; mask high
.yloops:
mov eax, [esp+40] ; iSrcWidth
sar eax, $1 ; iSrcWidth >> 1
sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
sar eax, $4 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
sar eax, $04 ; (iSrcWidth >> 1) / 16 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 32 bytes
.xloops:
@ -753,15 +753,15 @@ DyadicBilinearDownsamplerWidthx16_sse4:
mov ecx, [esp+36] ; iSrcStride
mov ebp, [esp+44] ; iSrcHeight
sar ebp, $1 ; iSrcHeight >> 1
sar ebp, $01 ; iSrcHeight >> 1
movdqa xmm7, [shufb_mask_low] ; mask low
movdqa xmm6, [shufb_mask_high] ; mask high
.yloops:
mov eax, [esp+40] ; iSrcWidth
sar eax, $1 ; iSrcWidth >> 1
sar eax, $01 ; iSrcWidth >> 1
mov ebx, eax ; iDstWidth restored at ebx
sar eax, $3 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
sar eax, $03 ; (iSrcWidth >> 1) / 8 ; loop count = num_of_mb
neg ebx ; - (iSrcWidth >> 1)
; each loop = source bandwidth: 16 bytes
.xloops:

View File

@ -121,7 +121,7 @@
punpcklwd %1, %2
punpckhwd %3, %4
punpcklwd %1, %3
psraw %1, $4
psraw %1, $04
%endmacro
%macro VAA_AVG_BLOCK_SSSE3 6 ; dst, t0, t1, t2, t3, t4
@ -149,7 +149,7 @@
paddw %2, %4 ; block 2, 3
phaddw %1, %2 ; block[0]: 0-15, 16-31; block[1]: 32-47, 48-63; ..
phaddw %1, xmm7 ; block[0]: 0-15; block[1]: 16-31; block[2]: 32-47; block[3]: 48-63; ....
psraw %1, $4
psraw %1, $04
%endmacro
%macro WELS_SAD_16x2_SSE2 0
@ -353,7 +353,7 @@ rc_sad_frame_sse2:
pxor xmm0, xmm0
.hloop:
mov eax, ebx
mov ebp, $0
mov ebp, $00
.wloop:
movdqa xmm1, [esi+ebp]
movdqa xmm2, [edi+ebp]