Merge pull request #147 from volvet/illegal_assembly_fix

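Fix illegal instruction use: rename the deblocking routines from the _sse2 suffix to _ssse3 and select them in DeblockingInit only when WELS_CPU_SSSE3 is set (previously WELS_CPU_SSE2).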
This commit is contained in:
Ethan Hugg 2014-01-17 14:13:24 -08:00
commit 2011a7407e
5 changed files with 73 additions and 73 deletions

View File

@ -60,9 +60,9 @@ SECTION .text
%ifdef WIN64
WELS_EXTERN DeblockLumaLt4V_sse2
WELS_EXTERN DeblockLumaLt4V_ssse3
DeblockLumaLt4V_sse2:
DeblockLumaLt4V_ssse3:
push rbp
mov r11,[rsp + 16 + 20h] ; pTC
sub rsp,1B0h
@ -317,10 +317,10 @@ DeblockLumaLt4V_sse2:
ret
WELS_EXTERN DeblockLumaEq4V_sse2
WELS_EXTERN DeblockLumaEq4V_ssse3
ALIGN 16
DeblockLumaEq4V_sse2:
DeblockLumaEq4V_ssse3:
mov rax,rsp
push rbx
push rbp
@ -780,10 +780,10 @@ DeblockLumaEq4V_sse2:
ret
WELS_EXTERN DeblockChromaLt4V_sse2
WELS_EXTERN DeblockChromaLt4V_ssse3
ALIGN 16
DeblockChromaLt4V_sse2:
DeblockChromaLt4V_ssse3:
mov rax,rsp
push rbx
push rdi
@ -942,9 +942,9 @@ DeblockChromaLt4V_sse2:
ret
WELS_EXTERN DeblockChromaEq4V_sse2
WELS_EXTERN DeblockChromaEq4V_ssse3
ALIGN 16
DeblockChromaEq4V_sse2:
DeblockChromaEq4V_ssse3:
mov rax,rsp
push rbx
sub rsp,90h
@ -1096,9 +1096,9 @@ DeblockChromaEq4V_sse2:
WELS_EXTERN DeblockChromaEq4H_sse2
WELS_EXTERN DeblockChromaEq4H_ssse3
ALIGN 16
DeblockChromaEq4H_sse2:
DeblockChromaEq4H_ssse3:
mov rax,rsp
mov [rax+20h],rbx
push rdi
@ -1360,9 +1360,9 @@ DeblockChromaEq4H_sse2:
WELS_EXTERN DeblockChromaLt4H_sse2
WELS_EXTERN DeblockChromaLt4H_ssse3
ALIGN 16
DeblockChromaLt4H_sse2:
DeblockChromaLt4H_ssse3:
mov rax,rsp
push rbx
push rbp
@ -1646,9 +1646,9 @@ DeblockChromaLt4H_sse2:
%elifdef UNIX64
WELS_EXTERN DeblockLumaLt4V_sse2
WELS_EXTERN DeblockLumaLt4V_ssse3
DeblockLumaLt4V_sse2:
DeblockLumaLt4V_ssse3:
push rbp
mov r11,r8 ; pTC
sub rsp,1B0h
@ -1903,10 +1903,10 @@ DeblockLumaLt4V_sse2:
ret
WELS_EXTERN DeblockLumaEq4V_sse2
WELS_EXTERN DeblockLumaEq4V_ssse3
ALIGN 16
DeblockLumaEq4V_sse2:
DeblockLumaEq4V_ssse3:
mov rax,rsp
push rbx
push rbp
@ -2365,9 +2365,9 @@ DeblockLumaEq4V_sse2:
pop rbx
ret
WELS_EXTERN DeblockChromaLt4V_sse2
WELS_EXTERN DeblockChromaLt4V_ssse3
ALIGN 16
DeblockChromaLt4V_sse2:
DeblockChromaLt4V_ssse3:
mov rax,rsp
push rbx
push rbp
@ -2533,9 +2533,9 @@ DeblockChromaLt4V_sse2:
pop rbx
ret
WELS_EXTERN DeblockChromaEq4V_sse2
ALIGN 16
DeblockChromaEq4V_sse2:
WELS_EXTERN DeblockChromaEq4V_ssse3
DeblockChromaEq4V_ssse3:
mov rax,rsp
push rbx
push rbp
@ -2684,10 +2684,10 @@ DeblockChromaEq4V_sse2:
pop rbx
ret
WELS_EXTERN DeblockChromaEq4H_ssse3
WELS_EXTERN DeblockChromaEq4H_sse2
ALIGN 16
DeblockChromaEq4H_sse2:
DeblockChromaEq4H_ssse3:
mov rax,rsp
push rbx
push rbp
@ -2959,9 +2959,9 @@ DeblockChromaEq4H_sse2:
ret
WELS_EXTERN DeblockChromaLt4H_sse2
WELS_EXTERN DeblockChromaLt4H_ssse3
ALIGN 16
DeblockChromaLt4H_sse2:
DeblockChromaLt4H_ssse3:
mov rax,rsp
push rbx
push rbp
@ -3252,13 +3252,13 @@ DeblockChromaLt4H_sse2:
%elifdef X86_32
;********************************************************************************
; void DeblockChromaEq4V_sse2(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; void DeblockChromaEq4V_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; int32_t iAlpha, int32_t iBeta)
;********************************************************************************
WELS_EXTERN DeblockChromaEq4V_sse2
WELS_EXTERN DeblockChromaEq4V_ssse3
ALIGN 16
DeblockChromaEq4V_sse2:
DeblockChromaEq4V_ssse3:
push ebp
mov ebp,esp
and esp,0FFFFFFF0h
@ -3421,13 +3421,13 @@ DeblockChromaEq4V_sse2:
ret
;******************************************************************************
; void DeblockChromaLt4V_sse2(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; void DeblockChromaLt4V_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; int32_t iAlpha, int32_t iBeta, int8_t * pTC);
;*******************************************************************************
WELS_EXTERN DeblockChromaLt4V_sse2
WELS_EXTERN DeblockChromaLt4V_ssse3
DeblockChromaLt4V_sse2:
DeblockChromaLt4V_ssse3:
push ebp
mov ebp,esp
and esp,0FFFFFFF0h
@ -3624,15 +3624,15 @@ DeblockChromaLt4V_sse2:
ret
;***************************************************************************
; void DeblockChromaEq4H_sse2(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; void DeblockChromaEq4H_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; int32_t iAlpha, int32_t iBeta)
;***************************************************************************
WELS_EXTERN DeblockChromaEq4H_sse2
WELS_EXTERN DeblockChromaEq4H_ssse3
ALIGN 16
DeblockChromaEq4H_sse2:
DeblockChromaEq4H_ssse3:
push ebp
mov ebp,esp
and esp,0FFFFFFF0h
@ -3909,15 +3909,15 @@ DeblockChromaEq4H_sse2:
ret
;*******************************************************************************
; void DeblockChromaLt4H_sse2(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; void DeblockChromaLt4H_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; int32_t iAlpha, int32_t iBeta, int8_t * pTC);
;*******************************************************************************
WELS_EXTERN DeblockChromaLt4H_sse2
WELS_EXTERN DeblockChromaLt4H_ssse3
ALIGN 16
DeblockChromaLt4H_sse2:
DeblockChromaLt4H_ssse3:
push ebp
mov ebp,esp
and esp,0FFFFFFF0h
@ -4224,16 +4224,16 @@ DeblockChromaLt4H_sse2:
;*******************************************************************************
; void DeblockLumaLt4V_sse2(uint8_t * pPix, int32_t iStride, int32_t iAlpha,
; void DeblockLumaLt4V_ssse3(uint8_t * pPix, int32_t iStride, int32_t iAlpha,
; int32_t iBeta, int8_t * pTC)
;*******************************************************************************
WELS_EXTERN DeblockLumaLt4V_sse2
WELS_EXTERN DeblockLumaLt4V_ssse3
ALIGN 16
DeblockLumaLt4V_sse2:
DeblockLumaLt4V_ssse3:
push ebp
mov ebp, esp
and esp, -16 ; fffffff0H
@ -4616,15 +4616,15 @@ DeblockLumaLt4V_sse2:
;*******************************************************************************
; void DeblockLumaEq4V_sse2(uint8_t * pPix, int32_t iStride, int32_t iAlpha,
; void DeblockLumaEq4V_ssse3(uint8_t * pPix, int32_t iStride, int32_t iAlpha,
; int32_t iBeta)
;*******************************************************************************
WELS_EXTERN DeblockLumaEq4V_sse2
WELS_EXTERN DeblockLumaEq4V_ssse3
ALIGN 16
DeblockLumaEq4V_sse2:
DeblockLumaEq4V_ssse3:
push ebp
mov ebp, esp

View File

@ -183,19 +183,19 @@ void_t DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, i
#ifdef X86_ASM
extern "C" {
void DeblockLumaLt4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
FORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);
DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
DeblockLumaLt4V_sse2 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc);
DeblockLumaLt4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc);
DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]);
}
void DeblockLumaEq4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
void DeblockLumaEq4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
FORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);
DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
DeblockLumaEq4V_sse2 (&uiBuf[4 * 16], 16, iAlpha, iBeta);
DeblockLumaEq4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta);
DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]);
}

View File

@ -20,17 +20,17 @@ extern "C" {
#endif//__cplusplus
#ifdef X86_ASM
void DeblockLumaLt4V_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4V_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockLumaLt4V_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4V_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockLumaTransposeH2V_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pDst);
void DeblockLumaTransposeV2H_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pSrc);
void DeblockLumaLt4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaEq4V_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4V_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaEq4V_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4V_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4H_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
void DeblockChromaEq4H_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
#endif
#if defined(__cplusplus)

View File

@ -708,15 +708,15 @@ void_t DeblockingInit (SDeblockingFunc* pFunc, int32_t iCpu) {
pFunc->pfChromaDeblockinEQ4Hor = DeblockChromaEq4H_c;
#ifdef X86_ASM
if (iCpu & WELS_CPU_SSE2) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_sse2;
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_sse2;
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_sse2;
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_sse2;
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_sse2;
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_sse2;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_sse2;
pFunc->pfChromaDeblockinEQ4Hor = DeblockChromaEq4H_sse2;
if (iCpu & WELS_CPU_SSSE3) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_ssse3;
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_ssse3;
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_ssse3;
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_ssse3;
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3;
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3;
pFunc->pfChromaDeblockinEQ4Hor = DeblockChromaEq4H_ssse3;
}
#endif

View File

@ -787,15 +787,15 @@ void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
#ifdef X86_ASM
if (iCpu & WELS_CPU_SSE2) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_sse2;
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_sse2;
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_sse2;
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_sse2;
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_sse2;
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_sse2;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_sse2;
pFunc->pfChromaDeblockinEQ4Hor = DeblockChromaEq4H_sse2;
if (iCpu & WELS_CPU_SSSE3) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_ssse3;
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_ssse3;
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_ssse3;
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_ssse3;
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3;
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3;
pFunc->pfChromaDeblockinEQ4Hor = DeblockChromaEq4H_ssse3;
}
#endif
}