57f6bcc4b0
Previously the assembly sources had mixed indentation consisting of both spaces and tabs, making it quite hard to read unless the right tab size was used in the editor. Tabs have been interpreted as 4 spaces in most cases, matching the surrounding code.
460 lines
13 KiB
NASM
460 lines
13 KiB
NASM
;*!
|
|
;* \copy
|
|
;* Copyright (c) 2010-2013, Cisco Systems
|
|
;* All rights reserved.
|
|
;*
|
|
;* Redistribution and use in source and binary forms, with or without
|
|
;* modification, are permitted provided that the following conditions
|
|
;* are met:
|
|
;*
|
|
;* * Redistributions of source code must retain the above copyright
|
|
;* notice, this list of conditions and the following disclaimer.
|
|
;*
|
|
;* * Redistributions in binary form must reproduce the above copyright
|
|
;* notice, this list of conditions and the following disclaimer in
|
|
;* the documentation and/or other materials provided with the
|
|
;* distribution.
|
|
;*
|
|
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
;* POSSIBILITY OF SUCH DAMAGE.
|
|
;*
|
|
;*
|
|
;* memzero.asm
|
|
;*
|
|
;* Abstract
|
|
;* cavlc
|
|
;*
|
|
;* History
|
|
;* 09/08/2010 Created
|
|
;*
|
|
;*
|
|
;*************************************************************************/
|
|
|
|
%include "asm_inc.asm"
|
|
|
|
|
|
|
|
%ifdef X86_32
|
|
SECTION .rodata align=16

; Eight bytes of value 8.  CavlcParamCal_sse2 adds this to the position bytes
; taken from the table row of the upper eight coefficients.
align 16
sse2_b8:
    times 8 db 8

; Byte mask that is all ones except for byte 7.  CavlcParamCal_sse2 ANDs it
; with a byte_1pos_table row to clear the row's last byte (the count slot).
align 16
sse2_b_1:
    times 7 db -1
    db      0
    times 8 db -1
|
|
|
|
; byte_1pos_table: 256 rows of 8 bytes, indexed by an 8-bit mask.
;   bytes 0..6 : bit positions (7..0) of the set bits of the mask, highest
;                position first, padded with zeros
;   byte 7     : number of set bits in the mask
; Row 255 has eight set bits but only seven position slots: position 0 is
; not stored and byte 7 holds the count 8.
; The rows are generated by the preprocessor instead of being listed by hand.
align 16
byte_1pos_table:
%assign pos1_mask 0
%rep 256
    %assign pos1_count 0
    %assign pos1_bit 7
    %rep 8
        %if (pos1_mask >> pos1_bit) & 1
            %if pos1_count < 7
    db      pos1_bit                    ; position slot (only seven exist)
            %endif
            %assign pos1_count pos1_count + 1
        %endif
        %assign pos1_bit pos1_bit - 1
    %endrep
    %if pos1_count < 7
    times (7 - pos1_count) db 0         ; zero padding of unused position slots
    %endif
    db      pos1_count                  ; byte 7: number of set bits
    %assign pos1_mask pos1_mask + 1
%endrep
%undef pos1_mask
%undef pos1_count
%undef pos1_bit
|
|
|
|
;***********************************************************************
; Code
;***********************************************************************
SECTION .text

;***********************************************************************
; int32_t CavlcParamCal_sse2(int16_t *coffLevel, uint8_t *run, int16_t *Level,
;                            int32_t *total_coeffs, int32_t endIdx);
;
; Scans a block of 16-bit coefficients from the highest index downwards and
; produces:
;   Level[i]       value of the i-th nonzero coefficient (highest index first)
;   run[0..15]     pos[i] - pos[i+1] - 1, where pos[] is the descending list
;                  of nonzero coefficient indices padded with zeros
;   *total_coeffs  number of nonzero coefficients (full 32-bit store)
;   return (eax)   (highest nonzero index + 1) - *total_coeffs, i.e. the
;                  number of zero coefficients below the highest nonzero one
;
; endIdx == 3 reads only coffLevel[0..3]; any other value reads all 16
; coefficients with movdqa, so coffLevel must then be 16-byte aligned.
; run is written with one movdqa (16 bytes) and must be 16-byte aligned.
; Coefficients are classified after signed saturation to bytes (packsswb),
; which keeps zero/nonzero-ness intact.
;
; If every coefficient is zero, *total_coeffs is set to 0, 0 is returned and
; Level/run are left untouched.
;
; Arguments are read from the stack (32-bit x86); ebx, edi and esi are saved
; and restored.  Clobbers eax, ecx, edx, xmm0-xmm7 and flags.
;
; NOTE(review): with the formula above the run entry of the lowest nonzero
; coefficient is pos - 1 (not pos) and entries past total_coeffs are 0xFF;
; presumably callers ignore those -- confirm against the caller.
;***********************************************************************
WELS_EXTERN CavlcParamCal_sse2
    push        ebx
    push        edi
    push        esi

    mov         eax,    [esp+16]            ; eax = coffLevel
    mov         edi,    [esp+24]            ; edi = Level write cursor
    mov         ebx,    [esp+32]            ; ebx = endIdx
    cmp         ebx,    3
    jne         .Level16
    pxor        xmm1,   xmm1                ; coefficients 8..15 treated as zero
    movq        xmm0,   [eax]               ; coefficients 0..3 only
    jmp         .Cal_begin
.Level16:
    movdqa      xmm0,   [eax]               ; coefficients 0..7
    movdqa      xmm1,   [eax+16]            ; coefficients 8..15
.Cal_begin:
    packsswb    xmm0,   xmm1                ; one signed byte per coefficient
    movdqa      xmm4,   xmm0
    pxor        xmm3,   xmm3
    pcmpgtb     xmm0,   xmm3                ; 0xFF where byte > 0
    pcmpgtb     xmm3,   xmm4                ; 0xFF where byte < 0
    por         xmm0,   xmm3                ; 0xFF where coefficient != 0
    pmovmskb    edx,    xmm0                ; dl = mask of 0..7, dh = mask of 8..15
    test        edx,    edx
    jnz         .HasCoeffs

    ; All coefficients are zero: report an empty block explicitly instead of
    ; returning with eax still holding the coffLevel pointer.
    mov         ecx,    [esp+28]            ; total_coeffs
    mov         dword [ecx], 0
    xor         eax,    eax
    jmp         .return

.HasCoeffs:
    movdqa      xmm6,   [sse2_b_1]          ; clears byte 7 (count slot) of a table row
    pcmpeqw     xmm7,   xmm7                ; every byte = -1, used for the run math

    ; ---- upper half: coefficients 8..15 ----
    movzx       ebx,    dh
    lea         ebx,    [byte_1pos_table+8*ebx]
    movq        xmm0,   [ebx]               ; position row for the upper half
    pextrw      ecx,    xmm0,   3
    shr         ecx,    8                   ; ecx = nonzero count in upper half
    mov         dh,     cl                  ; keep the count across the loop

.loopHighFind0:
    test        ecx,    ecx
    jz          .loopHighFind0End
    movzx       esi,    byte [ebx]          ; position inside the upper half
    ; A full row (count 8) has only seven position slots; the eighth byte read
    ; here is the count (8).  Masking turns it into position 0, which is the
    ; missing entry, and keeps the load inside coffLevel[8..15].
    and         esi,    7
    movzx       esi,    word [eax+2*esi+16] ; coffLevel[8 + position]
    mov         [edi],  si
    add         edi,    2
    inc         ebx
    dec         ecx
    jmp         .loopHighFind0
.loopHighFind0End:
    movzx       ecx,    dh                  ; ecx = upper-half count
    pand        xmm0,   xmm6                ; drop the count byte from the row

    ; ---- lower half: coefficients 0..7 ----
    and         edx,    0xff
    lea         ebx,    [byte_1pos_table+8*edx]
    movq        xmm1,   [ebx]               ; position row for the lower half
    pextrw      esi,    xmm1,   3
    shr         esi,    8                   ; esi = nonzero count in lower half
    mov         edx,    esi
    shl         edx,    8
    or          ecx,    edx                 ; cl = upper count, ch = lower count
    pand        xmm1,   xmm6                ; drop the count byte from the row

.loopLowFind0:
    test        esi,    esi
    jz          .loopLowFind0End
    movzx       edx,    byte [ebx]          ; position inside the lower half
    and         edx,    7                   ; same full-row handling as above
    movzx       edx,    word [eax+2*edx]    ; coffLevel[position]
    mov         [edi],  dx
    add         edi,    2
    inc         ebx
    dec         esi
    jmp         .loopLowFind0
.loopLowFind0End:

    mov         edx,    [esp+28]            ; total_coeffs
    movzx       ebx,    cl                  ; ebx = upper-half count
    add         cl,     ch                  ; cl  = total nonzero count
    movzx       esi,    cl
    mov         [edx],  esi                 ; write the whole int32_t

    ; ---- run computation ----
    ; Build one 16-byte list of absolute positions: upper-half entries first,
    ; then the lower-half entries shifted in behind them.
    movq        xmm5,   [sse2_b8]
    paddb       xmm0,   xmm5                ; upper positions 0..7 -> 8..15
    mov         eax,    8
    sub         eax,    ebx
    shl         eax,    3                   ; eax = 8 * (8 - upper count) bits
    shl         ebx,    3                   ; ebx = 8 * upper count bits
    movd        xmm2,   ebx
    movd        xmm3,   eax
    psllq       xmm0,   xmm3
    psrlq       xmm0,   xmm3                ; keep only the first 'upper count' bytes
    movdqa      xmm4,   xmm1
    psllq       xmm1,   xmm2                ; lower list moved behind the upper entries
    psrlq       xmm4,   xmm3                ; lower entries pushed out of the low qword
    punpcklqdq  xmm1,   xmm4
    por         xmm0,   xmm1                ; xmm0 = descending position list

    pextrw      eax,    xmm0,   0
    and         eax,    0xff                ; highest nonzero position
    inc         eax
    sub         eax,    esi                 ; return value: zeros below it
    movdqa      xmm1,   xmm0
    paddb       xmm1,   xmm7                ; pos[i] - 1
    psrldq      xmm0,   1                   ; pos[i+1]
    psubb       xmm1,   xmm0                ; pos[i] - 1 - pos[i+1]
    mov         ecx,    [esp+20]            ; run
    movdqa      [ecx],  xmm1

.return:
    pop         esi
    pop         edi
    pop         ebx
    ret
|
|
%endif
|