Merge "update x86 asm for loopfilter"
This commit is contained in:
commit
211694f67e
@ -16,7 +16,7 @@
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -122,12 +122,10 @@ next8_h:
|
|||||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||||
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit ; get flimit
|
mov rdx, arg(2) ;blimit ; get blimit
|
||||||
movq mm2, [rdx] ; flimit mm2
|
movq mm7, [rdx] ; blimit
|
||||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
|
||||||
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
por mm1, mm5
|
por mm1, mm5
|
||||||
pxor mm5, mm5
|
pxor mm5, mm5
|
||||||
pcmpeqb mm1, mm5 ; mask mm1
|
pcmpeqb mm1, mm5 ; mask mm1
|
||||||
@ -230,7 +228,7 @@ next8_h:
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -406,9 +404,9 @@ next8_v:
|
|||||||
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||||
psrlw mm5, 1 ; abs(p1-q1)/2
|
psrlw mm5, 1 ; abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit ;
|
mov rdx, arg(2) ;blimit ;
|
||||||
|
|
||||||
movq mm2, [rdx] ;flimit mm2
|
movq mm4, [rdx] ;blimit
|
||||||
movq mm1, mm3 ; mm1=mm3=p0
|
movq mm1, mm3 ; mm1=mm3=p0
|
||||||
|
|
||||||
movq mm7, mm6 ; mm7=mm6=q0
|
movq mm7, mm6 ; mm7=mm6=q0
|
||||||
@ -419,10 +417,7 @@ next8_v:
|
|||||||
paddusb mm1, mm1 ; abs(q0-p0)*2
|
paddusb mm1, mm1 ; abs(q0-p0)*2
|
||||||
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
|
||||||
por mm1, mm0; ; mask
|
por mm1, mm0; ; mask
|
||||||
|
|
||||||
pxor mm0, mm0
|
pxor mm0, mm0
|
||||||
@ -603,7 +598,7 @@ next8_v:
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -719,17 +714,15 @@ next8_mbh:
|
|||||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||||
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit ; get flimit
|
mov rdx, arg(2) ;blimit ; get blimit
|
||||||
movq mm2, [rdx] ; flimit mm2
|
movq mm7, [rdx] ; blimit
|
||||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
|
||||||
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
por mm1, mm5
|
por mm1, mm5
|
||||||
pxor mm5, mm5
|
pxor mm5, mm5
|
||||||
pcmpeqb mm1, mm5 ; mask mm1
|
pcmpeqb mm1, mm5 ; mask mm1
|
||||||
|
|
||||||
; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
|
; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
|
||||||
; mm6 = p0,
|
; mm6 = p0,
|
||||||
|
|
||||||
; calculate high edge variance
|
; calculate high edge variance
|
||||||
@ -922,7 +915,7 @@ next8_mbh:
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -1108,9 +1101,9 @@ next8_mbv:
|
|||||||
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||||
psrlw mm5, 1 ; abs(p1-q1)/2
|
psrlw mm5, 1 ; abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit ;
|
mov rdx, arg(2) ;blimit ;
|
||||||
|
|
||||||
movq mm2, [rdx] ;flimit mm2
|
movq mm4, [rdx] ;blimit
|
||||||
movq mm1, mm3 ; mm1=mm3=p0
|
movq mm1, mm3 ; mm1=mm3=p0
|
||||||
|
|
||||||
movq mm7, mm6 ; mm7=mm6=q0
|
movq mm7, mm6 ; mm7=mm6=q0
|
||||||
@ -1121,10 +1114,7 @@ next8_mbv:
|
|||||||
paddusb mm1, mm1 ; abs(q0-p0)*2
|
paddusb mm1, mm1 ; abs(q0-p0)*2
|
||||||
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
|
||||||
por mm1, mm0; ; mask
|
por mm1, mm0; ; mask
|
||||||
|
|
||||||
pxor mm0, mm0
|
pxor mm0, mm0
|
||||||
@ -1392,16 +1382,13 @@ next8_mbv:
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit
|
||||||
; const char *limit,
|
|
||||||
; const char *thresh,
|
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
|
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
|
||||||
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 3
|
||||||
GET_GOT rbx
|
GET_GOT rbx
|
||||||
push rsi
|
push rsi
|
||||||
push rdi
|
push rdi
|
||||||
@ -1410,14 +1397,10 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
|||||||
mov rsi, arg(0) ;src_ptr
|
mov rsi, arg(0) ;src_ptr
|
||||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||||
|
|
||||||
movsxd rcx, dword ptr arg(5) ;count
|
mov rcx, 2 ; count
|
||||||
nexts8_h:
|
nexts8_h:
|
||||||
mov rdx, arg(3) ;limit
|
mov rdx, arg(2) ;blimit ; get blimit
|
||||||
movq mm7, [rdx]
|
|
||||||
mov rdx, arg(2) ;flimit ; get flimit
|
|
||||||
movq mm3, [rdx] ;
|
movq mm3, [rdx] ;
|
||||||
paddb mm3, mm3 ; flimit*2 (less than 255)
|
|
||||||
paddb mm3, mm7 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||||
add rdi, rax
|
add rdi, rax
|
||||||
@ -1445,7 +1428,7 @@ nexts8_h:
|
|||||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||||
paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
pxor mm3, mm3
|
pxor mm3, mm3
|
||||||
pcmpeqb mm5, mm3
|
pcmpeqb mm5, mm3
|
||||||
|
|
||||||
@ -1515,16 +1498,13 @@ nexts8_h:
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit
|
||||||
; const char *limit,
|
|
||||||
; const char *thresh,
|
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
|
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
|
||||||
sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 3
|
||||||
GET_GOT rbx
|
GET_GOT rbx
|
||||||
push rsi
|
push rsi
|
||||||
push rdi
|
push rdi
|
||||||
@ -1539,7 +1519,7 @@ sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
|||||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||||
|
|
||||||
lea rsi, [rsi + rax*4- 2]; ;
|
lea rsi, [rsi + rax*4- 2]; ;
|
||||||
movsxd rcx, dword ptr arg(5) ;count
|
mov rcx, 2 ; count
|
||||||
nexts8_v:
|
nexts8_v:
|
||||||
|
|
||||||
lea rdi, [rsi + rax];
|
lea rdi, [rsi + rax];
|
||||||
@ -1602,14 +1582,10 @@ nexts8_v:
|
|||||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||||
paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit ; get flimit
|
mov rdx, arg(2) ;blimit ; get blimit
|
||||||
movq mm7, [rdx]
|
movq mm7, [rdx]
|
||||||
mov rdx, arg(3) ; get limit
|
|
||||||
movq mm6, [rdx]
|
|
||||||
paddb mm7, mm7 ; flimit*2 (less than 255)
|
|
||||||
paddb mm7, mm6 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
pxor mm7, mm7
|
pxor mm7, mm7
|
||||||
pcmpeqb mm5, mm7 ; mm5 = mask
|
pcmpeqb mm5, mm7 ; mm5 = mask
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@
|
|||||||
psubusb xmm6, xmm5 ; p1-=p0
|
psubusb xmm6, xmm5 ; p1-=p0
|
||||||
|
|
||||||
por xmm6, xmm4 ; abs(p1 - p0)
|
por xmm6, xmm4 ; abs(p1 - p0)
|
||||||
mov rdx, arg(2) ; get flimit
|
mov rdx, arg(2) ; get blimit
|
||||||
|
|
||||||
movdqa t1, xmm6 ; save to t1
|
movdqa t1, xmm6 ; save to t1
|
||||||
|
|
||||||
@ -123,7 +123,7 @@
|
|||||||
psubusb xmm1, xmm7
|
psubusb xmm1, xmm7
|
||||||
por xmm2, xmm3 ; abs(p1-q1)
|
por xmm2, xmm3 ; abs(p1-q1)
|
||||||
|
|
||||||
movdqa xmm4, XMMWORD PTR [rdx] ; flimit
|
movdqa xmm7, XMMWORD PTR [rdx] ; blimit
|
||||||
|
|
||||||
movdqa xmm3, xmm0 ; q0
|
movdqa xmm3, xmm0 ; q0
|
||||||
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||||
@ -134,13 +134,11 @@
|
|||||||
psrlw xmm2, 1 ; abs(p1-q1)/2
|
psrlw xmm2, 1 ; abs(p1-q1)/2
|
||||||
|
|
||||||
psubusb xmm5, xmm3 ; p0-=q0
|
psubusb xmm5, xmm3 ; p0-=q0
|
||||||
paddb xmm4, xmm4 ; flimit*2 (less than 255)
|
|
||||||
|
|
||||||
psubusb xmm3, xmm6 ; q0-=p0
|
psubusb xmm3, xmm6 ; q0-=p0
|
||||||
por xmm5, xmm3 ; abs(p0 - q0)
|
por xmm5, xmm3 ; abs(p0 - q0)
|
||||||
|
|
||||||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||||
paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
movdqa xmm4, t0 ; hev get abs (q1 - q0)
|
movdqa xmm4, t0 ; hev get abs (q1 - q0)
|
||||||
|
|
||||||
@ -150,7 +148,7 @@
|
|||||||
|
|
||||||
movdqa xmm2, XMMWORD PTR [rdx] ; hev
|
movdqa xmm2, XMMWORD PTR [rdx] ; hev
|
||||||
|
|
||||||
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
psubusb xmm4, xmm2 ; hev
|
psubusb xmm4, xmm2 ; hev
|
||||||
|
|
||||||
psubusb xmm3, xmm2 ; hev
|
psubusb xmm3, xmm2 ; hev
|
||||||
@ -278,7 +276,7 @@
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -328,7 +326,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -574,7 +572,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -624,7 +622,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *u,
|
; unsigned char *u,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; unsigned char *v
|
; unsigned char *v
|
||||||
@ -904,7 +902,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||||||
movdqa xmm4, XMMWORD PTR [rdx]; limit
|
movdqa xmm4, XMMWORD PTR [rdx]; limit
|
||||||
|
|
||||||
pmaxub xmm0, xmm7
|
pmaxub xmm0, xmm7
|
||||||
mov rdx, arg(2) ; flimit
|
mov rdx, arg(2) ; blimit
|
||||||
|
|
||||||
psubusb xmm0, xmm4
|
psubusb xmm0, xmm4
|
||||||
movdqa xmm5, xmm2 ; q1
|
movdqa xmm5, xmm2 ; q1
|
||||||
@ -921,12 +919,11 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||||||
psrlw xmm5, 1 ; abs(p1-q1)/2
|
psrlw xmm5, 1 ; abs(p1-q1)/2
|
||||||
psubusb xmm6, xmm3 ; q0-p0
|
psubusb xmm6, xmm3 ; q0-p0
|
||||||
|
|
||||||
movdqa xmm2, XMMWORD PTR [rdx]; flimit
|
movdqa xmm4, XMMWORD PTR [rdx]; blimit
|
||||||
|
|
||||||
mov rdx, arg(4) ; get thresh
|
mov rdx, arg(4) ; get thresh
|
||||||
|
|
||||||
por xmm1, xmm6 ; abs(q0-p0)
|
por xmm1, xmm6 ; abs(q0-p0)
|
||||||
paddb xmm2, xmm2 ; flimit*2 (less than 255)
|
|
||||||
|
|
||||||
movdqa xmm6, t0 ; get abs (q1 - q0)
|
movdqa xmm6, t0 ; get abs (q1 - q0)
|
||||||
|
|
||||||
@ -939,10 +936,9 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||||||
paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh
|
psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh
|
||||||
|
|
||||||
paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255)
|
|
||||||
psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
|
psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
|
||||||
|
|
||||||
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
|
por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
|
||||||
|
|
||||||
por xmm1, xmm0 ; mask
|
por xmm1, xmm0 ; mask
|
||||||
@ -1014,7 +1010,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -1081,7 +1077,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *u,
|
; unsigned char *u,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; unsigned char *v
|
; unsigned char *v
|
||||||
@ -1239,7 +1235,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -1308,7 +1304,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *u,
|
; unsigned char *u,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; unsigned char *v
|
; unsigned char *v
|
||||||
@ -1376,16 +1372,13 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
|
||||||
; const char *thresh,
|
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
|
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
|
||||||
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 3
|
||||||
SAVE_XMM 7
|
SAVE_XMM 7
|
||||||
GET_GOT rbx
|
GET_GOT rbx
|
||||||
push rsi
|
push rsi
|
||||||
@ -1394,13 +1387,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||||||
|
|
||||||
mov rsi, arg(0) ;src_ptr
|
mov rsi, arg(0) ;src_ptr
|
||||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||||
mov rdx, arg(2) ;flimit ; get flimit
|
mov rdx, arg(2) ;blimit
|
||||||
movdqa xmm3, XMMWORD PTR [rdx]
|
movdqa xmm3, XMMWORD PTR [rdx]
|
||||||
mov rdx, arg(3) ;limit
|
|
||||||
movdqa xmm7, XMMWORD PTR [rdx]
|
|
||||||
|
|
||||||
paddb xmm3, xmm3 ; flimit*2 (less than 255)
|
|
||||||
paddb xmm3, xmm7 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||||
add rdi, rax
|
add rdi, rax
|
||||||
@ -1428,7 +1416,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||||||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||||
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
pxor xmm3, xmm3
|
pxor xmm3, xmm3
|
||||||
pcmpeqb xmm5, xmm3
|
pcmpeqb xmm5, xmm3
|
||||||
|
|
||||||
@ -1493,16 +1481,13 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
|
||||||
; const char *thresh,
|
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
|
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
|
||||||
sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
||||||
push rbp ; save old base pointer value.
|
push rbp ; save old base pointer value.
|
||||||
mov rbp, rsp ; set new base pointer value.
|
mov rbp, rsp ; set new base pointer value.
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 3
|
||||||
SAVE_XMM 7
|
SAVE_XMM 7
|
||||||
GET_GOT rbx ; save callee-saved reg
|
GET_GOT rbx ; save callee-saved reg
|
||||||
push rsi
|
push rsi
|
||||||
@ -1607,14 +1592,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
|||||||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||||
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit
|
mov rdx, arg(2) ;blimit
|
||||||
movdqa xmm7, XMMWORD PTR [rdx]
|
movdqa xmm7, XMMWORD PTR [rdx]
|
||||||
mov rdx, arg(3) ; get limit
|
|
||||||
movdqa xmm6, XMMWORD PTR [rdx]
|
|
||||||
paddb xmm7, xmm7 ; flimit*2 (less than 255)
|
|
||||||
paddb xmm7, xmm6 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
pcmpeqb xmm5, xmm7 ; mm5 = mask
|
pcmpeqb xmm5, xmm7 ; mm5 = mask
|
||||||
|
|
||||||
|
@ -9,30 +9,18 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "vpx_ports/config.h"
|
#include "vpx_config.h"
|
||||||
#include "vp8/common/loopfilter.h"
|
#include "vp8/common/loopfilter.h"
|
||||||
|
|
||||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
|
|
||||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
|
|
||||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
|
||||||
|
|
||||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
|
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
|
||||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
|
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
|
||||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
|
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
|
||||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
|
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
|
|
||||||
|
|
||||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
|
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
|
||||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
|
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
|
||||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
|
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
|
||||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
|
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
|
||||||
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
|
|
||||||
|
|
||||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
||||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
||||||
@ -44,23 +32,13 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
|
|||||||
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -68,23 +46,13 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||||||
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -92,27 +60,23 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||||||
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -120,27 +84,23 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||||||
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -150,20 +110,10 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||||||
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -171,20 +121,10 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
|||||||
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -192,24 +132,20 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
|||||||
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
|
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -217,36 +153,20 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||||||
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
|
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
|
||||||
void vp8_fast_loop_filter_vertical_edges_sse(unsigned char *y_ptr,
|
|
||||||
int y_stride,
|
|
||||||
loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
@ -24,10 +24,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_mmx);
|
|||||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_mmx);
|
extern prototype_loopfilter_block(vp8_loop_filter_bv_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_mmx);
|
extern prototype_loopfilter_block(vp8_loop_filter_mbh_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_mmx);
|
extern prototype_loopfilter_block(vp8_loop_filter_bh_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_mmx);
|
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_mmx);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_mmx);
|
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_mmx);
|
||||||
|
|
||||||
|
|
||||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||||
@ -44,13 +44,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
|
|||||||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_mmx
|
#define vp8_lf_normal_b_h vp8_loop_filter_bh_mmx
|
||||||
|
|
||||||
#undef vp8_lf_simple_mb_v
|
#undef vp8_lf_simple_mb_v
|
||||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_mmx
|
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_mmx
|
||||||
|
|
||||||
#undef vp8_lf_simple_b_v
|
#undef vp8_lf_simple_b_v
|
||||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_mmx
|
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_mmx
|
||||||
|
|
||||||
#undef vp8_lf_simple_mb_h
|
#undef vp8_lf_simple_mb_h
|
||||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_mmx
|
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_mmx
|
||||||
|
|
||||||
#undef vp8_lf_simple_b_h
|
#undef vp8_lf_simple_b_h
|
||||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_mmx
|
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_mmx
|
||||||
@ -63,10 +63,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_sse2);
|
|||||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_sse2);
|
extern prototype_loopfilter_block(vp8_loop_filter_bv_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_sse2);
|
extern prototype_loopfilter_block(vp8_loop_filter_mbh_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_sse2);
|
extern prototype_loopfilter_block(vp8_loop_filter_bh_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_sse2);
|
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_sse2);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_sse2);
|
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_sse2);
|
||||||
|
|
||||||
|
|
||||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||||
@ -83,13 +83,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
|
|||||||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_sse2
|
#define vp8_lf_normal_b_h vp8_loop_filter_bh_sse2
|
||||||
|
|
||||||
#undef vp8_lf_simple_mb_v
|
#undef vp8_lf_simple_mb_v
|
||||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_sse2
|
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_sse2
|
||||||
|
|
||||||
#undef vp8_lf_simple_b_v
|
#undef vp8_lf_simple_b_v
|
||||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_sse2
|
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_sse2
|
||||||
|
|
||||||
#undef vp8_lf_simple_mb_h
|
#undef vp8_lf_simple_mb_h
|
||||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_sse2
|
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_sse2
|
||||||
|
|
||||||
#undef vp8_lf_simple_b_h
|
#undef vp8_lf_simple_b_h
|
||||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_sse2
|
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_sse2
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "vpx_ports/config.h"
|
#include "vpx_config.h"
|
||||||
#include "vpx_ports/x86.h"
|
#include "vpx_ports/x86.h"
|
||||||
#include "vp8/common/g_common.h"
|
#include "vp8/common/g_common.h"
|
||||||
#include "vp8/common/subpixel.h"
|
#include "vp8/common/subpixel.h"
|
||||||
@ -63,9 +63,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||||||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx;
|
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx;
|
||||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx;
|
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx;
|
||||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx;
|
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx;
|
||||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_mmx;
|
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_mmx;
|
||||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx;
|
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx;
|
||||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_mmx;
|
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_mmx;
|
||||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx;
|
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx;
|
||||||
|
|
||||||
#if CONFIG_POSTPROC
|
#if CONFIG_POSTPROC
|
||||||
@ -101,9 +101,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||||||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2;
|
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2;
|
||||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2;
|
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2;
|
||||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2;
|
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2;
|
||||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_sse2;
|
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_sse2;
|
||||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2;
|
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2;
|
||||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_sse2;
|
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_sse2;
|
||||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2;
|
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2;
|
||||||
|
|
||||||
#if CONFIG_POSTPROC
|
#if CONFIG_POSTPROC
|
||||||
|
Loading…
x
Reference in New Issue
Block a user