Removed mmx versions of vp9_bilinear_predict filters

These filters will not work with VP9.

Change-Id: Ic26c77961084fcea6bfa97f4cd95afdea2282e85
This commit is contained in:
Scott LaVarnway 2012-12-21 14:41:49 -08:00
parent 9a7023d2ad
commit 89ac94f8fb
4 changed files with 4 additions and 500 deletions

View File

@ -319,10 +319,10 @@ prototype void vp9_sixtap_predict_avg "uint8_t *src_ptr, int src_pixels_per_lin
specialize vp9_sixtap_predict_avg
prototype void vp9_bilinear_predict16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
specialize vp9_bilinear_predict16x16 mmx sse2
specialize vp9_bilinear_predict16x16 sse2
prototype void vp9_bilinear_predict8x8 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
specialize vp9_bilinear_predict8x8 mmx sse2
specialize vp9_bilinear_predict8x8 sse2
prototype void vp9_bilinear_predict_avg16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
specialize vp9_bilinear_predict_avg16x16
@ -331,10 +331,10 @@ prototype void vp9_bilinear_predict_avg8x8 "uint8_t *src_ptr, int src_pixels_pe
specialize vp9_bilinear_predict_avg8x8
prototype void vp9_bilinear_predict8x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
specialize vp9_bilinear_predict8x4 mmx
specialize vp9_bilinear_predict8x4
prototype void vp9_bilinear_predict4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
specialize vp9_bilinear_predict4x4 mmx
specialize vp9_bilinear_predict4x4
prototype void vp9_bilinear_predict_avg4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
specialize vp9_bilinear_predict_avg4x4

View File

@ -15,8 +15,6 @@
extern const short vp9_six_tap_mmx[16][6 * 8];
extern const short vp9_bilinear_filters_8x_mmx[16][2 * 8];
extern void vp9_filter_block1d_h6_mmx(unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
@ -95,8 +93,6 @@ extern void vp9_filter_block1d8_v6_only_sse2(unsigned char *src_ptr,
unsigned int output_height,
const short *vp9_filter);
extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx);
///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// in one pass //
@ -232,26 +228,6 @@ void vp9_sixtap_predict8x4_mmx(unsigned char *src_ptr,
vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch,
16, 8, 4, 8, vfilter);
}
/*
 * 16x16 bilinear prediction built from the 8x8 MMX routine.
 *
 * The 16x16 block is covered by four calls to vp9_bilinear_predict8x8_mmx,
 * issued in raster order (top-left, top-right, bottom-left, bottom-right).
 * xoffset and yoffset are forwarded unchanged to every call.
 */
void vp9_bilinear_predict16x16_mmx(unsigned char *src_ptr,
                                   int src_pixels_per_line,
                                   int xoffset,
                                   int yoffset,
                                   unsigned char *dst_ptr,
                                   int dst_pitch) {
  int row, col;

  for (row = 0; row < 16; row += 8) {
    for (col = 0; col < 16; col += 8) {
      /* Each quadrant starts `row` lines down and `col` pixels across. */
      vp9_bilinear_predict8x8_mmx(src_ptr + row * src_pixels_per_line + col,
                                  src_pixels_per_line, xoffset, yoffset,
                                  dst_ptr + dst_pitch * row + col, dst_pitch);
    }
  }
}
#endif
#if HAVE_SSE2

View File

@ -202,438 +202,6 @@ sym(vp9_filter_block1dc_v6_mmx):
pop rbp
ret
;void vp9_bilinear_predict8x8_mmx
;(
; unsigned char *src_ptr,
; int src_pixels_per_line,
; int xoffset,
; int yoffset,
; unsigned char *dst_ptr,
; int dst_pitch
;)
;
; Two-pass bilinear filter that writes an 8-pixel-wide, 8-row block to dst_ptr.
; Horizontal pass: each source row is blended with its one-pixel-right
; neighbour using the two taps selected by xoffset, then rounded and shifted.
; Vertical pass: each output row blends the previous and the current
; horizontally filtered rows using the two taps selected by yoffset.
; Reads 9 bytes from each of 9 consecutive source rows.
;
; Register roles after setup:
;   rsi = current source row         rdi = current destination row
;   rdx = src_pixels_per_line        rcx = dst_ptr + 8 * dst_pitch (loop end)
;   rax = vertical filter row        mm0 = zero (byte -> word unpacking)
;   mm1 / mm2 = horizontal tap 0 / 1 (4 words each)
;   mm7 = previous row after the horizontal pass, packed to bytes
; No emms is executed in this routine.
; NOTE(review): presumably the caller clears the MMX state - confirm.
global sym(vp9_bilinear_predict8x8_mmx)
sym(vp9_bilinear_predict8x8_mmx):
push rbp
mov rbp, rsp
; project macro; presumably makes the 6 arguments addressable via arg(n)
SHADOW_ARGS_TO_STACK 6
; project macro; presumably sets up GLOBAL() addressing for PIC builds
GET_GOT rbx
push rsi
push rdi
; end prolog
;const short *HFilter = vp9_bilinear_filters_8x_mmx[xoffset];
;const short *VFilter = vp9_bilinear_filters_8x_mmx[yoffset];
movsxd rax, dword ptr arg(2) ; xoffset
mov rdi, arg(4) ; dst_ptr
shl rax, 5 ; xoffset * 32: one table row is 2 taps x 8 words = 32 bytes
lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
add rax, rcx ; HFilter
mov rsi, arg(0) ; src_ptr
movsxd rdx, dword ptr arg(5) ; dst_pitch (only needed for the end pointer below)
movq mm1, [rax] ; horizontal tap 0 (first 4 of its 8 copies)
movq mm2, [rax+16] ; horizontal tap 1 starts 8 words (16 bytes) later
movsxd rax, dword ptr arg(3) ; yoffset
pxor mm0, mm0 ; mm0 = 0
shl rax, 5 ; yoffset * 32
add rax, rcx ; VFilter
lea rcx, [rdi+rdx*8] ; end pointer: dst_ptr + 8 * dst_pitch
movsxd rdx, dword ptr arg(1) ; rdx is reused for src_pixels_per_line
; horizontal pass on the first source row; result kept in mm7
movq mm3, [rsi] ; load source pixels 0..7
movq mm4, mm3 ; make a copy of current line
punpcklbw mm3, mm0 ; pixels 0..3 as words
punpckhbw mm4, mm0 ; pixels 4..7 as words
pmullw mm3, mm1 ; * horizontal tap 0
pmullw mm4, mm1 ;
movq mm5, [rsi+1] ; load source pixels 1..8 (right neighbours)
movq mm6, mm5 ;
punpcklbw mm5, mm0 ; pixels 1..4 as words
punpckhbw mm6, mm0 ; pixels 5..8 as words
pmullw mm5, mm2 ; * horizontal tap 1
pmullw mm6, mm2 ;
paddw mm3, mm5 ; sum of both taps, low half
paddw mm4, mm6 ; sum of both taps, high half
paddw mm3, [GLOBAL(rd)] ; mm3 += rounding constant
psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT
paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP9_FILTER_SHIFT ;
movq mm7, mm3 ;
packuswb mm7, mm4 ; mm7 = first filtered row as 8 saturated bytes
add rsi, rdx ; next line
.next_row_8x8:
; horizontal pass on the current source row (not yet rounded)
movq mm3, [rsi] ; load source pixels 0..7
movq mm4, mm3 ; make a copy of current line
punpcklbw mm3, mm0 ; pixels 0..3 as words
punpckhbw mm4, mm0 ; pixels 4..7 as words
pmullw mm3, mm1 ; * horizontal tap 0
pmullw mm4, mm1 ;
movq mm5, [rsi+1] ; load source pixels 1..8
movq mm6, mm5 ;
punpcklbw mm5, mm0 ;
punpckhbw mm6, mm0 ;
pmullw mm5, mm2 ; * horizontal tap 1
pmullw mm6, mm2 ;
paddw mm3, mm5 ;
paddw mm4, mm6 ;
; previous filtered row (mm7) * vertical tap 0 -> mm5/mm6
movq mm5, mm7 ;
movq mm6, mm7 ;
punpcklbw mm5, mm0 ;
punpckhbw mm6, mm0
pmullw mm5, [rax] ; * vertical tap 0
pmullw mm6, [rax] ;
; finish the horizontal pass for the current row
paddw mm3, [GLOBAL(rd)] ; mm3 += rounding constant
psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT
paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP9_FILTER_SHIFT ;
movq mm7, mm3 ;
packuswb mm7, mm4 ; keep this row (as bytes) for the next iteration
; current filtered row * vertical tap 1, then combine with mm5/mm6
pmullw mm3, [rax+16] ; * vertical tap 1
pmullw mm4, [rax+16] ;
paddw mm3, mm5 ;
paddw mm4, mm6 ;
paddw mm3, [GLOBAL(rd)] ; mm3 += rounding constant
psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT
paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP9_FILTER_SHIFT ;
packuswb mm3, mm4
movq [rdi], mm3 ; store 8 output pixels to the destination row
%if ABI_IS_32BIT
add rsi, rdx ; next line
add rdi, dword ptr arg(5) ; dst_pitch, added straight from the argument slot
%else
movsxd r8, dword ptr arg(5) ; dst_pitch, reloaded and sign-extended each iteration
add rsi, rdx ; next line
add rdi, r8 ; advance destination by dst_pitch
%endif
cmp rdi, rcx ; stop once rdi reaches dst_ptr + 8 * dst_pitch
jne .next_row_8x8
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp9_bilinear_predict8x4_mmx
;(
; unsigned char *src_ptr,
; int src_pixels_per_line,
; int xoffset,
; int yoffset,
; unsigned char *dst_ptr,
; int dst_pitch
;)
;
; Two-pass bilinear filter that writes an 8-pixel-wide, 4-row block to dst_ptr.
; Horizontal pass: each source row is blended with its one-pixel-right
; neighbour using the two taps selected by xoffset, then rounded and shifted.
; Vertical pass: each output row blends the previous and the current
; horizontally filtered rows using the two taps selected by yoffset.
; Reads 9 bytes from each of 5 consecutive source rows.
;
; Register roles after setup:
;   rsi = current source row         rdi = current destination row
;   rdx = src_pixels_per_line        rcx = dst_ptr + 4 * dst_pitch (loop end)
;   rax = vertical filter row        mm0 = zero (byte -> word unpacking)
;   mm1 / mm2 = horizontal tap 0 / 1 (4 words each)
;   mm7 = previous row after the horizontal pass, packed to bytes
; No emms is executed in this routine.
; NOTE(review): presumably the caller clears the MMX state - confirm.
global sym(vp9_bilinear_predict8x4_mmx)
sym(vp9_bilinear_predict8x4_mmx):
push rbp
mov rbp, rsp
; project macro; presumably makes the 6 arguments addressable via arg(n)
SHADOW_ARGS_TO_STACK 6
; project macro; presumably sets up GLOBAL() addressing for PIC builds
GET_GOT rbx
push rsi
push rdi
; end prolog
;const short *HFilter = vp9_bilinear_filters_8x_mmx[xoffset];
;const short *VFilter = vp9_bilinear_filters_8x_mmx[yoffset];
movsxd rax, dword ptr arg(2) ; xoffset
mov rdi, arg(4) ; dst_ptr
lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
shl rax, 5 ; xoffset * 32: one table row is 2 taps x 8 words = 32 bytes
mov rsi, arg(0) ; src_ptr
add rax, rcx ; HFilter
movsxd rdx, dword ptr arg(5) ; dst_pitch (only needed for the end pointer below)
movq mm1, [rax] ; horizontal tap 0 (first 4 of its 8 copies)
movq mm2, [rax+16] ; horizontal tap 1 starts 8 words (16 bytes) later
movsxd rax, dword ptr arg(3) ; yoffset
pxor mm0, mm0 ; mm0 = 0
shl rax, 5 ; yoffset * 32
add rax, rcx ; VFilter
lea rcx, [rdi+rdx*4] ; end pointer: dst_ptr + 4 * dst_pitch
movsxd rdx, dword ptr arg(1) ; rdx is reused for src_pixels_per_line
; horizontal pass on the first source row; result kept in mm7
movq mm3, [rsi] ; load source pixels 0..7
movq mm4, mm3 ; make a copy of current line
punpcklbw mm3, mm0 ; pixels 0..3 as words
punpckhbw mm4, mm0 ; pixels 4..7 as words
pmullw mm3, mm1 ; * horizontal tap 0
pmullw mm4, mm1 ;
movq mm5, [rsi+1] ; load source pixels 1..8 (right neighbours)
movq mm6, mm5 ;
punpcklbw mm5, mm0 ; pixels 1..4 as words
punpckhbw mm6, mm0 ; pixels 5..8 as words
pmullw mm5, mm2 ; * horizontal tap 1
pmullw mm6, mm2 ;
paddw mm3, mm5 ; sum of both taps, low half
paddw mm4, mm6 ; sum of both taps, high half
paddw mm3, [GLOBAL(rd)] ; mm3 += rounding constant
psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT
paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP9_FILTER_SHIFT ;
movq mm7, mm3 ;
packuswb mm7, mm4 ; mm7 = first filtered row as 8 saturated bytes
add rsi, rdx ; next line
.next_row_8x4:
; horizontal pass on the current source row (not yet rounded)
movq mm3, [rsi] ; load source pixels 0..7
movq mm4, mm3 ; make a copy of current line
punpcklbw mm3, mm0 ; pixels 0..3 as words
punpckhbw mm4, mm0 ; pixels 4..7 as words
pmullw mm3, mm1 ; * horizontal tap 0
pmullw mm4, mm1 ;
movq mm5, [rsi+1] ; load source pixels 1..8
movq mm6, mm5 ;
punpcklbw mm5, mm0 ;
punpckhbw mm6, mm0 ;
pmullw mm5, mm2 ; * horizontal tap 1
pmullw mm6, mm2 ;
paddw mm3, mm5 ;
paddw mm4, mm6 ;
; previous filtered row (mm7) * vertical tap 0 -> mm5/mm6
movq mm5, mm7 ;
movq mm6, mm7 ;
punpcklbw mm5, mm0 ;
punpckhbw mm6, mm0
pmullw mm5, [rax] ; * vertical tap 0
pmullw mm6, [rax] ;
; finish the horizontal pass for the current row
paddw mm3, [GLOBAL(rd)] ; mm3 += rounding constant
psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT
paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP9_FILTER_SHIFT ;
movq mm7, mm3 ;
packuswb mm7, mm4 ; keep this row (as bytes) for the next iteration
; current filtered row * vertical tap 1, then combine with mm5/mm6
pmullw mm3, [rax+16] ; * vertical tap 1
pmullw mm4, [rax+16] ;
paddw mm3, mm5 ;
paddw mm4, mm6 ;
paddw mm3, [GLOBAL(rd)] ; mm3 += rounding constant
psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT
paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP9_FILTER_SHIFT ;
packuswb mm3, mm4
movq [rdi], mm3 ; store 8 output pixels to the destination row
%if ABI_IS_32BIT
add rsi, rdx ; next line
add rdi, dword ptr arg(5) ; dst_pitch, added straight from the argument slot
%else
movsxd r8, dword ptr arg(5) ; dst_pitch, reloaded and sign-extended each iteration
add rsi, rdx ; next line
add rdi, r8
%endif
cmp rdi, rcx ; stop once rdi reaches dst_ptr + 4 * dst_pitch
jne .next_row_8x4
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp9_bilinear_predict4x4_mmx
;(
; unsigned char *src_ptr,
; int src_pixels_per_line,
; int xoffset,
; int yoffset,
; unsigned char *dst_ptr,
; int dst_pitch
;)
;
; Two-pass bilinear filter that writes a 4-pixel-wide, 4-row block to dst_ptr.
; Horizontal pass: each source row is blended with its one-pixel-right
; neighbour using the two taps selected by xoffset, then rounded and shifted.
; Vertical pass: each output row blends the previous and the current
; horizontally filtered rows using the two taps selected by yoffset.
; Reads 5 bytes from each of 5 consecutive source rows.
;
; Register roles after setup:
;   rsi = current source row         rdi = current destination row
;   rdx = src_pixels_per_line        rcx = dst_ptr + 4 * dst_pitch (loop end)
;   rax = vertical filter row        mm0 = zero (byte -> word unpacking)
;   mm1 / mm2 = horizontal tap 0 / 1 (4 words each)
;   mm7 = previous row after the horizontal pass, packed to bytes (low 4)
; No emms is executed in this routine.
; NOTE(review): presumably the caller clears the MMX state - confirm.
global sym(vp9_bilinear_predict4x4_mmx)
sym(vp9_bilinear_predict4x4_mmx):
push rbp
mov rbp, rsp
; project macro; presumably makes the 6 arguments addressable via arg(n)
SHADOW_ARGS_TO_STACK 6
; project macro; presumably sets up GLOBAL() addressing for PIC builds
GET_GOT rbx
push rsi
push rdi
; end prolog
;const short *HFilter = vp9_bilinear_filters_8x_mmx[xoffset];
;const short *VFilter = vp9_bilinear_filters_8x_mmx[yoffset];
movsxd rax, dword ptr arg(2) ; xoffset
mov rdi, arg(4) ; dst_ptr
lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
shl rax, 5 ; xoffset * 32: one table row is 2 taps x 8 words = 32 bytes
add rax, rcx ; HFilter
mov rsi, arg(0) ; src_ptr
movsxd rdx, dword ptr arg(5) ; dst_pitch (only needed for the end pointer below)
movq mm1, [rax] ; horizontal tap 0 (first 4 of its 8 copies)
movq mm2, [rax+16] ; horizontal tap 1 starts 8 words (16 bytes) later
movsxd rax, dword ptr arg(3) ; yoffset
pxor mm0, mm0 ; mm0 = 0
shl rax, 5 ; yoffset * 32
add rax, rcx ; VFilter
lea rcx, [rdi+rdx*4] ; end pointer: dst_ptr + 4 * dst_pitch
movsxd rdx, dword ptr arg(1) ; rdx is reused for src_pixels_per_line
; horizontal pass on the first source row; result kept in mm7
movd mm3, [rsi] ; load source pixels 0..3
punpcklbw mm3, mm0 ; pixels 0..3 as words
pmullw mm3, mm1 ; * horizontal tap 0
movd mm5, [rsi+1] ; load source pixels 1..4 (right neighbours)
punpcklbw mm5, mm0 ; pixels 1..4 as words
pmullw mm5, mm2 ; * horizontal tap 1
paddw mm3, mm5 ; sum of both taps
paddw mm3, [GLOBAL(rd)] ; mm3 += rounding constant
psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT
movq mm7, mm3 ;
packuswb mm7, mm0 ; mm7 = first filtered row as 4 saturated bytes (upper half zero)
add rsi, rdx ; next line
.next_row_4x4:
; horizontal pass on the current source row (not yet rounded)
movd mm3, [rsi] ; load source pixels 0..3
punpcklbw mm3, mm0 ; pixels 0..3 as words
pmullw mm3, mm1 ; * horizontal tap 0
movd mm5, [rsi+1] ; load source pixels 1..4
punpcklbw mm5, mm0 ;
pmullw mm5, mm2 ; * horizontal tap 1
paddw mm3, mm5 ;
; previous filtered row (mm7) * vertical tap 0 -> mm5
movq mm5, mm7 ;
punpcklbw mm5, mm0 ;
pmullw mm5, [rax] ; * vertical tap 0
; finish the horizontal pass for the current row
paddw mm3, [GLOBAL(rd)] ; mm3 += rounding constant
psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT
movq mm7, mm3 ;
packuswb mm7, mm0 ; keep this row (as bytes) for the next iteration
; current filtered row * vertical tap 1, then combine with mm5
pmullw mm3, [rax+16] ; * vertical tap 1
paddw mm3, mm5 ;
paddw mm3, [GLOBAL(rd)] ; mm3 += rounding constant
psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT
packuswb mm3, mm0
movd [rdi], mm3 ; store 4 output pixels to the destination row
%if ABI_IS_32BIT
add rsi, rdx ; next line
add rdi, dword ptr arg(5) ; dst_pitch, added straight from the argument slot
%else
movsxd r8, dword ptr arg(5) ; dst_pitch, reloaded and sign-extended each iteration
add rsi, rdx ; next line
add rdi, r8
%endif
cmp rdi, rcx ; stop once rdi reaches dst_ptr + 4 * dst_pitch
jne .next_row_4x4
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
rd:
@ -698,30 +266,3 @@ sym(vp9_six_tap_mmx):
times 8 dw -6
times 8 dw 0
align 16
; Bilinear filter table used by the MMX bilinear predictors.
; Each row is two taps, each replicated 8 times as 16-bit words, so a row
; occupies 32 bytes and tap 1 starts 16 bytes into the row. The two taps
; of every row sum to 128.
; NOTE(review): only 8 rows are visible here, while the C declaration of this
; symbol uses 16 rows - confirm that offsets passed by callers stay in 0..7.
global HIDDEN_DATA(sym(vp9_bilinear_filters_8x_mmx))
sym(vp9_bilinear_filters_8x_mmx):
times 8 dw 128 ; row 0, tap 0
times 8 dw 0 ; row 0, tap 1
times 8 dw 112 ; row 1, tap 0
times 8 dw 16 ; row 1, tap 1
times 8 dw 96 ; row 2, tap 0
times 8 dw 32 ; row 2, tap 1
times 8 dw 80 ; row 3, tap 0
times 8 dw 48 ; row 3, tap 1
times 8 dw 64 ; row 4, tap 0
times 8 dw 64 ; row 4, tap 1
times 8 dw 48 ; row 5, tap 0
times 8 dw 80 ; row 5, tap 1
times 8 dw 32 ; row 6, tap 0
times 8 dw 96 ; row 6, tap 1
times 8 dw 16 ; row 7, tap 0
times 8 dw 112 ; row 7, tap 1

View File

@ -25,10 +25,6 @@ extern prototype_subpixel_predict(vp9_sixtap_predict8x8_mmx);
extern prototype_subpixel_predict(vp9_sixtap_predict8x4_mmx);
extern prototype_subpixel_predict(vp9_sixtap_predict4x4_mmx);
extern prototype_subpixel_predict(vp9_bilinear_predict16x16_mmx);
extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx);
extern prototype_subpixel_predict(vp9_bilinear_predict8x4_mmx);
extern prototype_subpixel_predict(vp9_bilinear_predict4x4_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp9_subpix_sixtap16x16
@ -46,15 +42,6 @@ extern prototype_subpixel_predict(vp9_bilinear_predict4x4_mmx);
#undef vp9_subpix_bilinear16x16
#define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_mmx
#undef vp9_subpix_bilinear8x8
#define vp9_subpix_bilinear8x8 vp9_bilinear_predict8x8_mmx
#undef vp9_subpix_bilinear8x4
#define vp9_subpix_bilinear8x4 vp9_bilinear_predict8x4_mmx
#undef vp9_subpix_bilinear4x4
#define vp9_subpix_bilinear4x4 vp9_bilinear_predict4x4_mmx
#endif
#endif