x86: vc1dsp: port to cpuflags
This commit is contained in:
parent
d578f94746
commit
6c104826bd
@ -34,7 +34,13 @@ section .text
|
|||||||
punpckl%1 m%2, m%4
|
punpckl%1 m%2, m%4
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro STORE_4_WORDS_MMX 6
|
%macro STORE_4_WORDS 6
|
||||||
|
%if cpuflag(sse4)
|
||||||
|
pextrw %1, %5, %6+0
|
||||||
|
pextrw %2, %5, %6+1
|
||||||
|
pextrw %3, %5, %6+2
|
||||||
|
pextrw %4, %5, %6+3
|
||||||
|
%else
|
||||||
movd %6d, %5
|
movd %6d, %5
|
||||||
%if mmsize==16
|
%if mmsize==16
|
||||||
psrldq %5, 4
|
psrldq %5, 4
|
||||||
@ -48,13 +54,7 @@ section .text
|
|||||||
mov %3, %6w
|
mov %3, %6w
|
||||||
shr %6, 16
|
shr %6, 16
|
||||||
mov %4, %6w
|
mov %4, %6w
|
||||||
%endmacro
|
%endif
|
||||||
|
|
||||||
%macro STORE_4_WORDS_SSE4 6
|
|
||||||
pextrw %1, %5, %6+0
|
|
||||||
pextrw %2, %5, %6+1
|
|
||||||
pextrw %3, %5, %6+2
|
|
||||||
pextrw %4, %5, %6+3
|
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
; in: p1 p0 q0 q1, clobbers p0
|
; in: p1 p0 q0 q1, clobbers p0
|
||||||
@ -200,14 +200,14 @@ section .text
|
|||||||
VC1_FILTER %1
|
VC1_FILTER %1
|
||||||
punpcklbw m0, m1
|
punpcklbw m0, m1
|
||||||
%if %0 > 1
|
%if %0 > 1
|
||||||
STORE_4_WORDS_MMX [r0-1], [r0+r1-1], [r0+2*r1-1], [r0+r3-1], m0, %2
|
STORE_4_WORDS [r0-1], [r0+r1-1], [r0+2*r1-1], [r0+r3-1], m0, %2
|
||||||
%if %1 > 4
|
%if %1 > 4
|
||||||
psrldq m0, 4
|
psrldq m0, 4
|
||||||
STORE_4_WORDS_MMX [r4-1], [r4+r1-1], [r4+2*r1-1], [r4+r3-1], m0, %2
|
STORE_4_WORDS [r4-1], [r4+r1-1], [r4+2*r1-1], [r4+r3-1], m0, %2
|
||||||
%endif
|
%endif
|
||||||
%else
|
%else
|
||||||
STORE_4_WORDS_SSE4 [r0-1], [r0+r1-1], [r0+2*r1-1], [r0+r3-1], m0, 0
|
STORE_4_WORDS [r0-1], [r0+r1-1], [r0+2*r1-1], [r0+r3-1], m0, 0
|
||||||
STORE_4_WORDS_SSE4 [r4-1], [r4+r1-1], [r4+2*r1-1], [r4+r3-1], m0, 4
|
STORE_4_WORDS [r4-1], [r4+r1-1], [r4+2*r1-1], [r4+r3-1], m0, 4
|
||||||
%endif
|
%endif
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
@ -228,92 +228,93 @@ section .text
|
|||||||
imul r2, 0x01010101
|
imul r2, 0x01010101
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro VC1_LF_MMX 1
|
%macro VC1_LF 0
|
||||||
INIT_MMX
|
cglobal vc1_v_loop_filter_internal
|
||||||
cglobal vc1_v_loop_filter_internal_%1
|
|
||||||
VC1_V_LOOP_FILTER 4, d
|
VC1_V_LOOP_FILTER 4, d
|
||||||
ret
|
ret
|
||||||
|
|
||||||
cglobal vc1_h_loop_filter_internal_%1
|
cglobal vc1_h_loop_filter_internal
|
||||||
VC1_H_LOOP_FILTER 4, r4
|
VC1_H_LOOP_FILTER 4, r4
|
||||||
ret
|
ret
|
||||||
|
|
||||||
; void ff_vc1_v_loop_filter4_mmx2(uint8_t *src, int stride, int pq)
|
; void ff_vc1_v_loop_filter4_mmxext(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_v_loop_filter4_%1, 3,5,0
|
cglobal vc1_v_loop_filter4, 3,5,0
|
||||||
START_V_FILTER
|
START_V_FILTER
|
||||||
call vc1_v_loop_filter_internal_%1
|
call vc1_v_loop_filter_internal
|
||||||
RET
|
RET
|
||||||
|
|
||||||
; void ff_vc1_h_loop_filter4_mmx2(uint8_t *src, int stride, int pq)
|
; void ff_vc1_h_loop_filter4_mmxext(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_h_loop_filter4_%1, 3,5,0
|
cglobal vc1_h_loop_filter4, 3,5,0
|
||||||
START_H_FILTER 4
|
START_H_FILTER 4
|
||||||
call vc1_h_loop_filter_internal_%1
|
call vc1_h_loop_filter_internal
|
||||||
RET
|
RET
|
||||||
|
|
||||||
; void ff_vc1_v_loop_filter8_mmx2(uint8_t *src, int stride, int pq)
|
; void ff_vc1_v_loop_filter8_mmxext(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_v_loop_filter8_%1, 3,5,0
|
cglobal vc1_v_loop_filter8, 3,5,0
|
||||||
START_V_FILTER
|
START_V_FILTER
|
||||||
call vc1_v_loop_filter_internal_%1
|
call vc1_v_loop_filter_internal
|
||||||
add r4, 4
|
add r4, 4
|
||||||
add r0, 4
|
add r0, 4
|
||||||
call vc1_v_loop_filter_internal_%1
|
call vc1_v_loop_filter_internal
|
||||||
RET
|
RET
|
||||||
|
|
||||||
; void ff_vc1_h_loop_filter8_mmx2(uint8_t *src, int stride, int pq)
|
; void ff_vc1_h_loop_filter8_mmxext(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_h_loop_filter8_%1, 3,5,0
|
cglobal vc1_h_loop_filter8, 3,5,0
|
||||||
START_H_FILTER 4
|
START_H_FILTER 4
|
||||||
call vc1_h_loop_filter_internal_%1
|
call vc1_h_loop_filter_internal
|
||||||
lea r0, [r0+4*r1]
|
lea r0, [r0+4*r1]
|
||||||
call vc1_h_loop_filter_internal_%1
|
call vc1_h_loop_filter_internal
|
||||||
RET
|
RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
INIT_MMX mmxext
|
||||||
%define PABSW PABSW_MMXEXT
|
%define PABSW PABSW_MMXEXT
|
||||||
VC1_LF_MMX mmx2
|
VC1_LF
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM sse2
|
||||||
; void ff_vc1_v_loop_filter8_sse2(uint8_t *src, int stride, int pq)
|
; void ff_vc1_v_loop_filter8_sse2(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_v_loop_filter8_sse2, 3,5,8
|
cglobal vc1_v_loop_filter8, 3,5,8
|
||||||
START_V_FILTER
|
START_V_FILTER
|
||||||
VC1_V_LOOP_FILTER 8, q
|
VC1_V_LOOP_FILTER 8, q
|
||||||
RET
|
RET
|
||||||
|
|
||||||
; void ff_vc1_h_loop_filter8_sse2(uint8_t *src, int stride, int pq)
|
; void ff_vc1_h_loop_filter8_sse2(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_h_loop_filter8_sse2, 3,6,8
|
cglobal vc1_h_loop_filter8, 3,6,8
|
||||||
START_H_FILTER 8
|
START_H_FILTER 8
|
||||||
VC1_H_LOOP_FILTER 8, r5
|
VC1_H_LOOP_FILTER 8, r5
|
||||||
RET
|
RET
|
||||||
|
|
||||||
%define PABSW PABSW_SSSE3
|
%define PABSW PABSW_SSSE3
|
||||||
|
|
||||||
INIT_MMX
|
INIT_MMX ssse3
|
||||||
; void ff_vc1_v_loop_filter4_ssse3(uint8_t *src, int stride, int pq)
|
; void ff_vc1_v_loop_filter4_ssse3(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_v_loop_filter4_ssse3, 3,5,0
|
cglobal vc1_v_loop_filter4, 3,5,0
|
||||||
START_V_FILTER
|
START_V_FILTER
|
||||||
VC1_V_LOOP_FILTER 4, d
|
VC1_V_LOOP_FILTER 4, d
|
||||||
RET
|
RET
|
||||||
|
|
||||||
; void ff_vc1_h_loop_filter4_ssse3(uint8_t *src, int stride, int pq)
|
; void ff_vc1_h_loop_filter4_ssse3(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_h_loop_filter4_ssse3, 3,5,0
|
cglobal vc1_h_loop_filter4, 3,5,0
|
||||||
START_H_FILTER 4
|
START_H_FILTER 4
|
||||||
VC1_H_LOOP_FILTER 4, r4
|
VC1_H_LOOP_FILTER 4, r4
|
||||||
RET
|
RET
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM ssse3
|
||||||
; void ff_vc1_v_loop_filter8_ssse3(uint8_t *src, int stride, int pq)
|
; void ff_vc1_v_loop_filter8_ssse3(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_v_loop_filter8_ssse3, 3,5,8
|
cglobal vc1_v_loop_filter8, 3,5,8
|
||||||
START_V_FILTER
|
START_V_FILTER
|
||||||
VC1_V_LOOP_FILTER 8, q
|
VC1_V_LOOP_FILTER 8, q
|
||||||
RET
|
RET
|
||||||
|
|
||||||
; void ff_vc1_h_loop_filter8_ssse3(uint8_t *src, int stride, int pq)
|
; void ff_vc1_h_loop_filter8_ssse3(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_h_loop_filter8_ssse3, 3,6,8
|
cglobal vc1_h_loop_filter8, 3,6,8
|
||||||
START_H_FILTER 8
|
START_H_FILTER 8
|
||||||
VC1_H_LOOP_FILTER 8, r5
|
VC1_H_LOOP_FILTER 8, r5
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
INIT_XMM sse4
|
||||||
; void ff_vc1_h_loop_filter8_sse4(uint8_t *src, int stride, int pq)
|
; void ff_vc1_h_loop_filter8_sse4(uint8_t *src, int stride, int pq)
|
||||||
cglobal vc1_h_loop_filter8_sse4, 3,5,8
|
cglobal vc1_h_loop_filter8, 3,5,8
|
||||||
START_H_FILTER 8
|
START_H_FILTER 8
|
||||||
VC1_H_LOOP_FILTER 8
|
VC1_H_LOOP_FILTER 8
|
||||||
RET
|
RET
|
||||||
|
@ -49,7 +49,7 @@ static void vc1_h_loop_filter16_ ## EXT(uint8_t *src, int stride, int pq) \
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
LOOP_FILTER(mmx2)
|
LOOP_FILTER(mmxext)
|
||||||
LOOP_FILTER(sse2)
|
LOOP_FILTER(sse2)
|
||||||
LOOP_FILTER(ssse3)
|
LOOP_FILTER(ssse3)
|
||||||
|
|
||||||
@ -98,7 +98,7 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
ASSIGN_LF(mmx2);
|
ASSIGN_LF(mmxext);
|
||||||
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmx2;
|
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmx2;
|
||||||
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
|
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
|
||||||
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
|
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user