ARM: NEON 2xN chroma MC
Originally committed as revision 20696 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
04e7f6d2d0
commit
1025d19dd7
@ -125,9 +125,11 @@ void ff_avg_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
|
||||
|
||||
/*
 * NEON H.264 chroma motion-compensation entry points, in "put" and "avg"
 * flavours, for the mc8, mc4 and mc2 variants.
 *
 * The parameters are unnamed in these declarations.  The mc2 assembly
 * implementation uses them, in order, as: destination pointer, source
 * pointer, line stride (applied to both pointers), row count (consumed two
 * rows at a time), and two interpolation weights.  The weights are presumably
 * the x and y fractional offsets in eighths -- confirm against the callers.
 * NOTE(review): the mc8/mc4 bodies are not visible here; they are assumed to
 * follow the same argument order.
 */
void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||
void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||
void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||
|
||||
void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||
|
||||
void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
@ -272,9 +274,11 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
||||
if (CONFIG_H264_DECODER) {
|
||||
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
|
||||
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
|
||||
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
|
||||
|
||||
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
|
||||
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
|
||||
c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
|
||||
|
||||
c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
|
||||
c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
|
||||
|
@ -320,6 +320,74 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
|
||||
.endfunc
|
||||
.endm
|
||||
|
||||
/*
 * h264_chroma_mc2 type
 *
 * Emits ff_<type>_h264_chroma_mc2_neon: a 2-pixel-wide bilinear chroma
 * interpolation that produces two output rows per loop iteration.  "type"
 * is tested against "put" and "avg"; the avg flavour additionally blends the
 * result with the bytes already at the destination (rounding halving add).
 *
 * Register use on entry:
 *   r0       destination pointer (accessed with a :16 alignment hint)
 *   r1       source pointer
 *   r2       line stride, applied to both r0 and r1
 *   r3       row count; decremented by 2 per iteration, loop runs while > 0
 *   [sp]     first stack argument,  called "a" below
 *   [sp,#4]  second stack argument, called "b" below
 *            (a and b are presumably the x and y fractions in 0..7 --
 *             confirm against the callers)
 *
 * Weights computed from a and b:
 *   A = (8-a)*(8-b)   applied to the pixel at (col,   row)
 *   B = a*(8-b)       applied to the pixel at (col+1, row)
 *   C = (8-a)*b       applied to the pixel at (col,   row+1)
 *   D = a*b           applied to the pixel at (col+1, row+1)
 * and each output byte is (A*p00 + B*p01 + C*p10 + D*p11 + 32) >> 6.
 *
 * If a and b are both zero the code branches to label 2, which copies (put)
 * or averages with the destination (avg) without any filtering.
 *
 * r4-r6 and lr are saved and restored.  r12 and the NEON registers d0-d7 and
 * d16-d18 are overwritten without being saved.
 *
 * The filtering path loads 4 bytes from each source row although only the
 * first 3 contribute to the result, and it reads one source row more than
 * the number of rows written.
 */
.macro h264_chroma_mc2 type
|
||||
function ff_\type\()_h264_chroma_mc2_neon, export=1
|
||||
push {r4-r6, lr}                /* 16 bytes pushed, so stack arguments now start at sp+16 */
|
||||
ldr r4, [sp, #16]               /* r4 = a (first stack argument) */
|
||||
ldr lr, [sp, #20]               /* lr = b (second stack argument) */
|
||||
pld [r1]                        /* preload hint for the first source row */
|
||||
pld [r1, r2]                    /* preload hint for the second source row */
|
||||
orrs r5, r4, lr                 /* Z flag set when a and b are both zero */
|
||||
beq 2f                          /* no interpolation needed: plain copy/average path */
|
||||
|
||||
mul r5, r4, lr                  /* r5  = a*b                         = D */
|
||||
rsb r6, r5, lr, lsl #3          /* r6  = 8*b - a*b                   = C */
|
||||
rsb r12, r5, r4, lsl #3         /* r12 = 8*a - a*b                   = B */
|
||||
sub r4, r5, r4, lsl #3          /* r4  = a*b - 8*a */
|
||||
sub r4, r4, lr, lsl #3          /* r4  = a*b - 8*a - 8*b */
|
||||
add r4, r4, #64                 /* r4  = 64 - 8*a - 8*b + a*b        = A */
|
||||
vdup.8 d0, r4                   /* d0 = A in every byte */
|
||||
vdup.8 d2, r12                  /* d2 = B in every byte */
|
||||
vdup.8 d1, r6                   /* d1 = C in every byte */
|
||||
vdup.8 d3, r5                   /* d3 = D in every byte */
|
||||
vtrn.16 q0, q1                  /* d0 = A A B B A A B B, d1 = C C D D C C D D (bytes) */
|
||||
1:                              /* filtering loop: two output rows per pass */
|
||||
vld1.32 {d4[0]}, [r1], r2       /* d4 low  = 4 bytes of source row n;   r1 += stride */
|
||||
vld1.32 {d4[1]}, [r1], r2       /* d4 high = 4 bytes of source row n+1; r1 += stride */
|
||||
vrev64.32 d5, d4                /* d5 low  = row n+1 (high half is replaced next) */
|
||||
vld1.32 {d5[1]}, [r1]           /* d5 high = row n+2; r1 stays here, it is row n of the next pass */
|
||||
vext.8 q3, q2, q2, #1           /* q3 = q2 rotated by one byte: each pixel's right neighbour */
|
||||
vtrn.16 q2, q3                  /* d4 = p0 p1 p1 p2 for rows n, n+1; d5 = same for rows n+1, n+2 */
|
||||
vmull.u8 q8, d4, d0             /* q8  = upper-row pixels * (A A B B ...) */
|
||||
vmlal.u8 q8, d5, d1             /* q8 += lower-row pixels * (C C D D ...) */
|
||||
/* avg only: fetch the two existing destination rows, then rewind r0 */
|
||||
.ifc \type,avg
|
||||
vld1.16 {d18[0]}, [r0,:16], r2  /* d18 lane 0 = 2 bytes of destination row n; r0 += stride */
|
||||
|
||||
vld1.16 {d18[1]}, [r0,:16]      /* d18 lane 1 = 2 bytes of destination row n+1 */
|
||||
sub r0, r0, r2                  /* r0 back to destination row n for the stores below */
|
||||
.endif
|
||||
vtrn.32 d16, d17                /* d16 = A/C partial sums of both rows, d17 = B/D partial sums */
|
||||
vadd.i16 d16, d16, d17          /* d16 = four complete 16-bit weighted sums (2 pixels x 2 rows) */
|
||||
vrshrn.u16 d16, q8, #6          /* (sum + 32) >> 6, narrowed to bytes; low 4 bytes are the result */
|
||||
/* avg only: blend the filtered pixels with the destination bytes */
|
||||
.ifc \type,avg
|
||||
vrhadd.u8 d16, d16, d18         /* d16 = (d16 + d18 + 1) >> 1 per byte */
|
||||
.endif
|
||||
vst1.16 {d16[0]}, [r0,:16], r2  /* write 2 pixels of output row n;   r0 += stride */
|
||||
vst1.16 {d16[1]}, [r0,:16], r2  /* write 2 pixels of output row n+1; r0 += stride */
|
||||
subs r3, r3, #2                 /* two rows done */
|
||||
bgt 1b                          /* repeat while rows remain */
|
||||
pop {r4-r6, pc}                 /* restore registers and return */
|
||||
2:                              /* a == 0 and b == 0: no filtering, two rows per pass */
|
||||
/* put: copy two bytes per row through core registers */
|
||||
.ifc \type,put
|
||||
ldrh r5, [r1], r2               /* r5 = 2 source bytes of row n;   r1 += stride */
|
||||
strh r5, [r0], r2               /* store to destination row n;     r0 += stride */
|
||||
ldrh r6, [r1], r2               /* r6 = 2 source bytes of row n+1; r1 += stride */
|
||||
strh r6, [r0], r2               /* store to destination row n+1;   r0 += stride */
|
||||
/* avg: rounding average of source and destination, two rows at once */
|
||||
.else
|
||||
vld1.16 {d16[0]}, [r1], r2      /* d16 lane 0 = source row n;   r1 += stride */
|
||||
vld1.16 {d16[1]}, [r1], r2      /* d16 lane 1 = source row n+1; r1 += stride */
|
||||
vld1.16 {d18[0]}, [r0,:16], r2  /* d18 lane 0 = destination row n; r0 += stride */
|
||||
vld1.16 {d18[1]}, [r0,:16]      /* d18 lane 1 = destination row n+1 */
|
||||
sub r0, r0, r2                  /* r0 back to destination row n */
|
||||
vrhadd.u8 d16, d16, d18         /* d16 = (src + dst + 1) >> 1 per byte */
|
||||
vst1.16 {d16[0]}, [r0,:16], r2  /* write row n;   r0 += stride */
|
||||
vst1.16 {d16[1]}, [r0,:16], r2  /* write row n+1; r0 += stride */
|
||||
.endif
|
||||
subs r3, r3, #2                 /* two rows done */
|
||||
bgt 2b                          /* repeat while rows remain */
|
||||
pop {r4-r6, pc}                 /* restore registers and return */
|
||||
.endfunc
|
||||
.endm
|
||||
|
||||
.text
|
||||
.align
|
||||
|
||||
@ -327,6 +395,8 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
|
||||
h264_chroma_mc8 avg
|
||||
h264_chroma_mc4 put
|
||||
h264_chroma_mc4 avg
|
||||
/* Instantiate the 2-wide chroma MC macro for both flavours; this emits
 * ff_put_h264_chroma_mc2_neon and ff_avg_h264_chroma_mc2_neon. */
h264_chroma_mc2 put
|
||||
h264_chroma_mc2 avg
|
||||
|
||||
/* H.264 loop filter */
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user