Merge pull request #719 from dongzha/MC

Modify the ARM32 NEON code for ExpandPictureChroma_neon to handle the case where UVWidth % 16 == 8.
This commit is contained in:
Licai Guo 2014-04-21 14:38:51 +08:00
commit 3f2ea77908
2 changed files with 23 additions and 5 deletions

View File

@ -87,7 +87,7 @@ WELS_ASM_FUNC_END
WELS_ASM_FUNC_BEGIN ExpandPictureChroma_neon
stmdb sp!, {r4-r8}
stmdb sp!, {r4-r9}
//Save the dst
mov r7, r0
mov r8, r3
@ -109,12 +109,14 @@ _expand_picture_chroma_loop2:
//for the top and bottom expand
add r2, #32
mov r9, r2
bic r2, #15
sub r0, #16
mla r4, r1, r3, r0
sub r4, r1
_expand_picture_chroma_loop0:
mov r5, #16
mls r5, r5, r1, r0
mls r5, r5, r1, r0
add r6, r4, r1
vld1.8 {q0}, [r0]!
vld1.8 {q1}, [r4]!
@ -124,14 +126,30 @@ _expand_picture_chroma_loop1:
vst1.8 {q0}, [r5], r1
vst1.8 {q1}, [r6], r1
subs r8, #1
bne _expand_picture_chroma_loop1
bne _expand_picture_chroma_loop1
subs r2, #16
bne _expand_picture_chroma_loop0
//vldreq.32 d0, [r0]
ldmia sp!, {r4-r8}
and r9, #15
cmp r9, #8
bne _expand_picture_chroma_end
mov r5, #16
mls r5, r5, r1, r0
add r6, r4, r1
vld1.8 {d0}, [r0]!
vld1.8 {d2}, [r4]!
mov r8, #16
_expand_picture_chroma_loop3:
vst1.8 {d0}, [r5], r1
vst1.8 {d2}, [r6], r1
subs r8, #1
bne _expand_picture_chroma_loop3
_expand_picture_chroma_end:
ldmia sp!, {r4-r9}
WELS_ASM_FUNC_END
#endif

View File

@ -132,7 +132,7 @@ void InitExpandPictureFunc (void* pL, const uint32_t kuiCPUFlag) {
#if defined(HAVE_NEON)
if (kuiCPUFlag & WELS_CPU_NEON) {
pFuncList->pfExpandLumaPicture = ExpandPictureLuma_neon;
pFuncList->pfExpandChromaPicture[0] = ExpandPictureChroma_c;
pFuncList->pfExpandChromaPicture[0] = ExpandPictureChroma_neon;
pFuncList->pfExpandChromaPicture[1] = ExpandPictureChroma_neon;
}
#endif//HAVE_NEON