ARMv6 optimised pix_sum
Originally committed as revision 21705 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
@@ -594,3 +594,28 @@ function ff_pix_norm1_armv6, export=1
|
|||||||
mov r0, lr
|
mov r0, lr
|
||||||
pop {r4-r6, pc}
|
pop {r4-r6, pc}
|
||||||
.endfunc
|
.endfunc
|
||||||
|
|
||||||
|
@ ff_pix_sum_armv6: sum of all byte values in a block of 16 rows x 16 bytes.
@ C prototype: int ff_pix_sum_armv6(uint8_t *pix, int line_size)
@ Presumably r0 = pix, r1 = line_size (byte step between rows) and the
@ result is returned in r0, per the ARM procedure call standard -- confirm.
@ NOTE(review): rows are read with word loads (ldr); this assumes pix is
@ word-aligned or that unaligned word access is permitted -- confirm.
function ff_pix_sum_armv6, export=1
|
||||||
|
push {r4-r7, lr}            @ save scratch registers and the return address
|
||||||
|
mov r12, #16                @ r12 = number of rows left to process
|
||||||
|
mov r2, #0                  @ r2 = first partial sum
|
||||||
|
mov r3, #0                  @ r3 = second partial sum
|
||||||
|
mov lr, #0                  @ lr = 0, so |byte - 0| in usada8 is the byte itself
|
||||||
|
ldr r4, [r0]                @ preload bytes 0-3 of the first row
|
||||||
|
1:
|
||||||
|
subs r12, r12, #1           @ count this row; flags are consumed by beq/bgt below
|
||||||
|
ldr r5, [r0, #4]            @ bytes 4-7 of the current row
|
||||||
|
usada8 r2, r4, lr, r2       @ r2 += the four bytes of r4
|
||||||
|
ldr r6, [r0, #8]            @ bytes 8-11 of the current row
|
||||||
|
usada8 r3, r5, lr, r3       @ r3 += the four bytes of r5
|
||||||
|
ldr r7, [r0, #12]           @ bytes 12-15 of the current row
|
||||||
|
usada8 r2, r6, lr, r2       @ r2 += the four bytes of r6
|
||||||
|
beq 2f                      @ last row: skip loading a further row
|
||||||
|
ldr r4, [r0, r1]!           @ r0 += r1 (next row), preload its bytes 0-3
|
||||||
|
usada8 r3, r7, lr, r3       @ r3 += the four bytes of r7 (rows other than the last)
|
||||||
|
bgt 1b                      @ rows remain (always true when reached, as beq was not taken)
|
||||||
|
2:
|
||||||
|
usada8 r3, r7, lr, r3       @ last row only: add its bytes 12-15 still pending in r7
|
||||||
|
add r0, r2, r3              @ combine both partial sums into the result
|
||||||
|
pop {r4-r7, pc}             @ restore registers and return
|
||||||
|
.endfunc
|
||||||
|
@@ -68,6 +68,7 @@ int ff_sse16_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
|
|||||||
int line_size, int h);
|
int line_size, int h);
|
||||||
|
|
||||||
int ff_pix_norm1_armv6(uint8_t *pix, int line_size);
|
int ff_pix_norm1_armv6(uint8_t *pix, int line_size);
|
||||||
|
int ff_pix_sum_armv6(uint8_t *pix, int line_size);
|
||||||
|
|
||||||
void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
|
void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
@@ -116,4 +117,5 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->sse[0] = ff_sse16_armv6;
|
c->sse[0] = ff_sse16_armv6;
|
||||||
|
|
||||||
c->pix_norm1 = ff_pix_norm1_armv6;
|
c->pix_norm1 = ff_pix_norm1_armv6;
|
||||||
|
c->pix_sum = ff_pix_sum_armv6;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user