From 182826c8846f2f7f0538c4b2a09e882e29881804 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 11 Mar 2011 02:50:57 +0000 Subject: [PATCH] ac3: armv6 optimised bit_alloc_calc_bap Signed-off-by: Mans Rullgard --- libavcodec/arm/Makefile | 2 + libavcodec/arm/ac3dsp_armv6.S | 83 ++++++++++++++++++++++++++++++++ libavcodec/arm/ac3dsp_init_arm.c | 9 ++++ 3 files changed, 94 insertions(+) create mode 100644 libavcodec/arm/ac3dsp_armv6.S diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 08697da29b..27c2643de8 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -1,6 +1,8 @@ OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \ +ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o + OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o diff --git a/libavcodec/arm/ac3dsp_armv6.S b/libavcodec/arm/ac3dsp_armv6.S new file mode 100644 index 0000000000..7f01addbde --- /dev/null +++ b/libavcodec/arm/ac3dsp_armv6.S @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2011 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.S" + +function ff_ac3_bit_alloc_calc_bap_armv6, export=1 + ldr r12, [sp] + cmp r12, #-960 + beq 4f + push {r4-r11,lr} + add r5, sp, #40 + movrel r4, X(ff_ac3_bin_to_band_tab) + movrel lr, X(ff_ac3_band_start_tab) + ldm r5, {r5-r7} + ldrb r4, [r4, r2] + add r1, r1, r2, lsl #1 @ psd + start + add r0, r0, r4, lsl #1 @ mask + band + add r4, lr, r4 + add r7, r7, r2 @ bap + start + ldrb r10, [r4], #1 +1: + ldrsh r9, [r0], #2 @ mask[band] + movw r8, #0x1fe0 + sub r9, r9, r12 @ - snr_offset + mov r11, r10 + ldrb r10, [r4], #1 @ band_start_tab[band++] + subs r9, r9, r5 @ - floor + movlt r9, #0 + cmp r10, r3 @ - end + and r9, r9, r8 @ & 0x1fe0 + subgt r8, r3, r11 + suble r8, r10, r11 + add r9, r9, r5 @ + floor => m + tst r8, #1 + add r2, r7, r8 + bne 3f + b 5f +2: + ldrsh r8, [r1], #2 + ldrsh lr, [r1], #2 + sub r8, r8, r9 + sub lr, lr, r9 + usat r8, #6, r8, asr #5 @ address + usat lr, #6, lr, asr #5 + ldrb r8, [r6, r8] @ bap_tab[address] + ldrb lr, [r6, lr] + strb r8, [r7], #1 @ bap[bin] + strb lr, [r7], #1 +5: cmp r7, r2 + blo 2b + cmp r3, r11 + bgt 1b + pop {r4-r11,pc} +3: + ldrsh r8, [r1], #2 @ psd[bin] + sub r8, r8, r9 @ - m + usat r8, #6, r8, asr #5 @ address + ldrb r8, [r6, r8] @ bap_tab[address] + strb r8, [r7], #1 @ bap[bin] + b 5b +4: + ldr r0, [sp, #12] + mov r1, #0 + mov r2, #256 + b memset +endfunc diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c index 03200e64cd..c1e96e2d1b 100644 --- a/libavcodec/arm/ac3dsp_init_arm.c +++ b/libavcodec/arm/ac3dsp_init_arm.c @@ -29,8 +29,17 @@ void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift); void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift); void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len); +void ff_ac3_bit_alloc_calc_bap_armv6(int16_t *mask, int16_t *psd, + int start, int end, + int snr_offset, int floor, + const uint8_t *bap_tab, uint8_t *bap); + av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) { + if (HAVE_ARMV6) { + c->bit_alloc_calc_bap = ff_ac3_bit_alloc_calc_bap_armv6; + } + if (HAVE_NEON) { c->ac3_exponent_min = ff_ac3_exponent_min_neon; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_neon;