SBR DSP x86: implement SSE sbr_sum_square_sse
The 32-bit targets have been compiled with -mfpmath=sse for a proper reference. Timings for sbr_sum_square: C/32 bits: 82c (unrolled)/102c; C/64 bits: 69c (unrolled)/82c; SSE/32 bits: 42c; SSE/64 bits: 31c. Using the SSE4.1 dpps instruction to perform the final sum is slower. Not unrolling the loop to perform 8 operations per iteration costs 10 more cycles. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:

committed by
Ronald S. Bultje

parent
2e74a5abc2
commit
34454c761f
@@ -238,4 +238,6 @@ av_cold void ff_sbrdsp_init(SBRDSPContext *s)
|
|||||||
|
|
||||||
if (ARCH_ARM)
|
if (ARCH_ARM)
|
||||||
ff_sbrdsp_init_arm(s);
|
ff_sbrdsp_init_arm(s);
|
||||||
|
if (HAVE_MMX)
|
||||||
|
ff_sbrdsp_init_x86(s);
|
||||||
}
|
}
|
||||||
|
@@ -46,5 +46,6 @@ extern const float ff_sbr_noise_table[][2];
|
|||||||
|
|
||||||
void ff_sbrdsp_init(SBRDSPContext *s);
|
void ff_sbrdsp_init(SBRDSPContext *s);
|
||||||
void ff_sbrdsp_init_arm(SBRDSPContext *s);
|
void ff_sbrdsp_init_arm(SBRDSPContext *s);
|
||||||
|
void ff_sbrdsp_init_x86(SBRDSPContext *s);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -47,6 +47,8 @@ YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
|
|||||||
MMX-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp-init.o
|
MMX-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp-init.o
|
||||||
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
|
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
|
||||||
MMX-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp-init.o
|
MMX-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp-init.o
|
||||||
|
MMX-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp_init.o
|
||||||
|
YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
|
||||||
MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o
|
MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o
|
||||||
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
|
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
|
||||||
YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o
|
YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o
|
||||||
|
74
libavcodec/x86/sbrdsp.asm
Normal file
74
libavcodec/x86/sbrdsp.asm
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
;******************************************************************************
|
||||||
|
;* AAC Spectral Band Replication decoding functions
|
||||||
|
;* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet@gmail.com>
|
||||||
|
;*
|
||||||
|
;* This file is part of Libav.
|
||||||
|
;*
|
||||||
|
;* Libav is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* Libav is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with Libav; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;******************************************************************************
|
||||||
|
|
||||||
|
%include "x86inc.asm"
|
||||||
|
%include "x86util.asm"
|
||||||
|
|
||||||
|
;SECTION_RODATA
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
INIT_XMM sse
;------------------------------------------------------------------------------
; float ff_sbr_sum_square_sse(float (*x)[2], int n)
;
; Returns the sum of the squares of every float in x[0..n-1][0..1].
;   r0 = x  pointer to the first float pair (loaded unaligned)
;   r1 = n  number of float pairs; the tail loop handles pairs two at a
;           time, so an odd trailing pair would be ignored (n is expected
;           to be even)
;
; Only SSE1 instructions are used, as the function is selected on
; AV_CPU_FLAG_SSE alone. The counters are accessed through their 32-bit
; names because n is an int: on x86-64 the upper half of the argument
; register is not guaranteed to be zero/sign-extended.
;------------------------------------------------------------------------------
cglobal sbr_sum_square, 2, 3, 6
    mov        r2d, r1d
    xorps       m0, m0              ; accumulator A
    xorps       m1, m1              ; accumulator B
    sar        r2d, 3               ; r2 = number of 8-pair (64-byte) chunks
    jz          .prepare
.loop:
    ; 16 floats per iteration, alternating between the two accumulators
    movu        m2, [r0 +  0]
    movu        m3, [r0 + 16]
    movu        m4, [r0 + 32]
    movu        m5, [r0 + 48]
    mulps       m2, m2
    mulps       m3, m3
    mulps       m4, m4
    mulps       m5, m5
    addps       m0, m2
    addps       m1, m3
    addps       m0, m4
    addps       m1, m5
    add         r0, 64
    dec        r2d
    jnz         .loop
.prepare:
    and        r1d, 7               ; pairs left over after the unrolled loop
    sar        r1d, 1               ; ... consumed two pairs (4 floats) at a time
    jz          .end
.endloop:
    movu        m2, [r0]
    add         r0, 16
    mulps       m2, m2
    dec        r1d                  ; sets ZF for the jnz below; addps leaves flags alone
    addps       m0, m2
    jnz         .endloop
.end:
    ; horizontal sum of both accumulators into lane 0 of m0
    addps       m0, m1
    movhlps     m2, m0              ; lanes 2,3 -> lanes 0,1 of m2
    addps       m0, m2              ; lane0 = l0+l2, lane1 = l1+l3
    movss       m1, m0
    shufps      m0, m0, 1           ; bring lane 1 down to lane 0
    addss       m0, m1
%if ARCH_X86_64 == 0
    ; 32-bit: hand the result back on the x87 stack, bouncing it through the
    ; first argument's stack slot. movss rather than movd: movd from an XMM
    ; register is an SSE2 instruction.
    movss      r0m, m0
    fld   dword r0m
%endif
    RET
|
37
libavcodec/x86/sbrdsp_init.c
Normal file
37
libavcodec/x86/sbrdsp_init.c
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
/*
|
||||||
|
* AAC Spectral Band Replication decoding functions
|
||||||
|
* Copyright (c) 2012 Christophe Gisquet <christophe.gisquet@gmail.com>
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavcodec/sbrdsp.h"
|
||||||
|
|
||||||
|
float ff_sbr_sum_square_sse(float (*x)[2], int n);
|
||||||
|
|
||||||
|
/**
 * Install the x86 implementations of the SBR DSP routines into s.
 *
 * The external-assembly sum_square routine is selected only when the build
 * has YASM support and the running CPU reports SSE; otherwise s is left
 * untouched.
 */
void ff_sbrdsp_init_x86(SBRDSPContext *s)
{
    /* HAVE_YASM is tested first so the CPU query and the reference to the
     * asm symbol are both skipped when assembly is disabled. */
    if (HAVE_YASM && (av_get_cpu_flags() & AV_CPU_FLAG_SSE))
        s->sum_square = ff_sbr_sum_square_sse;
}
|
Reference in New Issue
Block a user