Merge commit '4cb6964244fd6c099383d8b7e99731e72cc844b9'
* commit '4cb6964244fd6c099383d8b7e99731e72cc844b9':
  dcadec: simplify decoding of VQ high frequencies

Conflicts:
  configure
  libavcodec/dcadec.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
fb3c33f3cd
2
configure
vendored
2
configure
vendored
@ -1540,7 +1540,6 @@ HAVE_LIST="
|
|||||||
altivec_h
|
altivec_h
|
||||||
arpa_inet_h
|
arpa_inet_h
|
||||||
asm_mod_q
|
asm_mod_q
|
||||||
asm_mod_y
|
|
||||||
asm_types_h
|
asm_types_h
|
||||||
atomic_cas_ptr
|
atomic_cas_ptr
|
||||||
atomics_native
|
atomics_native
|
||||||
@ -4147,7 +4146,6 @@ EOF
|
|||||||
$ARCH_EXT_LIST_ARM
|
$ARCH_EXT_LIST_ARM
|
||||||
|
|
||||||
check_inline_asm asm_mod_q '"add r0, %Q0, %R0" :: "r"((long long)0)'
|
check_inline_asm asm_mod_q '"add r0, %Q0, %R0" :: "r"((long long)0)'
|
||||||
check_inline_asm asm_mod_y '"vmul.i32 d0, d0, %y0" :: "x"(0)'
|
|
||||||
|
|
||||||
[ $target_os != win32 ] && enabled_all armv6t2 shared !pic && enable_weak_pic
|
[ $target_os != win32 ] && enabled_all armv6t2 shared !pic && enable_weak_pic
|
||||||
|
|
||||||
|
@ -79,27 +79,4 @@ static inline int decode_blockcodes(int code1, int code2, int levels,
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y
|
|
||||||
|
|
||||||
#define int8x8_fmul_int32 int8x8_fmul_int32
|
|
||||||
static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
|
|
||||||
float *dst, const int8_t *src, int scale)
|
|
||||||
{
|
|
||||||
__asm__ ("vcvt.f32.s32 %2, %2, #4 \n"
|
|
||||||
"vld1.8 {d0}, [%1,:64] \n"
|
|
||||||
"vmovl.s8 q0, d0 \n"
|
|
||||||
"vmovl.s16 q1, d1 \n"
|
|
||||||
"vmovl.s16 q0, d0 \n"
|
|
||||||
"vcvt.f32.s32 q0, q0 \n"
|
|
||||||
"vcvt.f32.s32 q1, q1 \n"
|
|
||||||
"vmul.f32 q0, q0, %y2 \n"
|
|
||||||
"vmul.f32 q1, q1, %y2 \n"
|
|
||||||
"vst1.32 {q0-q1}, [%m0,:128] \n"
|
|
||||||
: "=Um"(*(float (*)[8])dst)
|
|
||||||
: "r"(src), "x"(scale)
|
|
||||||
: "d0", "d1", "d2", "d3");
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* AVCODEC_ARM_DCA_H */
|
#endif /* AVCODEC_ARM_DCA_H */
|
||||||
|
@ -49,14 +49,10 @@
|
|||||||
#if ARCH_ARM
|
#if ARCH_ARM
|
||||||
# include "arm/dca.h"
|
# include "arm/dca.h"
|
||||||
#endif
|
#endif
|
||||||
#if ARCH_X86
|
|
||||||
# include "x86/dca.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//#define TRACE
|
//#define TRACE
|
||||||
|
|
||||||
#define DCA_PRIM_CHANNELS_MAX (7)
|
#define DCA_PRIM_CHANNELS_MAX (7)
|
||||||
#define DCA_SUBBANDS (64)
|
|
||||||
#define DCA_ABITS_MAX (32) /* Should be 28 */
|
#define DCA_ABITS_MAX (32) /* Should be 28 */
|
||||||
#define DCA_SUBSUBFRAMES_MAX (4)
|
#define DCA_SUBSUBFRAMES_MAX (4)
|
||||||
#define DCA_SUBFRAMES_MAX (16)
|
#define DCA_SUBFRAMES_MAX (16)
|
||||||
@ -403,7 +399,7 @@ typedef struct {
|
|||||||
int prediction_vq[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< prediction VQ coefs
|
int prediction_vq[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< prediction VQ coefs
|
||||||
int bitalloc[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< bit allocation index
|
int bitalloc[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< bit allocation index
|
||||||
int transition_mode[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< transition mode (transients)
|
int transition_mode[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< transition mode (transients)
|
||||||
int scale_factor[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][2]; ///< scale factors (2 if transient)
|
int32_t scale_factor[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][2];///< scale factors (2 if transient)
|
||||||
int joint_huff[DCA_PRIM_CHANNELS_MAX]; ///< joint subband scale factors codebook
|
int joint_huff[DCA_PRIM_CHANNELS_MAX]; ///< joint subband scale factors codebook
|
||||||
int joint_scale_factor[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< joint subband scale factors
|
int joint_scale_factor[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< joint subband scale factors
|
||||||
float downmix_coef[DCA_PRIM_CHANNELS_MAX + 1][2]; ///< stereo downmix coefficients
|
float downmix_coef[DCA_PRIM_CHANNELS_MAX + 1][2]; ///< stereo downmix coefficients
|
||||||
@ -416,7 +412,7 @@ typedef struct {
|
|||||||
uint8_t core_downmix_amode; ///< audio channel arrangement of embedded downmix
|
uint8_t core_downmix_amode; ///< audio channel arrangement of embedded downmix
|
||||||
uint16_t core_downmix_codes[DCA_PRIM_CHANNELS_MAX + 1][4]; ///< embedded downmix coefficients (9-bit codes)
|
uint16_t core_downmix_codes[DCA_PRIM_CHANNELS_MAX + 1][4]; ///< embedded downmix coefficients (9-bit codes)
|
||||||
|
|
||||||
int high_freq_vq[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< VQ encoded high frequency subbands
|
int32_t high_freq_vq[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< VQ encoded high frequency subbands
|
||||||
|
|
||||||
float lfe_data[2 * DCA_LFE_MAX * (DCA_BLOCKS_MAX + 4)]; ///< Low frequency effect data
|
float lfe_data[2 * DCA_LFE_MAX * (DCA_BLOCKS_MAX + 4)]; ///< Low frequency effect data
|
||||||
int lfe_scale_factor;
|
int lfe_scale_factor;
|
||||||
@ -1249,14 +1245,6 @@ static int decode_blockcodes(int code1, int code2, int levels, int32_t *values)
|
|||||||
static const uint8_t abits_sizes[7] = { 7, 10, 12, 13, 15, 17, 19 };
|
static const uint8_t abits_sizes[7] = { 7, 10, 12, 13, 15, 17, 19 };
|
||||||
static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 };
|
static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 };
|
||||||
|
|
||||||
#ifndef int8x8_fmul_int32
|
|
||||||
static inline void int8x8_fmul_int32(DCADSPContext *dsp, float *dst,
|
|
||||||
const int8_t *src, int scale)
|
|
||||||
{
|
|
||||||
dsp->int8x8_fmul_int32(dst, src, scale);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
|
static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
|
||||||
{
|
{
|
||||||
int k, l;
|
int k, l;
|
||||||
@ -1381,20 +1369,16 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
|
|||||||
/*
|
/*
|
||||||
* Decode VQ encoded high frequencies
|
* Decode VQ encoded high frequencies
|
||||||
*/
|
*/
|
||||||
for (l = s->vq_start_subband[k]; l < s->subband_activity[k]; l++) {
|
if (s->subband_activity[k] > s->vq_start_subband[k]) {
|
||||||
/* 1 vector -> 32 samples but we only need the 8 samples
|
|
||||||
* for this subsubframe. */
|
|
||||||
int hfvq = s->high_freq_vq[k][l];
|
|
||||||
|
|
||||||
if (!s->debug_flag & 0x01) {
|
if (!s->debug_flag & 0x01) {
|
||||||
av_log(s->avctx, AV_LOG_DEBUG,
|
av_log(s->avctx, AV_LOG_DEBUG,
|
||||||
"Stream with high frequencies VQ coding\n");
|
"Stream with high frequencies VQ coding\n");
|
||||||
s->debug_flag |= 0x01;
|
s->debug_flag |= 0x01;
|
||||||
}
|
}
|
||||||
|
s->dcadsp.decode_hf(subband_samples[k], s->high_freq_vq[k],
|
||||||
int8x8_fmul_int32(&s->dcadsp, subband_samples[k][l],
|
high_freq_vq, subsubframe * 8,
|
||||||
&high_freq_vq[hfvq][subsubframe * 8],
|
s->scale_factor[k], s->vq_start_subband[k],
|
||||||
s->scale_factor[k][l][0]);
|
s->subband_activity[k]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,12 +24,22 @@
|
|||||||
#include "libavutil/intreadwrite.h"
|
#include "libavutil/intreadwrite.h"
|
||||||
#include "dcadsp.h"
|
#include "dcadsp.h"
|
||||||
|
|
||||||
static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale)
|
static void decode_hf_c(float dst[DCA_SUBBANDS][8],
|
||||||
|
const int32_t vq_num[DCA_SUBBANDS],
|
||||||
|
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||||
|
int32_t scale[DCA_SUBBANDS][2],
|
||||||
|
intptr_t start, intptr_t end)
|
||||||
{
|
{
|
||||||
float fscale = scale / 16.0;
|
int i, l;
|
||||||
int i;
|
|
||||||
|
for (l = start; l < end; l++) {
|
||||||
|
/* 1 vector -> 32 samples but we only need the 8 samples
|
||||||
|
* for this subsubframe. */
|
||||||
|
const int8_t *ptr = &hf_vq[vq_num[l]][vq_offset];
|
||||||
|
float fscale = scale[l][0] * (1 / 16.0);
|
||||||
for (i = 0; i < 8; i++)
|
for (i = 0; i < 8; i++)
|
||||||
dst[i] = src[i] * fscale;
|
dst[l][i] = ptr[i] * fscale;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
@ -96,7 +106,7 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
|
|||||||
s->lfe_fir[0] = dca_lfe_fir0_c;
|
s->lfe_fir[0] = dca_lfe_fir0_c;
|
||||||
s->lfe_fir[1] = dca_lfe_fir1_c;
|
s->lfe_fir[1] = dca_lfe_fir1_c;
|
||||||
s->qmf_32_subbands = dca_qmf_32_subbands;
|
s->qmf_32_subbands = dca_qmf_32_subbands;
|
||||||
s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
|
s->decode_hf = decode_hf_c;
|
||||||
if (ARCH_ARM) ff_dcadsp_init_arm(s);
|
if (ARCH_ARM) ff_dcadsp_init_arm(s);
|
||||||
if (ARCH_X86) ff_dcadsp_init_x86(s);
|
if (ARCH_X86) ff_dcadsp_init_x86(s);
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,8 @@
|
|||||||
#include "avfft.h"
|
#include "avfft.h"
|
||||||
#include "synth_filter.h"
|
#include "synth_filter.h"
|
||||||
|
|
||||||
|
#define DCA_SUBBANDS 64
|
||||||
|
|
||||||
typedef struct DCADSPContext {
|
typedef struct DCADSPContext {
|
||||||
void (*lfe_fir[2])(float *out, const float *in, const float *coefs);
|
void (*lfe_fir[2])(float *out, const float *in, const float *coefs);
|
||||||
void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
|
void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
|
||||||
@ -30,7 +32,11 @@ typedef struct DCADSPContext {
|
|||||||
int *synth_buf_offset, float synth_buf2[32],
|
int *synth_buf_offset, float synth_buf2[32],
|
||||||
const float window[512], float *samples_out,
|
const float window[512], float *samples_out,
|
||||||
float raXin[32], float scale);
|
float raXin[32], float scale);
|
||||||
void (*int8x8_fmul_int32)(float *dst, const int8_t *src, int scale);
|
void (*decode_hf)(float dst[DCA_SUBBANDS][8],
|
||||||
|
const int32_t vq_num[DCA_SUBBANDS],
|
||||||
|
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||||
|
int32_t scale[DCA_SUBBANDS][2],
|
||||||
|
intptr_t start, intptr_t end);
|
||||||
} DCADSPContext;
|
} DCADSPContext;
|
||||||
|
|
||||||
void ff_dcadsp_init(DCADSPContext *s);
|
void ff_dcadsp_init(DCADSPContext *s);
|
||||||
|
@ -26,18 +26,35 @@ pf_inv16: times 4 dd 0x3D800000 ; 1/16
|
|||||||
|
|
||||||
SECTION_TEXT
|
SECTION_TEXT
|
||||||
|
|
||||||
; void int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale)
|
; void decode_hf(float dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS],
|
||||||
%macro INT8X8_FMUL_INT32 0
|
; const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||||
cglobal int8x8_fmul_int32, 3,3,5, dst, src, scale
|
; int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end)
|
||||||
cvtsi2ss m0, scalem
|
|
||||||
|
%macro DECODE_HF 0
|
||||||
|
cglobal decode_hf, 6,6,5, dst, num, src, offset, scale, start, end
|
||||||
|
lea srcq, [srcq + offsetq]
|
||||||
|
shl startq, 2
|
||||||
|
mov offsetd, endm
|
||||||
|
%define DICT offsetq
|
||||||
|
shl offsetq, 2
|
||||||
|
mov endm, offsetq
|
||||||
|
.loop:
|
||||||
|
%if ARCH_X86_64
|
||||||
|
mov offsetd, [scaleq + 2 * startq]
|
||||||
|
cvtsi2ss m0, offsetd
|
||||||
|
%else
|
||||||
|
cvtsi2ss m0, [scaleq + 2 * startq]
|
||||||
|
%endif
|
||||||
|
mov offsetd, [numq + startq]
|
||||||
mulss m0, [pf_inv16]
|
mulss m0, [pf_inv16]
|
||||||
|
shl DICT, 5
|
||||||
shufps m0, m0, 0
|
shufps m0, m0, 0
|
||||||
%if cpuflag(sse2)
|
%if cpuflag(sse2)
|
||||||
%if cpuflag(sse4)
|
%if cpuflag(sse4)
|
||||||
pmovsxbd m1, [srcq+0]
|
pmovsxbd m1, [srcq + DICT + 0]
|
||||||
pmovsxbd m2, [srcq+4]
|
pmovsxbd m2, [srcq + DICT + 4]
|
||||||
%else
|
%else
|
||||||
movq m1, [srcq]
|
movq m1, [srcq + DICT]
|
||||||
punpcklbw m1, m1
|
punpcklbw m1, m1
|
||||||
mova m2, m1
|
mova m2, m1
|
||||||
punpcklwd m1, m1
|
punpcklwd m1, m1
|
||||||
@ -48,8 +65,8 @@ cglobal int8x8_fmul_int32, 3,3,5, dst, src, scale
|
|||||||
cvtdq2ps m1, m1
|
cvtdq2ps m1, m1
|
||||||
cvtdq2ps m2, m2
|
cvtdq2ps m2, m2
|
||||||
%else
|
%else
|
||||||
movd mm0, [srcq+0]
|
movd mm0, [srcq + DICT + 0]
|
||||||
movd mm1, [srcq+4]
|
movd mm1, [srcq + DICT + 4]
|
||||||
punpcklbw mm0, mm0
|
punpcklbw mm0, mm0
|
||||||
punpcklbw mm1, mm1
|
punpcklbw mm1, mm1
|
||||||
movq mm2, mm0
|
movq mm2, mm0
|
||||||
@ -67,27 +84,33 @@ cglobal int8x8_fmul_int32, 3,3,5, dst, src, scale
|
|||||||
cvtpi2ps m3, mm2
|
cvtpi2ps m3, mm2
|
||||||
cvtpi2ps m4, mm3
|
cvtpi2ps m4, mm3
|
||||||
shufps m0, m0, 0
|
shufps m0, m0, 0
|
||||||
emms
|
|
||||||
shufps m1, m3, q1010
|
shufps m1, m3, q1010
|
||||||
shufps m2, m4, q1010
|
shufps m2, m4, q1010
|
||||||
%endif
|
%endif
|
||||||
mulps m1, m0
|
mulps m1, m0
|
||||||
mulps m2, m0
|
mulps m2, m0
|
||||||
mova [dstq+ 0], m1
|
mova [dstq + 8 * startq + 0], m1
|
||||||
mova [dstq+16], m2
|
mova [dstq + 8 * startq + 16], m2
|
||||||
|
add startq, 4
|
||||||
|
cmp startq, endm
|
||||||
|
jl .loop
|
||||||
|
.end:
|
||||||
|
%if notcpuflag(sse2)
|
||||||
|
emms
|
||||||
|
%endif
|
||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%if ARCH_X86_32
|
%if ARCH_X86_32
|
||||||
INIT_XMM sse
|
INIT_XMM sse
|
||||||
INT8X8_FMUL_INT32
|
DECODE_HF
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
INT8X8_FMUL_INT32
|
DECODE_HF
|
||||||
|
|
||||||
INIT_XMM sse4
|
INIT_XMM sse4
|
||||||
INT8X8_FMUL_INT32
|
DECODE_HF
|
||||||
|
|
||||||
; %1=v0/v1 %2=in1 %3=in2
|
; %1=v0/v1 %2=in1 %3=in2
|
||||||
%macro FIR_LOOP 2-3
|
%macro FIR_LOOP 2-3
|
||||||
|
@ -23,9 +23,15 @@
|
|||||||
#include "libavutil/x86/cpu.h"
|
#include "libavutil/x86/cpu.h"
|
||||||
#include "libavcodec/dcadsp.h"
|
#include "libavcodec/dcadsp.h"
|
||||||
|
|
||||||
void ff_int8x8_fmul_int32_sse(float *dst, const int8_t *src, int scale);
|
void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
|
||||||
void ff_int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale);
|
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||||
void ff_int8x8_fmul_int32_sse4(float *dst, const int8_t *src, int scale);
|
int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
|
||||||
|
void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
|
||||||
|
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||||
|
int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
|
||||||
|
void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
|
||||||
|
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||||
|
int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
|
||||||
void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
|
void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
|
||||||
void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
|
void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
|
||||||
|
|
||||||
@ -35,18 +41,18 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
|
|||||||
|
|
||||||
if (EXTERNAL_SSE(cpu_flags)) {
|
if (EXTERNAL_SSE(cpu_flags)) {
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse;
|
s->decode_hf = ff_decode_hf_sse;
|
||||||
#endif
|
#endif
|
||||||
s->lfe_fir[0] = ff_dca_lfe_fir0_sse;
|
s->lfe_fir[0] = ff_dca_lfe_fir0_sse;
|
||||||
s->lfe_fir[1] = ff_dca_lfe_fir1_sse;
|
s->lfe_fir[1] = ff_dca_lfe_fir1_sse;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse2;
|
s->decode_hf = ff_decode_hf_sse2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE4(cpu_flags)) {
|
if (EXTERNAL_SSE4(cpu_flags)) {
|
||||||
s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse4;
|
s->decode_hf = ff_decode_hf_sse4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user