ffmpeg/libavcodec/ppc/fmtconvert_altivec.c

/*
 * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/fmtconvert.h"

#if HAVE_ALTIVEC

static void int32_to_float_fmul_scalar_altivec(float *dst, const int32_t *src,
                                               float mul, int len)
{
    union {
        vector float v;
        float s[4];
    } mul_u;
    int i;
    vector float src1, src2, dst1, dst2, mul_v, zero;

    zero = (vector float)vec_splat_u32(0);
    mul_u.s[0] = mul;
    mul_v = vec_splat(mul_u.v, 0);

    for (i = 0; i < len; i += 8) {
        src1 = vec_ctf(vec_ld(0,  src+i), 0);
        src2 = vec_ctf(vec_ld(16, src+i), 0);
        dst1 = vec_madd(src1, mul_v, zero);
        dst2 = vec_madd(src2, mul_v, zero);
        vec_st(dst1,  0, dst+i);
        vec_st(dst2, 16, dst+i);
    }
}


static vector signed short float_to_int16_one_altivec(const float *src)
{
    vector float s0 = vec_ld(0, src);
    vector float s1 = vec_ld(16, src);
    vector signed int t0 = vec_cts(s0, 0);
    vector signed int t1 = vec_cts(s1, 0);
    return vec_packs(t0,t1);
}

static void float_to_int16_altivec(int16_t *dst, const float *src, long len)
{
    int i;
    vector signed short d0, d1, d;
    vector unsigned char align;
    if (((long)dst) & 15) { //FIXME
        for (i = 0; i < len - 7; i += 8) {
            d0 = vec_ld(0, dst+i);
            d  = float_to_int16_one_altivec(src + i);
            d1 = vec_ld(15, dst+i);
            d1 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
            align = vec_lvsr(0, dst + i);
            d0 = vec_perm(d1, d, align);
            d1 = vec_perm(d, d1, align);
            vec_st(d0,  0, dst + i);
            vec_st(d1, 15, dst + i);
        }
    } else {
        for (i = 0; i < len - 7; i += 8) {
            d = float_to_int16_one_altivec(src + i);
            vec_st(d, 0, dst + i);
        }
    }
}

/*
 * Store element `elem` of vector `v` at `dst`, then advance `dst` by `inc`
 * elements.
 *
 * The element is splatted across the whole vector first, so every lane holds
 * the wanted value when vec_ste writes its single element.
 * `elem` must be a literal: vec_splat() needs a constant element index.
 * `dst` is modified, so it must be an lvalue.
 */
#define VSTE_INC(dst, v, elem, inc) do {                        \
        vector signed short splat_ = vec_splat(v, elem);        \
        vec_ste(splat_, 0, dst);                                \
        dst += inc;                                             \
    } while (0)

static void float_to_int16_stride_altivec(int16_t *dst, const float *src,
                                          long len, int stride)
{
    int i;
    vector signed short d;

    for (i = 0; i < len - 7; i += 8) {
        d = float_to_int16_one_altivec(src + i);
        VSTE_INC(dst, d, 0, stride);
        VSTE_INC(dst, d, 1, stride);
        VSTE_INC(dst, d, 2, stride);
        VSTE_INC(dst, d, 3, stride);
        VSTE_INC(dst, d, 4, stride);
        VSTE_INC(dst, d, 5, stride);
        VSTE_INC(dst, d, 6, stride);
        VSTE_INC(dst, d, 7, stride);
    }
}

static void float_to_int16_interleave_altivec(int16_t *dst, const float **src,
                                              long len, int channels)
{
    int i;
    vector signed short d0, d1, d2, c0, c1, t0, t1;
    vector unsigned char align;

    if (channels == 1)
        float_to_int16_altivec(dst, src[0], len);
    else {
        if (channels == 2) {
            if (((long)dst) & 15) {
                for (i = 0; i < len - 7; i += 8) {
                    d0 = vec_ld(0,  dst + i);
                    t0 = float_to_int16_one_altivec(src[0] + i);
                    d1 = vec_ld(31, dst + i);
                    t1 = float_to_int16_one_altivec(src[1] + i);
                    c0 = vec_mergeh(t0, t1);
                    c1 = vec_mergel(t0, t1);
                    d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
                    align = vec_lvsr(0, dst + i);
                    d0 = vec_perm(d2, c0, align);
                    d1 = vec_perm(c0, c1, align);
                    vec_st(d0,  0, dst + i);
                    d0 = vec_perm(c1, d2, align);
                    vec_st(d1, 15, dst + i);
                    vec_st(d0, 31, dst + i);
                    dst += 8;
                }
            } else {
                for (i = 0; i < len - 7; i += 8) {
                    t0 = float_to_int16_one_altivec(src[0] + i);
                    t1 = float_to_int16_one_altivec(src[1] + i);
                    d0 = vec_mergeh(t0, t1);
                    d1 = vec_mergel(t0, t1);
                    vec_st(d0,  0, dst + i);
                    vec_st(d1, 16, dst + i);
                    dst += 8;
                }
            }
        } else {
            for (i = 0; i < channels; i++)
                float_to_int16_stride_altivec(dst + i, src[i], len, channels);
        }
    }
}

#endif /* HAVE_ALTIVEC */

/**
 * Install the AltiVec format-conversion routines into a FmtConvertContext.
 *
 * Does nothing when the build has no AltiVec support or when the running
 * CPU does not report AV_CPU_FLAG_ALTIVEC. The int32-to-float routine is
 * always installed; the float-to-int16 routines are skipped when the codec
 * context requests CODEC_FLAG_BITEXACT.
 * NOTE(review): presumably because their results can differ from the C
 * reference implementation -- confirm.
 *
 * @param c      context whose function pointers are updated
 * @param avctx  codec context; only its flags field is read
 */
av_cold void ff_fmt_convert_init_ppc(FmtConvertContext *c,
                                     AVCodecContext *avctx)
{
#if HAVE_ALTIVEC
    if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
        c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec;

        if (avctx->flags & CODEC_FLAG_BITEXACT)
            return;

        c->float_to_int16            = float_to_int16_altivec;
        c->float_to_int16_interleave = float_to_int16_interleave_altivec;
    }
#endif /* HAVE_ALTIVEC */
}
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`/*`
			`* Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>`
			`*`
			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with FFmpeg; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

ppc: More consistent arch initialization 2013-04-14 14:47:30 +02:00			`#include "config.h"`
Add av_cold attributes to arch-specific init functions 2013-02-01 10:31:59 +01:00			`#include "libavutil/attributes.h"`
ppc: Add missing AltiVec cpuflag detection invocations 2013-08-23 18:48:17 +02:00			`#include "libavutil/cpu.h"`
Don't include common.h from avutil.h Signed-off-by: Martin Storsjö <martin@martin.st> 2012-08-06 15:49:32 +02:00			`#include "libavutil/mem.h"`
ppc: More consistent arch initialization 2013-04-14 14:47:30 +02:00			`#include "libavutil/ppc/util_altivec.h"`
			`#include "libavcodec/fmtconvert.h"`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00
ppc: More consistent arch initialization 2013-04-14 14:47:30 +02:00			`#if HAVE_ALTIVEC`

fmtconvert: int32_t input to int32_to_float_fmul_scalar It was previously declared as int. Does not change fate results for x86. Conflicts: libavcodec/ppc/fmtconvert_altivec.c Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 2012-12-27 22:33:51 +01:00			`static void int32_to_float_fmul_scalar_altivec(float *dst, const int32_t *src,`
dsputil: ppc: cosmetics: pretty-print 2012-07-22 20:37:24 +02:00			`float mul, int len)`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`{`
			`union {`
			`vector float v;`
			`float s[4];`
			`} mul_u;`
			`int i;`
			`vector float src1, src2, dst1, dst2, mul_v, zero;`

			`zero = (vector float)vec_splat_u32(0);`
			`mul_u.s[0] = mul;`
			`mul_v = vec_splat(mul_u.v, 0);`

dsputil: ppc: cosmetics: pretty-print 2012-07-22 20:37:24 +02:00			`for (i = 0; i < len; i += 8) {`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`src1 = vec_ctf(vec_ld(0, src+i), 0);`
			`src2 = vec_ctf(vec_ld(16, src+i), 0);`
			`dst1 = vec_madd(src1, mul_v, zero);`
			`dst2 = vec_madd(src2, mul_v, zero);`
			`vec_st(dst1, 0, dst+i);`
			`vec_st(dst2, 16, dst+i);`
			`}`
			`}`


dsputil: ppc: cosmetics: pretty-print 2012-07-22 20:37:24 +02:00			`static vector signed short float_to_int16_one_altivec(const float *src)`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`{`
			`vector float s0 = vec_ld(0, src);`
			`vector float s1 = vec_ld(16, src);`
			`vector signed int t0 = vec_cts(s0, 0);`
			`vector signed int t1 = vec_cts(s1, 0);`
			`return vec_packs(t0,t1);`
			`}`

			`static void float_to_int16_altivec(int16_t *dst, const float *src, long len)`
			`{`
			`int i;`
			`vector signed short d0, d1, d;`
			`vector unsigned char align;`
dsputil: ppc: cosmetics: pretty-print 2012-07-22 20:37:24 +02:00			`if (((long)dst) & 15) { //FIXME`
			`for (i = 0; i < len - 7; i += 8) {`
			`d0 = vec_ld(0, dst+i);`
			`d = float_to_int16_one_altivec(src + i);`
			`d1 = vec_ld(15, dst+i);`
			`d1 = vec_perm(d1, d0, vec_lvsl(0, dst + i));`
			`align = vec_lvsr(0, dst + i);`
			`d0 = vec_perm(d1, d, align);`
			`d1 = vec_perm(d, d1, align);`
			`vec_st(d0, 0, dst + i);`
			`vec_st(d1, 15, dst + i);`
			`}`
			`} else {`
			`for (i = 0; i < len - 7; i += 8) {`
			`d = float_to_int16_one_altivec(src + i);`
			`vec_st(d, 0, dst + i);`
			`}`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`}`
			`}`

ppc: fix Altivec build with old compilers The vec_splat() intrinsic requires a constant argument for the element number, and the code relies on the compiler unrolling the loop to provide this. Manually unrolling the loop avoids this reliance and works with all compilers. Signed-off-by: Mans Rullgard <mans@mansr.com> 2012-10-09 00:01:02 +02:00			`#define VSTE_INC(dst, v, elem, inc) do { \`
			`vector signed short s = vec_splat(v, elem); \`
			`vec_ste(s, 0, dst); \`
			`dst += inc; \`
			`} while (0)`

ppc: fmtconvert: kill VLA in float_to_int16_interleave_altivec() Signed-off-by: Mans Rullgard <mans@mansr.com> 2012-10-04 02:26:50 +02:00			`static void float_to_int16_stride_altivec(int16_t *dst, const float *src,`
			`long len, int stride)`
			`{`
Fix build failure on osx 10.5.8 ppc Second parameter to vec_splat must be a literal, not a variable value. Therefore the second nested for-loop in float_to_int16_stride_altivec had to be unrolled. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 2012-10-08 05:13:28 +02:00			`int i;`
ppc: fmtconvert: Drop two unused variables. 2013-02-01 01:54:19 +01:00			`vector signed short d;`
ppc: fmtconvert: kill VLA in float_to_int16_interleave_altivec() Signed-off-by: Mans Rullgard <mans@mansr.com> 2012-10-04 02:26:50 +02:00
			`for (i = 0; i < len - 7; i += 8) {`
			`d = float_to_int16_one_altivec(src + i);`
ppc: fix Altivec build with old compilers The vec_splat() intrinsic requires a constant argument for the element number, and the code relies on the compiler unrolling the loop to provide this. Manually unrolling the loop avoids this reliance and works with all compilers. Signed-off-by: Mans Rullgard <mans@mansr.com> 2012-10-09 00:01:02 +02:00			`VSTE_INC(dst, d, 0, stride);`
			`VSTE_INC(dst, d, 1, stride);`
			`VSTE_INC(dst, d, 2, stride);`
			`VSTE_INC(dst, d, 3, stride);`
			`VSTE_INC(dst, d, 4, stride);`
			`VSTE_INC(dst, d, 5, stride);`
			`VSTE_INC(dst, d, 6, stride);`
			`VSTE_INC(dst, d, 7, stride);`
ppc: fmtconvert: kill VLA in float_to_int16_interleave_altivec() Signed-off-by: Mans Rullgard <mans@mansr.com> 2012-10-04 02:26:50 +02:00			`}`
			`}`

dsputil: ppc: cosmetics: pretty-print 2012-07-22 20:37:24 +02:00			`static void float_to_int16_interleave_altivec(int16_t *dst, const float **src,`
			`long len, int channels)`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`{`
			`int i;`
			`vector signed short d0, d1, d2, c0, c1, t0, t1;`
			`vector unsigned char align;`
dsputil: ppc: cosmetics: pretty-print 2012-07-22 20:37:24 +02:00
			`if (channels == 1)`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`float_to_int16_altivec(dst, src[0], len);`
dsputil: ppc: cosmetics: pretty-print 2012-07-22 20:37:24 +02:00			`else {`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`if (channels == 2) {`
dsputil: ppc: cosmetics: pretty-print 2012-07-22 20:37:24 +02:00			`if (((long)dst) & 15) {`
			`for (i = 0; i < len - 7; i += 8) {`
			`d0 = vec_ld(0, dst + i);`
			`t0 = float_to_int16_one_altivec(src[0] + i);`
			`d1 = vec_ld(31, dst + i);`
			`t1 = float_to_int16_one_altivec(src[1] + i);`
			`c0 = vec_mergeh(t0, t1);`
			`c1 = vec_mergel(t0, t1);`
			`d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));`
			`align = vec_lvsr(0, dst + i);`
			`d0 = vec_perm(d2, c0, align);`
			`d1 = vec_perm(c0, c1, align);`
			`vec_st(d0, 0, dst + i);`
			`d0 = vec_perm(c1, d2, align);`
			`vec_st(d1, 15, dst + i);`
			`vec_st(d0, 31, dst + i);`
			`dst += 8;`
			`}`
			`} else {`
			`for (i = 0; i < len - 7; i += 8) {`
			`t0 = float_to_int16_one_altivec(src[0] + i);`
			`t1 = float_to_int16_one_altivec(src[1] + i);`
			`d0 = vec_mergeh(t0, t1);`
			`d1 = vec_mergel(t0, t1);`
			`vec_st(d0, 0, dst + i);`
			`vec_st(d1, 16, dst + i);`
			`dst += 8;`
			`}`
			`}`
			`} else {`
ppc: fmtconvert: kill VLA in float_to_int16_interleave_altivec() Signed-off-by: Mans Rullgard <mans@mansr.com> 2012-10-04 02:26:50 +02:00			`for (i = 0; i < channels; i++)`
			`float_to_int16_stride_altivec(dst + i, src[i], len, channels);`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`}`
dsputil: ppc: cosmetics: pretty-print 2012-07-22 20:37:24 +02:00			`}`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`}`

ppc: More consistent arch initialization 2013-04-14 14:47:30 +02:00			`#endif /* HAVE_ALTIVEC */`

			`av_cold void ff_fmt_convert_init_ppc(FmtConvertContext *c,`
			`AVCodecContext *avctx)`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`{`
ppc: More consistent arch initialization 2013-04-14 14:47:30 +02:00			`#if HAVE_ALTIVEC`
ppc: Add missing AltiVec cpuflag detection invocations 2013-08-23 18:48:17 +02:00			`if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))`
			`return;`

Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec;`
dsputil: ppc: cosmetics: pretty-print 2012-07-22 20:37:24 +02:00			`if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`c->float_to_int16 = float_to_int16_altivec;`
			`c->float_to_int16_interleave = float_to_int16_interleave_altivec;`
			`}`
ppc: More consistent arch initialization 2013-04-14 14:47:30 +02:00			`#endif /* HAVE_ALTIVEC */`
Separate format conversion DSP functions from DSPContext. This will be beneficial for use with the audio conversion API without requiring it to depend on all of dsputil. Signed-off-by: Mans Rullgard <mans@mansr.com> (cherry picked from commit c73d99e672329c8f2df290736ffc474c360ac4ae) 2011-01-30 16:06:46 +01:00			`}`