prores: extract idct into its own dspcontext and merge with put_pixels.
This commit is contained in:
parent
8babfc033e
commit
92fb52d906
@ -295,7 +295,7 @@ OBJS-$(CONFIG_PNG_DECODER) += png.o pngdec.o
|
|||||||
OBJS-$(CONFIG_PNG_ENCODER) += png.o pngenc.o
|
OBJS-$(CONFIG_PNG_ENCODER) += png.o pngenc.o
|
||||||
OBJS-$(CONFIG_PPM_DECODER) += pnmdec.o pnm.o
|
OBJS-$(CONFIG_PPM_DECODER) += pnmdec.o pnm.o
|
||||||
OBJS-$(CONFIG_PPM_ENCODER) += pnmenc.o pnm.o
|
OBJS-$(CONFIG_PPM_ENCODER) += pnmenc.o pnm.o
|
||||||
OBJS-$(CONFIG_PRORES_DECODER) += proresdec.o
|
OBJS-$(CONFIG_PRORES_DECODER) += proresdec.o proresdsp.o
|
||||||
OBJS-$(CONFIG_PTX_DECODER) += ptx.o
|
OBJS-$(CONFIG_PTX_DECODER) += ptx.o
|
||||||
OBJS-$(CONFIG_QCELP_DECODER) += qcelpdec.o celp_math.o \
|
OBJS-$(CONFIG_QCELP_DECODER) += qcelpdec.o celp_math.o \
|
||||||
celp_filters.o acelp_vectors.o \
|
celp_filters.o acelp_vectors.o \
|
||||||
|
@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ff_init_scantable_permutation(uint8_t *idct_permutation,
|
||||||
|
int idct_permutation_type)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
switch(idct_permutation_type){
|
||||||
|
case FF_NO_IDCT_PERM:
|
||||||
|
for(i=0; i<64; i++)
|
||||||
|
idct_permutation[i]= i;
|
||||||
|
break;
|
||||||
|
case FF_LIBMPEG2_IDCT_PERM:
|
||||||
|
for(i=0; i<64; i++)
|
||||||
|
idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
|
||||||
|
break;
|
||||||
|
case FF_SIMPLE_IDCT_PERM:
|
||||||
|
for(i=0; i<64; i++)
|
||||||
|
idct_permutation[i]= simple_mmx_permutation[i];
|
||||||
|
break;
|
||||||
|
case FF_TRANSPOSE_IDCT_PERM:
|
||||||
|
for(i=0; i<64; i++)
|
||||||
|
idct_permutation[i]= ((i&7)<<3) | (i>>3);
|
||||||
|
break;
|
||||||
|
case FF_PARTTRANS_IDCT_PERM:
|
||||||
|
for(i=0; i<64; i++)
|
||||||
|
idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
|
||||||
|
break;
|
||||||
|
case FF_SSE2_IDCT_PERM:
|
||||||
|
for(i=0; i<64; i++)
|
||||||
|
idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int pix_sum_c(uint8_t * pix, int line_size)
|
static int pix_sum_c(uint8_t * pix, int line_size)
|
||||||
{
|
{
|
||||||
int s, i, j;
|
int s, i, j;
|
||||||
@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
|
c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
|
||||||
}
|
}
|
||||||
|
|
||||||
switch(c->idct_permutation_type){
|
ff_init_scantable_permutation(c->idct_permutation,
|
||||||
case FF_NO_IDCT_PERM:
|
c->idct_permutation_type);
|
||||||
for(i=0; i<64; i++)
|
|
||||||
c->idct_permutation[i]= i;
|
|
||||||
break;
|
|
||||||
case FF_LIBMPEG2_IDCT_PERM:
|
|
||||||
for(i=0; i<64; i++)
|
|
||||||
c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
|
|
||||||
break;
|
|
||||||
case FF_SIMPLE_IDCT_PERM:
|
|
||||||
for(i=0; i<64; i++)
|
|
||||||
c->idct_permutation[i]= simple_mmx_permutation[i];
|
|
||||||
break;
|
|
||||||
case FF_TRANSPOSE_IDCT_PERM:
|
|
||||||
for(i=0; i<64; i++)
|
|
||||||
c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
|
|
||||||
break;
|
|
||||||
case FF_PARTTRANS_IDCT_PERM:
|
|
||||||
for(i=0; i<64; i++)
|
|
||||||
c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
|
|
||||||
break;
|
|
||||||
case FF_SSE2_IDCT_PERM:
|
|
||||||
for(i=0; i<64; i++)
|
|
||||||
c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -202,6 +202,8 @@ typedef struct ScanTable{
|
|||||||
} ScanTable;
|
} ScanTable;
|
||||||
|
|
||||||
void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
|
void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
|
||||||
|
void ff_init_scantable_permutation(uint8_t *idct_permutation,
|
||||||
|
int idct_permutation_type);
|
||||||
|
|
||||||
#define EMULATED_EDGE(depth) \
|
#define EMULATED_EDGE(depth) \
|
||||||
void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
|
void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
|
||||||
|
@ -34,17 +34,11 @@
|
|||||||
|
|
||||||
#include "libavutil/intmath.h"
|
#include "libavutil/intmath.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "dsputil.h"
|
#include "proresdsp.h"
|
||||||
#include "get_bits.h"
|
#include "get_bits.h"
|
||||||
|
|
||||||
#define BITS_PER_SAMPLE 10 ///< output precision of that decoder
|
|
||||||
#define BIAS (1 << (BITS_PER_SAMPLE - 1)) ///< bias value for converting signed pixels into unsigned ones
|
|
||||||
#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8)) ///< minimum value for clipping resulting pixels
|
|
||||||
#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1 ///< maximum value for clipping resulting pixels
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
DSPContext dsp;
|
ProresDSPContext dsp;
|
||||||
AVFrame picture;
|
AVFrame picture;
|
||||||
ScanTable scantable;
|
ScanTable scantable;
|
||||||
int scantable_type; ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced
|
int scantable_type; ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced
|
||||||
@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
|||||||
|
|
||||||
avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
|
avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
|
||||||
|
|
||||||
avctx->bits_per_raw_sample = BITS_PER_SAMPLE;
|
avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
|
||||||
dsputil_init(&ctx->dsp, avctx);
|
ff_proresdsp_init(&ctx->dsp);
|
||||||
|
|
||||||
avctx->coded_frame = &ctx->picture;
|
avctx->coded_frame = &ctx->picture;
|
||||||
avcodec_get_frame_defaults(&ctx->picture);
|
avcodec_get_frame_defaults(&ctx->picture);
|
||||||
@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Add bias value, clamp and output pixels of a slice
|
|
||||||
*/
|
|
||||||
static void put_pixels(const DCTELEM *in, uint16_t *out, int stride,
|
|
||||||
int mbs_per_slice, int blocks_per_mb)
|
|
||||||
{
|
|
||||||
int mb, x, y, src_offset, dst_offset;
|
|
||||||
const DCTELEM *src1, *src2;
|
|
||||||
uint16_t *dst1, *dst2;
|
|
||||||
|
|
||||||
src1 = in;
|
|
||||||
src2 = in + (blocks_per_mb << 5);
|
|
||||||
dst1 = out;
|
|
||||||
dst2 = out + (stride << 3);
|
|
||||||
|
|
||||||
for (mb = 0; mb < mbs_per_slice; mb++) {
|
|
||||||
for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
|
|
||||||
for (x = 0; x < 8; x++) {
|
|
||||||
src_offset = (y << 3) + x;
|
|
||||||
|
|
||||||
dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]);
|
|
||||||
dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]);
|
|
||||||
|
|
||||||
if (blocks_per_mb > 2) {
|
|
||||||
dst1[dst_offset + x + 8] =
|
|
||||||
CLIP_AND_BIAS(src1[src_offset + 64]);
|
|
||||||
dst2[dst_offset + x + 8] =
|
|
||||||
CLIP_AND_BIAS(src2[src_offset + 64]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
src1 += blocks_per_mb << 6;
|
|
||||||
src2 += blocks_per_mb << 6;
|
|
||||||
dst1 += blocks_per_mb << 2;
|
|
||||||
dst2 += blocks_per_mb << 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decode a slice plane (luma or chroma).
|
* Decode a slice plane (luma or chroma).
|
||||||
*/
|
*/
|
||||||
@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
|
|||||||
{
|
{
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
DCTELEM *block_ptr;
|
DCTELEM *block_ptr;
|
||||||
int i, blk_num, blocks_per_slice;
|
int mb_num, blocks_per_slice;
|
||||||
|
|
||||||
blocks_per_slice = mbs_per_slice * blocks_per_mb;
|
blocks_per_slice = mbs_per_slice * blocks_per_mb;
|
||||||
|
|
||||||
@ -518,20 +470,20 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
|
|||||||
/* inverse quantization, inverse transform and output */
|
/* inverse quantization, inverse transform and output */
|
||||||
block_ptr = ctx->blocks;
|
block_ptr = ctx->blocks;
|
||||||
|
|
||||||
for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 64) {
|
for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
|
||||||
/* TODO: the correct solution should be (block_ptr[i] * qmat[i]) >> 1
|
ctx->dsp.idct_put(out_ptr, linesize, block_ptr, qmat);
|
||||||
* and the input of the inverse transform should be scaled by 2
|
block_ptr += 64;
|
||||||
* in order to avoid rounding errors.
|
if (blocks_per_mb > 2) {
|
||||||
* Due to the fact the existing Libav transforms are incompatible with
|
ctx->dsp.idct_put(out_ptr + 8, linesize, block_ptr, qmat);
|
||||||
* that input I temporarily introduced the coarse solution below... */
|
block_ptr += 64;
|
||||||
for (i = 0; i < 64; i++)
|
}
|
||||||
block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2;
|
ctx->dsp.idct_put(out_ptr + linesize * 4, linesize, block_ptr, qmat);
|
||||||
|
block_ptr += 64;
|
||||||
ctx->dsp.idct(block_ptr);
|
if (blocks_per_mb > 2) {
|
||||||
|
ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat);
|
||||||
|
block_ptr += 64;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
put_pixels(ctx->blocks, out_ptr, linesize >> 1, mbs_per_slice,
|
|
||||||
blocks_per_mb);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
61
libavcodec/proresdsp.c
Normal file
61
libavcodec/proresdsp.c
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* Apple ProRes compatible decoder
|
||||||
|
*
|
||||||
|
* Copyright (c) 2010-2011 Maxim Poliakovski
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "proresdsp.h"
|
||||||
|
#include "simple_idct.h"
|
||||||
|
|
||||||
|
#define BIAS (1 << (PRORES_BITS_PER_SAMPLE - 1)) ///< bias value for converting signed pixels into unsigned ones
|
||||||
|
#define CLIP_MIN (1 << (PRORES_BITS_PER_SAMPLE - 8)) ///< minimum value for clipping resulting pixels
|
||||||
|
/* The whole replacement list is parenthesized so that CLIP_MAX expands to a
 * single operand in any expression (e.g. 2 * CLIP_MAX or -CLIP_MAX). */
#define CLIP_MAX ((1 << PRORES_BITS_PER_SAMPLE) - CLIP_MIN - 1) ///< maximum value for clipping resulting pixels
|
||||||
|
|
||||||
|
#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add bias value, clamp and output pixels of a slice
|
||||||
|
*/
|
||||||
|
/**
 * Bias, clamp and store one 8x8 block of samples.
 *
 * Every input sample is passed through CLIP_AND_BIAS() and written to the
 * matching position in the destination.
 *
 * @param dst    destination of the block's top-left pixel
 * @param stride distance between two destination rows, in uint16_t elements
 * @param in     64 input samples, eight per row
 */
static void put_pixels(uint16_t *dst, int stride, const DCTELEM *in)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        uint16_t      *dst_row = dst + row * stride;
        const DCTELEM *src_row = in  + row * 8;

        for (col = 0; col < 8; col++)
            dst_row[col] = CLIP_AND_BIAS(src_row[col]);
    }
}
|
||||||
|
|
||||||
|
/**
 * Run ff_prores_idct() on one block and store the result as pixels.
 *
 * @param out      destination of the block's top-left pixel
 * @param linesize destination line size; it is halved before being passed
 *                 to put_pixels() as the uint16_t element stride
 * @param block    coefficient block, modified in place by ff_prores_idct()
 * @param qmat     quantization matrix forwarded to ff_prores_idct()
 */
static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat)
{
    const int stride = linesize >> 1;

    ff_prores_idct(block, qmat);
    put_pixels(out, stride, block);
}
|
||||||
|
|
||||||
|
/**
 * Initialize a ProresDSPContext with the C implementation.
 *
 * Selects prores_idct_put_c() as the idct_put callback, sets the
 * permutation type to FF_NO_IDCT_PERM and fills the permutation table
 * accordingly.
 *
 * @param dsp context to initialize
 */
void ff_proresdsp_init(ProresDSPContext *dsp)
{
    dsp->idct_permutation_type = FF_NO_IDCT_PERM;
    dsp->idct_put              = prores_idct_put_c;

    ff_init_scantable_permutation(dsp->idct_permutation,
                                  dsp->idct_permutation_type);
}
|
38
libavcodec/proresdsp.h
Normal file
38
libavcodec/proresdsp.h
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
/*
|
||||||
|
* Apple ProRes compatible decoder
|
||||||
|
*
|
||||||
|
* Copyright (c) 2010-2011 Maxim Poliakovski
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVCODEC_PRORESDSP_H
|
||||||
|
#define AVCODEC_PRORESDSP_H
|
||||||
|
|
||||||
|
#include "dsputil.h"
|
||||||
|
|
||||||
|
#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of prores decoder
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int idct_permutation_type;
|
||||||
|
uint8_t idct_permutation[64];
|
||||||
|
void (* idct_put) (uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat);
|
||||||
|
} ProresDSPContext;
|
||||||
|
|
||||||
|
void ff_proresdsp_init(ProresDSPContext *dsp);
|
||||||
|
|
||||||
|
#endif /* AVCODEC_PRORESDSP_H */
|
@ -221,3 +221,20 @@ void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
|
|||||||
idct4col_add(dest + i, line_size, block + i);
|
idct4col_add(dest + i, line_size, block + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * In-place ProRes inverse transform with built-in dequantization.
 *
 * The passes run strictly one after another over the whole block:
 * multiply by qmat, row transform, shift right by two bits, column
 * transform.
 *
 * @param block 64 coefficients, overwritten with the result
 * @param qmat  64 quantization factors, one per coefficient
 */
void ff_prores_idct(DCTELEM *block, const int16_t *qmat)
{
    int n;

    /* dequantize every coefficient */
    for (n = 0; n < 64; n++)
        block[n] *= qmat[n];

    /* row pass: one call per group of eight coefficients */
    for (n = 0; n < 64; n += 8)
        idctRowCondDC_10(block + n);

    /* extra down-scaling between the two passes */
    for (n = 0; n < 64; n++)
        block[n] >>= 2;

    /* column pass: one call per column start */
    for (n = 0; n < 8; n++)
        idctSparseCol_10(block + n);
}
|
||||||
|
@ -38,6 +38,12 @@ void ff_simple_idct_8(DCTELEM *block);
|
|||||||
void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block);
|
void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block);
|
void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
void ff_simple_idct_10(DCTELEM *block);
|
void ff_simple_idct_10(DCTELEM *block);
|
||||||
|
/**
|
||||||
|
* Special version of ff_simple_idct_10() which does dequantization
|
||||||
|
* and scales by a factor of 2 more between the two IDCTs to account
|
||||||
|
* for larger scale of input coefficients.
|
||||||
|
*/
|
||||||
|
void ff_prores_idct(DCTELEM *block, const int16_t *qmat);
|
||||||
|
|
||||||
void ff_simple_idct_mmx(int16_t *block);
|
void ff_simple_idct_mmx(int16_t *block);
|
||||||
void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
|
void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
|
||||||
|
Loading…
Reference in New Issue
Block a user