diff --git a/doc/APIchanges b/doc/APIchanges index 1f5e8aad39..aa2827af08 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -13,6 +13,12 @@ libavutil: 2011-04-18 API changes, most recent first: +2011-05-10 - xxxxxxx - lavc 53.3.0 - avcodec.h + Deprecate AVLPCType and the following fields in + AVCodecContext: lpc_coeff_precision, prediction_order_method, + min_partition_order, max_partition_order, lpc_type, lpc_passes. + Corresponding FLAC encoder options should be used instead. + 2011-05-07 - xxxxxxx - lavfi 2.5.0 - avcodec.h Add libavfilter/avcodec.h header and avfilter_copy_frame_props() function. diff --git a/ffmpeg.c b/ffmpeg.c index 74461d368f..2875d8aec8 100644 --- a/ffmpeg.c +++ b/ffmpeg.c @@ -292,7 +292,6 @@ typedef struct AVOutputStream { int resample_pix_fmt; float frame_aspect_ratio; - /* forced key frames */ int64_t *forced_kf_pts; int forced_kf_count; @@ -1505,7 +1504,7 @@ static int output_packet(AVInputStream *ist, int ist_index, AVFormatContext *os; AVOutputStream *ost; int ret, i; - int got_picture; + int got_output; AVFrame picture; void *buffer_to_free = NULL; static unsigned int samples_size= 0; @@ -1537,7 +1536,7 @@ static int output_packet(AVInputStream *ist, int ist_index, pkt_pts = av_rescale_q(pkt->pts, ist->st->time_base, AV_TIME_BASE_Q); //while we have more to decode or while the decoder did output something on EOF - while (avpkt.size > 0 || (!pkt && ist->next_pts != ist->pts)) { + while (avpkt.size > 0 || (!pkt && got_output)) { uint8_t *data_buf, *decoded_data_buf; int data_size, decoded_data_size; handle_eof: @@ -1573,9 +1572,10 @@ static int output_packet(AVInputStream *ist, int ist_index, avpkt.data += ret; avpkt.size -= ret; data_size = ret; + got_output = decoded_data_size > 0; /* Some bug in mpeg audio decoder gives */ /* decoded_data_size < 0, it seems they are overflows */ - if (decoded_data_size <= 0) { + if (!got_output) { /* no audio frame */ continue; } @@ -1592,11 +1592,11 @@ static int output_packet(AVInputStream *ist, int ist_index, pkt_pts = AV_NOPTS_VALUE; ret = avcodec_decode_video2(ist->st->codec, - &picture, &got_picture, &avpkt); + &picture, &got_output, &avpkt); ist->st->quality= picture.quality; if (ret < 0) goto fail_decode; - if (!got_picture) { + if (!got_output) { /* no picture yet */ goto discard_packet; } @@ -1613,10 +1613,10 @@ static int output_packet(AVInputStream *ist, int ist_index, break; case AVMEDIA_TYPE_SUBTITLE: ret = avcodec_decode_subtitle2(ist->st->codec, - &subtitle, &got_picture, &avpkt); + &subtitle, &got_output, &avpkt); if (ret < 0) goto fail_decode; - if (!got_picture) { + if (!got_output) { goto discard_packet; } subtitle_to_free = &subtitle; diff --git a/libavcodec/alacenc.c b/libavcodec/alacenc.c index 9d2865d51e..c3a1fdfa03 100644 --- a/libavcodec/alacenc.c +++ b/libavcodec/alacenc.c @@ -146,7 +146,7 @@ static void calc_predictor_params(AlacEncodeContext *s, int ch) s->min_prediction_order, s->max_prediction_order, ALAC_MAX_LPC_PRECISION, coefs, shift, - AV_LPC_TYPE_LEVINSON, 0, + FF_LPC_TYPE_LEVINSON, 0, ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1); s->lpc[ch].lpc_order = opt_order; @@ -457,7 +457,7 @@ static av_cold int alac_encode_init(AVCodecContext *avctx) s->avctx = avctx; ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size, s->max_prediction_order, - AV_LPC_TYPE_LEVINSON); + FF_LPC_TYPE_LEVINSON); return ret; } diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c index 96e7030e9d..6ce3f4bf15 100644 --- a/libavcodec/alpha/dsputil_alpha.c +++ b/libavcodec/alpha/dsputil_alpha.c @@ 
-270,9 +270,9 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) { - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_axp_asm; c->put_pixels_tab[0][1] = put_pixels16_x2_axp; c->put_pixels_tab[0][2] = put_pixels16_y2_axp; diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index 218d162687..0351412761 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -75,7 +75,7 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block) void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) { - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; ff_put_pixels_clamped = c->put_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped; @@ -97,7 +97,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) c->add_pixels_clamped = ff_add_pixels_clamped_arm; - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][0] = ff_put_pixels16_arm; c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm; c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm; diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c index fc0f7865f0..9acea4a1d6 100644 --- a/libavcodec/arm/dsputil_init_armv6.c +++ b/libavcodec/arm/dsputil_init_armv6.c @@ -72,7 +72,7 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size); void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) { - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) { @@ -82,7 +82,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; } - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][0] = ff_put_pixels16_armv6; c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6; c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6; diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 9e456f32df..6faf3dc8d0 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -173,7 +173,7 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) { - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (!avctx->lowres) { if (avctx->idct_algo == FF_IDCT_AUTO || @@ -192,7 +192,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) } } - if (!h264_high_depth) { + if (!high_bit_depth) { c->clear_block = ff_clear_block_neon; c->clear_blocks = ff_clear_blocks_neon; @@ -223,7 +223,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; if (CONFIG_H264_DECODER) { - if (!h264_high_depth) { + if 
(!high_bit_depth) { c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon; c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon; c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon; diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c index 6db1837ba0..85be83148a 100644 --- a/libavcodec/arm/dsputil_iwmmxt.c +++ b/libavcodec/arm/dsputil_iwmmxt.c @@ -155,7 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h) void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) { int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */ - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (avctx->dsp_mask) { if (avctx->dsp_mask & AV_CPU_FLAG_FORCE) @@ -168,7 +168,7 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) c->add_pixels_clamped = add_pixels_clamped_iwmmxt; - if (!h264_high_depth) { + if (!high_bit_depth) { c->clear_blocks = clear_blocks_iwmmxt; c->put_pixels_tab[0][0] = put_pixels16_iwmmxt; diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c index 5b11b7da88..b1d4f005e8 100644 --- a/libavcodec/arm/h264pred_init_arm.c +++ b/libavcodec/arm/h264pred_init_arm.c @@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon; } -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth) +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth) { if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth); } diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index acdb183f90..00e9dd5e2f 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -516,10 +516,11 @@ enum AVChromaLocation{ AVCHROMA_LOC_NB , ///< Not part of ABI }; +#if FF_API_FLAC_GLOBAL_OPTS /** * LPC analysis type */ -enum AVLPCType { +attribute_deprecated enum AVLPCType { AV_LPC_TYPE_DEFAULT = -1, ///< use the codec default LPC type AV_LPC_TYPE_NONE = 0, ///< do not use LPC prediction or use all zero coefficients AV_LPC_TYPE_FIXED = 1, ///< fixed LPC coefficients @@ -527,6 +528,7 @@ enum AVLPCType { AV_LPC_TYPE_CHOLESKY = 3, ///< Cholesky factorization AV_LPC_TYPE_NB , ///< Not part of ABI }; +#endif enum AVAudioServiceType { AV_AUDIO_SERVICE_TYPE_MAIN = 0, @@ -2513,13 +2515,6 @@ typedef struct AVCodecContext { int compression_level; #define FF_COMPRESSION_DEFAULT -1 - /** - * LPC coefficient precision - used by FLAC encoder - * - encoding: Set by user. - * - decoding: unused - */ - int lpc_coeff_precision; - /** * - encoding: Set by user. * - decoding: unused @@ -2532,24 +2527,42 @@ typedef struct AVCodecContext { */ int max_prediction_order; +#if FF_API_FLAC_GLOBAL_OPTS + /** + * @defgroup flac_opts FLAC options + * @deprecated Use FLAC encoder private options instead. + * @{ + */ + + /** + * LPC coefficient precision - used by FLAC encoder + * - encoding: Set by user. + * - decoding: unused + */ + attribute_deprecated int lpc_coeff_precision; + /** * search method for selecting prediction order * - encoding: Set by user. * - decoding: unused */ - int prediction_order_method; + attribute_deprecated int prediction_order_method; /** * - encoding: Set by user. * - decoding: unused */ - int min_partition_order; + attribute_deprecated int min_partition_order; /** * - encoding: Set by user. 
* - decoding: unused */ - int max_partition_order; + attribute_deprecated int max_partition_order; + /** + * @} + */ +#endif /** * GOP timecode frame start number, in non drop frame format @@ -2767,19 +2780,21 @@ typedef struct AVCodecContext { int log_level_offset; +#if FF_API_FLAC_GLOBAL_OPTS /** * Determines which LPC analysis algorithm to use. * - encoding: Set by user * - decoding: unused */ - enum AVLPCType lpc_type; + attribute_deprecated enum AVLPCType lpc_type; /** * Number of passes to use for Cholesky factorization during LPC analysis * - encoding: Set by user * - decoding: unused */ - int lpc_passes; + attribute_deprecated int lpc_passes; +#endif /** * Number of slices. diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c index 01d7ec6a44..5b94472326 100644 --- a/libavcodec/bfin/dsputil_bfin.c +++ b/libavcodec/bfin/dsputil_bfin.c @@ -197,14 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx ) { - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; c->get_pixels = ff_bfin_get_pixels; c->diff_pixels = ff_bfin_diff_pixels; c->put_pixels_clamped = ff_bfin_put_pixels_clamped; c->add_pixels_clamped = ff_bfin_add_pixels_clamped; - if (!h264_high_depth) + if (!high_bit_depth) c->clear_blocks = bfin_clear_blocks; c->pix_sum = ff_bfin_pix_sum; c->pix_norm1 = ff_bfin_pix_norm1; @@ -231,7 +231,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx ) c->sse[1] = ff_bfin_sse8; c->sse[2] = ff_bfin_sse4; - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][0] = bfin_put_pixels16; c->put_pixels_tab[0][1] = bfin_put_pixels16_x2; c->put_pixels_tab[0][2] = bfin_put_pixels16_y2; diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 162458a7b5..0e596b1b01 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -43,15 +43,15 @@ uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; uint32_t ff_squareTbl[512] = {0, }; #define BIT_DEPTH 9 -#include "dsputil_internal.h" +#include "dsputil_template.c" #undef BIT_DEPTH #define BIT_DEPTH 10 -#include "dsputil_internal.h" +#include "dsputil_template.c" #undef BIT_DEPTH #define BIT_DEPTH 8 -#include "dsputil_internal.h" +#include "dsputil_template.c" // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size #define pb_7f (~0UL/255 * 0x7f) diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c new file mode 100644 index 0000000000..8ca6d3e414 --- /dev/null +++ b/libavcodec/dsputil_template.c @@ -0,0 +1,1391 @@ +/* + * DSP utils + * Copyright (c) 2000, 2001 Fabrice Bellard + * Copyright (c) 2002-2004 Michael Niedermayer + * + * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * DSP utils + */ + +#include "high_bit_depth.h" + +static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i 8 + int j; + for (j = 0; j < w; j++) { + ptr[j-w] = ptr[0]; + ptr[j+width] = ptr[width-1]; + } +#else + memset(ptr - w, ptr[0], w); + memset(ptr + width, ptr[width-1], w); +#endif + ptr += wrap; + } + + /* top and bottom + corners */ + buf -= w; + last_line = buf + (height - 1) * wrap; + if (sides & EDGE_TOP) + for(i = 0; i < w; i++) + memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top + if (sides & EDGE_BOTTOM) + for (i = 0; i < w; i++) + memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom +} + +/** + * Copy a rectangular area of samples to a temporary buffer and replicate the border samples. + * @param buf destination buffer + * @param src source buffer + * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers + * @param block_w width of block + * @param block_h height of block + * @param src_x x coordinate of the top left sample of the block in the source buffer + * @param src_y y coordinate of the top left sample of the block in the source buffer + * @param w width of the source buffer + * @param h height of the source buffer + */ +void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h, + int src_x, int src_y, int w, int h){ + int x, y; + int start_y, start_x, end_y, end_x; + + if(src_y>= h){ + src+= (h-1-src_y)*linesize; + src_y=h-1; + }else if(src_y<=-block_h){ + src+= (1-block_h-src_y)*linesize; + src_y=1-block_h; + } + if(src_x>= w){ + src+= (w-1-src_x)*sizeof(pixel); + src_x=w-1; + }else if(src_x<=-block_w){ + src+= (1-block_w-src_x)*sizeof(pixel); + src_x=1-block_w; + } + + start_y= FFMAX(0, -src_y); + start_x= FFMAX(0, -src_x); + end_y= FFMIN(block_h, h-src_y); + end_x= FFMIN(block_w, w-src_x); + assert(start_y < end_y && block_h); + assert(start_x < end_x && block_w); + + w = end_x - start_x; + src += start_y*linesize + start_x*sizeof(pixel); + buf += start_x*sizeof(pixel); + + //top + for(y=0; y>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint64_t a= AV_RN64(pixels );\ + const uint64_t b= AV_RN64(pixels+1);\ + uint64_t l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0202020202020202ULL;\ + uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + uint64_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + 
OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + a= AV_RN64(pixels );\ + b= AV_RN64(pixels+1);\ + l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0202020202020202ULL;\ + h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint64_t a= AV_RN64(pixels );\ + const uint64_t b= AV_RN64(pixels+1);\ + uint64_t l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0101010101010101ULL;\ + uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + uint64_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + a= AV_RN64(pixels );\ + b= AV_RN64(pixels+1);\ + l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0101010101010101ULL;\ + h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8*sizeof(pixel))\ +CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8*sizeof(pixel))\ +CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8*sizeof(pixel))\ +CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8*sizeof(pixel))\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8*sizeof(pixel))\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8*sizeof(pixel))\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8*sizeof(pixel)) + +#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) +#else // 64 bit variant + +#define PIXOP2(OPNAME, OP) \ +static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + int i;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + a= AV_RN32(&src1[i*src_stride1+4]);\ + b= AV_RN32(&src2[i*src_stride2+4]);\ + c= AV_RN32(&src3[i*src_stride3+4]);\ + d= AV_RN32(&src4[i*src_stride4+4]);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + }\ +}\ +\ +static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ +}\ +\ +static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ +}\ +\ +static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + 
FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ +}\ +\ +static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ +}\ +\ +static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ + int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ + /* FIXME HIGH BIT DEPTH*/\ + int i;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + a= AV_RN32(&src1[i*src_stride1+4]);\ + b= AV_RN32(&src2[i*src_stride2+4]);\ + c= AV_RN32(&src3[i*src_stride3+4]);\ + d= AV_RN32(&src4[i*src_stride4+4]);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + }\ +}\ +static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ + int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ + FUNC(OPNAME ## _pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ + FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ +}\ +static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ + int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ + FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ + FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ +}\ +\ +static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\ +{\ + int i, a0, b0, a1, b1;\ + pixel *block = (pixel*)_block;\ + const pixel *pixels = (const pixel*)_pixels;\ + line_size /= sizeof(pixel);\ + a0= pixels[0];\ + b0= pixels[1] + 2;\ + a0 += b0;\ + b0 += pixels[2];\ +\ + pixels+=line_size;\ + for(i=0; i>2; /* FIXME non put */\ + block[1]= (b1+b0)>>2;\ +\ + pixels+=line_size;\ + block +=line_size;\ +\ + a0= pixels[0];\ + b0= pixels[1] + 2;\ + a0 += b0;\ + b0 += pixels[2];\ +\ + block[0]= (a1+a0)>>2;\ + block[1]= (b1+b0)>>2;\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + /* FIXME HIGH BIT DEPTH */\ + int i;\ + const uint32_t a= AV_RN32(pixels );\ + const uint32_t b= AV_RN32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + 
pixels+=line_size;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= AV_RN32(pixels );\ + b= AV_RN32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + /* FIXME HIGH BIT DEPTH */\ + int j;\ + for(j=0; j<2; j++){\ + int i;\ + const uint32_t a= AV_RN32(pixels );\ + const uint32_t b= AV_RN32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= AV_RN32(pixels );\ + b= AV_RN32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ + pixels+=4-line_size*(h+1);\ + block +=4-line_size*h;\ + }\ +}\ +\ +static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + /* FIXME HIGH BIT DEPTH */\ + int j;\ + for(j=0; j<2; j++){\ + int i;\ + const uint32_t a= AV_RN32(pixels );\ + const uint32_t b= AV_RN32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= AV_RN32(pixels );\ + b= AV_RN32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ + pixels+=4-line_size*(h+1);\ + block +=4-line_size*h;\ + }\ +}\ +\ +CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\ +CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\ +CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\ +CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\ +av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\ +CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\ +CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\ +CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\ + +#define op_avg(a, b) a = rnd_avg_pixel4(a, b) +#endif +#define op_put(a, b) a = b + +PIXOP2(avg, op_avg) +PIXOP2(put, op_put) +#undef op_avg +#undef op_put + +#define put_no_rnd_pixels8_c put_pixels8_c +#define put_no_rnd_pixels16_c put_pixels16_c + +static void FUNCC(put_no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int 
stride, int h){ + FUNC(put_no_rnd_pixels16_l2)(dst, a, b, stride, stride, stride, h); +} + +static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ + FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h); +} + +#define H264_CHROMA_MC(OPNAME, OP)\ +static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ + pixel *dst = (pixel*)_dst;\ + pixel *src = (pixel*)_src;\ + const int A=(8-x)*(8-y);\ + const int B=( x)*(8-y);\ + const int C=(8-x)*( y);\ + const int D=( x)*( y);\ + int i;\ + stride /= sizeof(pixel);\ + \ + assert(x<8 && y<8 && x>=0 && y>=0);\ +\ + if(D){\ + for(i=0; i=0 && y>=0);\ +\ + if(D){\ + for(i=0; i=0 && y>=0);\ +\ + if(D){\ + for(i=0; i>6)+1)>>1) +#define op_put(a, b) a = (((b) + 32)>>6) + +H264_CHROMA_MC(put_ , op_put) +H264_CHROMA_MC(avg_ , op_avg) +#undef op_avg +#undef op_put + +#define H264_LOWPASS(OPNAME, OP, OP2) \ +static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\ + const int h=2;\ + INIT_CLIP\ + int i;\ + pixel *dst = (pixel*)_dst;\ + pixel *src = (pixel*)_src;\ + dstStride /= sizeof(pixel);\ + srcStride /= sizeof(pixel);\ + for(i=0; i 9) ? (-10 * ((1< 9) ? (-10 * ((1< 9) ? (-10 * ((1<>5)+1)>>1) +//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) +#define op_put(a, b) a = CLIP(((b) + 16)>>5) +#define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1) +#define op2_put(a, b) a = CLIP(((b) + 512)>>10) + +H264_LOWPASS(put_ , op_put, op2_put) +H264_LOWPASS(avg_ , op_avg, op2_avg) +H264_MC(put_, 2) +H264_MC(put_, 4) +H264_MC(put_, 8) +H264_MC(put_, 16) +H264_MC(avg_, 4) +H264_MC(avg_, 8) +H264_MC(avg_, 16) + +#undef op_avg +#undef op_put +#undef op2_avg +#undef op2_put + +#if BIT_DEPTH == 8 +# define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c +# define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c +# define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c +# define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c +#elif BIT_DEPTH == 9 +# define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c +# define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c +# define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c +# define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c +#elif BIT_DEPTH == 10 +# define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c +# define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c +# define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c +# define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c +#endif + +void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) { + FUNCC(put_pixels8)(dst, src, stride, 8); +} +void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) { + FUNCC(avg_pixels8)(dst, src, stride, 8); +} +void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) { + FUNCC(put_pixels16)(dst, src, stride, 16); +} +void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) { + FUNCC(avg_pixels16)(dst, src, stride, 16); +} + +static void FUNCC(clear_block)(DCTELEM *block) +{ + memset(block, 0, sizeof(dctcoef)*64); +} + +/** + * memset(blocks, 0, sizeof(DCTELEM)*6*64) + */ +static void FUNCC(clear_blocks)(DCTELEM *blocks) +{ + memset(blocks, 0, sizeof(dctcoef)*6*64); +} diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c index 637d09dba5..811a75a06f 100644 --- a/libavcodec/flacenc.c +++ b/libavcodec/flacenc.c @@ -21,6 +21,7 @@ #include "libavutil/crc.h" #include 
"libavutil/md5.h" +#include "libavutil/opt.h" #include "avcodec.h" #include "get_bits.h" #include "golomb.h" @@ -43,7 +44,7 @@ typedef struct CompressionOptions { int compression_level; int block_time_ms; - enum AVLPCType lpc_type; + enum FFLPCType lpc_type; int lpc_passes; int lpc_coeff_precision; int min_prediction_order; @@ -80,6 +81,7 @@ typedef struct FlacFrame { } FlacFrame; typedef struct FlacEncodeContext { + AVClass *class; PutBitContext pb; int channels; int samplerate; @@ -156,16 +158,16 @@ static av_cold void dprint_compression_options(FlacEncodeContext *s) av_log(avctx, AV_LOG_DEBUG, " compression: %d\n", opt->compression_level); switch (opt->lpc_type) { - case AV_LPC_TYPE_NONE: + case FF_LPC_TYPE_NONE: av_log(avctx, AV_LOG_DEBUG, " lpc type: None\n"); break; - case AV_LPC_TYPE_FIXED: + case FF_LPC_TYPE_FIXED: av_log(avctx, AV_LOG_DEBUG, " lpc type: Fixed pre-defined coefficients\n"); break; - case AV_LPC_TYPE_LEVINSON: + case FF_LPC_TYPE_LEVINSON: av_log(avctx, AV_LOG_DEBUG, " lpc type: Levinson-Durbin recursion with Welch window\n"); break; - case AV_LPC_TYPE_CHOLESKY: + case FF_LPC_TYPE_CHOLESKY: av_log(avctx, AV_LOG_DEBUG, " lpc type: Cholesky factorization, %d pass%s\n", opt->lpc_passes, opt->lpc_passes == 1 ? "" : "es"); break; @@ -266,32 +268,42 @@ static av_cold int flac_encode_init(AVCodecContext *avctx) s->options.block_time_ms = ((int[]){ 27, 27, 27,105,105,105,105,105,105,105,105,105,105})[level]; - s->options.lpc_type = ((int[]){ AV_LPC_TYPE_FIXED, AV_LPC_TYPE_FIXED, AV_LPC_TYPE_FIXED, - AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, - AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, - AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, - AV_LPC_TYPE_LEVINSON})[level]; + if (s->options.lpc_type == FF_LPC_TYPE_DEFAULT) + s->options.lpc_type = ((int[]){ FF_LPC_TYPE_FIXED, FF_LPC_TYPE_FIXED, FF_LPC_TYPE_FIXED, + FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, + FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, + FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, + FF_LPC_TYPE_LEVINSON})[level]; s->options.min_prediction_order = ((int[]){ 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1})[level]; s->options.max_prediction_order = ((int[]){ 3, 4, 4, 6, 8, 8, 8, 8, 12, 12, 12, 32, 32})[level]; - s->options.prediction_order_method = ((int[]){ ORDER_METHOD_EST, ORDER_METHOD_EST, ORDER_METHOD_EST, - ORDER_METHOD_EST, ORDER_METHOD_EST, ORDER_METHOD_EST, - ORDER_METHOD_4LEVEL, ORDER_METHOD_LOG, ORDER_METHOD_4LEVEL, - ORDER_METHOD_LOG, ORDER_METHOD_SEARCH, ORDER_METHOD_LOG, - ORDER_METHOD_SEARCH})[level]; + if (s->options.prediction_order_method < 0) + s->options.prediction_order_method = ((int[]){ ORDER_METHOD_EST, ORDER_METHOD_EST, ORDER_METHOD_EST, + ORDER_METHOD_EST, ORDER_METHOD_EST, ORDER_METHOD_EST, + ORDER_METHOD_4LEVEL, ORDER_METHOD_LOG, ORDER_METHOD_4LEVEL, + ORDER_METHOD_LOG, ORDER_METHOD_SEARCH, ORDER_METHOD_LOG, + ORDER_METHOD_SEARCH})[level]; - s->options.min_partition_order = ((int[]){ 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})[level]; - s->options.max_partition_order = ((int[]){ 2, 2, 3, 3, 3, 8, 8, 8, 8, 8, 8, 8, 8})[level]; + if (s->options.min_partition_order > s->options.max_partition_order) { + av_log(avctx, AV_LOG_ERROR, "invalid partition orders: min=%d max=%d\n", + s->options.min_partition_order, s->options.max_partition_order); + return AVERROR(EINVAL); + } + if (s->options.min_partition_order < 0) + s->options.min_partition_order = ((int[]){ 2, 2, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0})[level]; + if (s->options.max_partition_order < 0) + s->options.max_partition_order = ((int[]){ 2, 2, 3, 3, 3, 8, 8, 8, 8, 8, 8, 8, 8})[level]; /* set compression option overrides from AVCodecContext */ - if (avctx->lpc_type > AV_LPC_TYPE_DEFAULT) { - if (avctx->lpc_type > AV_LPC_TYPE_CHOLESKY) { +#if FF_API_FLAC_GLOBAL_OPTS + if (avctx->lpc_type > FF_LPC_TYPE_DEFAULT) { + if (avctx->lpc_type > FF_LPC_TYPE_CHOLESKY) { av_log(avctx, AV_LOG_ERROR, "unknown lpc type: %d\n", avctx->lpc_type); return -1; } s->options.lpc_type = avctx->lpc_type; - if (s->options.lpc_type == AV_LPC_TYPE_CHOLESKY) { + if (s->options.lpc_type == FF_LPC_TYPE_CHOLESKY) { if (avctx->lpc_passes < 0) { // default number of passes for Cholesky s->options.lpc_passes = 2; @@ -304,11 +316,12 @@ static av_cold int flac_encode_init(AVCodecContext *avctx) } } } +#endif - if (s->options.lpc_type == AV_LPC_TYPE_NONE) { + if (s->options.lpc_type == FF_LPC_TYPE_NONE) { s->options.min_prediction_order = 0; } else if (avctx->min_prediction_order >= 0) { - if (s->options.lpc_type == AV_LPC_TYPE_FIXED) { + if (s->options.lpc_type == FF_LPC_TYPE_FIXED) { if (avctx->min_prediction_order > MAX_FIXED_ORDER) { av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n", avctx->min_prediction_order); @@ -322,10 +335,10 @@ static av_cold int flac_encode_init(AVCodecContext *avctx) } s->options.min_prediction_order = avctx->min_prediction_order; } - if (s->options.lpc_type == AV_LPC_TYPE_NONE) { + if (s->options.lpc_type == FF_LPC_TYPE_NONE) { s->options.max_prediction_order = 0; } else if (avctx->max_prediction_order >= 0) { - if (s->options.lpc_type == AV_LPC_TYPE_FIXED) { + if (s->options.lpc_type == FF_LPC_TYPE_FIXED) { if (avctx->max_prediction_order > MAX_FIXED_ORDER) { av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n", avctx->max_prediction_order); @@ -345,6 +358,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx) return -1; } +#if FF_API_FLAC_GLOBAL_OPTS if (avctx->prediction_order_method >= 0) { if (avctx->prediction_order_method > ORDER_METHOD_LOG) { av_log(avctx, AV_LOG_ERROR, "invalid prediction order method: %d\n", @@ -375,6 +389,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx) s->options.min_partition_order, s->options.max_partition_order); return -1; } +#endif if (avctx->frame_size > 0) { if (avctx->frame_size < FLAC_MIN_BLOCKSIZE || @@ -388,6 +403,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx) } s->max_blocksize = s->avctx->frame_size; +#if FF_API_FLAC_GLOBAL_OPTS /* set LPC precision */ if (avctx->lpc_coeff_precision > 0) { if (avctx->lpc_coeff_precision > MAX_LPC_PRECISION) { @@ -396,10 +412,8 @@ static av_cold int flac_encode_init(AVCodecContext *avctx) return -1; } s->options.lpc_coeff_precision = avctx->lpc_coeff_precision; - } else { - /* default LPC precision */ - s->options.lpc_coeff_precision = 15; } +#endif /* set maximum encoded frame size in verbatim mode */ s->max_framesize = ff_flac_get_max_frame_size(s->avctx->frame_size, @@ -448,7 +462,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx) } ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size, - s->options.max_prediction_order, AV_LPC_TYPE_LEVINSON); + s->options.max_prediction_order, FF_LPC_TYPE_LEVINSON); dprint_compression_options(s); @@ -889,8 +903,8 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch) /* FIXED */ sub->type = FLAC_SUBFRAME_FIXED; - if (s->options.lpc_type == AV_LPC_TYPE_NONE || - s->options.lpc_type == AV_LPC_TYPE_FIXED || n <= max_order) { + if 
(s->options.lpc_type == FF_LPC_TYPE_NONE || + s->options.lpc_type == FF_LPC_TYPE_FIXED || n <= max_order) { uint32_t bits[MAX_FIXED_ORDER+1]; if (max_order > MAX_FIXED_ORDER) max_order = MAX_FIXED_ORDER; @@ -1336,6 +1350,33 @@ static av_cold int flac_encode_close(AVCodecContext *avctx) return 0; } +#define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM +static const AVOption options[] = { +{ "lpc_coeff_precision", "LPC coefficient precision", offsetof(FlacEncodeContext, options.lpc_coeff_precision), FF_OPT_TYPE_INT, 15, 0, MAX_LPC_PRECISION, FLAGS }, +{ "lpc_type", "LPC algorithm", offsetof(FlacEncodeContext, options.lpc_type), FF_OPT_TYPE_INT, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_NB-1, FLAGS, "lpc_type" }, +{ "none", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_NONE, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, +{ "fixed", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_FIXED, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, +{ "levinson", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_LEVINSON, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, +{ "cholesky", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_CHOLESKY, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, +{ "lpc_passes", "Number of passes to use for Cholesky factorization during LPC analysis", offsetof(FlacEncodeContext, options.lpc_passes), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, FLAGS }, +{ "min_partition_order", NULL, offsetof(FlacEncodeContext, options.min_partition_order), FF_OPT_TYPE_INT, -1, -1, MAX_PARTITION_ORDER, FLAGS }, +{ "max_partition_order", NULL, offsetof(FlacEncodeContext, options.max_partition_order), FF_OPT_TYPE_INT, -1, -1, MAX_PARTITION_ORDER, FLAGS }, +{ "prediction_order_method", "Search method for selecting prediction order", offsetof(FlacEncodeContext, options.prediction_order_method), FF_OPT_TYPE_INT, -1, -1, ORDER_METHOD_LOG, FLAGS, "predm" }, +{ "estimation", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_EST, INT_MIN, INT_MAX, FLAGS, "predm" }, +{ "2level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_2LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" }, +{ "4level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_4LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" }, +{ "8level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_8LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" }, +{ "search", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_SEARCH, INT_MIN, INT_MAX, FLAGS, "predm" }, +{ "log", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_LOG, INT_MIN, INT_MAX, FLAGS, "predm" }, +{ NULL }, +}; + +static const AVClass flac_encoder_class = { + "FLAC encoder", + av_default_item_name, + options, + LIBAVUTIL_VERSION_INT, +}; AVCodec ff_flac_encoder = { "flac", @@ -1349,4 +1390,5 @@ AVCodec ff_flac_encoder = { .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, .long_name = NULL_IF_CONFIG_SMALL("FLAC (Free Lossless Audio Codec)"), + .priv_class = &flac_encoder_class, }; diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 353a0b343b..a843d21446 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -45,11 +45,11 @@ //#undef NDEBUG #include -static const uint8_t rem6[QP_MAX_MAX+1]={ +static const uint8_t rem6[QP_MAX_NUM+1]={ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, }; -static const uint8_t div6[QP_MAX_MAX+1]={ +static const uint8_t div6[QP_MAX_NUM+1]={ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 
6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10, }; @@ -586,6 +586,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){ h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; h->pixel_shift = 0; + h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; h->thread_context[0] = h; h->outputed_poc = h->next_outputed_poc = INT_MIN; @@ -733,6 +734,7 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex int ff_h264_frame_start(H264Context *h){ MpegEncContext * const s = &h->s; int i; + const int pixel_shift = h->pixel_shift; if(MPV_frame_start(s, s->avctx) < 0) return -1; @@ -749,14 +751,14 @@ int ff_h264_frame_start(H264Context *h){ assert(s->linesize && s->uvlinesize); for(i=0; i<16; i++){ - h->block_offset[i]= (4*((scan8[i] - scan8[0])&7)<pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3); - h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7)<pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3); + h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3); + h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3); } for(i=0; i<4; i++){ h->block_offset[16+i]= - h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7)<pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); + h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); h->block_offset[24+16+i]= - h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7)<pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); + h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); } /* can't be in alloc_tables because linesize isn't known there. 
@@ -948,6 +950,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src MpegEncContext * const s = &h->s; uint8_t *top_border; int top_idx = 1; + const int pixel_shift = h->pixel_shift; src_y -= linesize; src_cb -= uvlinesize; @@ -958,10 +961,10 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src if(!MB_MBAFF){ top_border = h->top_borders[0][s->mb_x]; AV_COPY128(top_border, src_y + 15*linesize); - if (h->pixel_shift) + if (pixel_shift) AV_COPY128(top_border+16, src_y+15*linesize+16); if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ - if (h->pixel_shift) { + if (pixel_shift) { AV_COPY128(top_border+32, src_cb+7*uvlinesize); AV_COPY128(top_border+48, src_cr+7*uvlinesize); } else { @@ -980,11 +983,11 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src // There are two lines saved, the line above the the top macroblock of a pair, // and the line above the bottom macroblock AV_COPY128(top_border, src_y + 16*linesize); - if (h->pixel_shift) + if (pixel_shift) AV_COPY128(top_border+16, src_y+16*linesize+16); if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ - if (h->pixel_shift) { + if (pixel_shift) { AV_COPY128(top_border+32, src_cb+8*uvlinesize); AV_COPY128(top_border+48, src_cr+8*uvlinesize); } else { @@ -994,7 +997,10 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src } } -static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple, int pixel_shift){ +static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, + uint8_t *src_cb, uint8_t *src_cr, + int linesize, int uvlinesize, + int xchg, int simple, int pixel_shift){ MpegEncContext * const s = &h->s; int deblock_left; int deblock_top; @@ -1040,38 +1046,38 @@ else AV_COPY64(b,a); if(deblock_top){ if(deblock_left){ - XCHG(top_border_m1+(8<pixel_shift), 1); + XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1); } - XCHG(top_border+(0<mb_x+1 < s->mb_width){ - XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1); } } if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(deblock_top){ if(deblock_left){ - XCHG(top_border_m1+(16<current_picture.data[0] + ((mb_x<linesize ) * 16; - dest_cb = s->current_picture.data[1] + ((mb_x<uvlinesize) * 8; - dest_cr = s->current_picture.data[2] + ((mb_x<uvlinesize) * 8; + dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; + dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; + dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; - s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<linesize, 4); - s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4); + s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2); h->list_counts[mb_xy]= h->list_count; @@ -1186,16 +1192,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i uint8_t * const ptr= dest_y + block_offset[i]; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ - h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize); }else{ const int 
nnz = h->non_zero_count_cache[ scan8[i] ]; h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<topright_samples_available<mb, i*16, pixel_shift)) - idct_dc_add(ptr, h->mb + (i*16<mb, pixel_shift, i*16)) + idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize); else - idct_add (ptr, h->mb + (i*16<mb + (i*16 << pixel_shift), linesize); } } } @@ -1212,7 +1218,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ - h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize); }else{ uint8_t *topright; int nnz, tr; @@ -1229,7 +1235,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i topright= (uint8_t*) &tr; } }else - topright= ptr + (4<non_zero_count_cache[ scan8[i] ]; if(nnz){ if(is_h264){ - if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift)) - idct_dc_add(ptr, h->mb + (i*16<mb, pixel_shift, i*16)) + idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize); else idct_add (ptr, h->mb + (i*16<mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i,pixel_shift),pixel_shift); + dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i)); } } } @@ -1288,8 +1294,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); }else{ for(i=0; i<16; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift)) - s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) + s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize); } } }else{ @@ -1301,7 +1307,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; for(i=0; i<16; i+=di){ if(h->non_zero_count_cache[ scan8[i] ]){ - idct_add(dest_y + block_offset[i], h->mb + (i*16<mb + (i*16 << pixel_shift), linesize); } } }else{ @@ -1329,21 +1335,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i uint8_t *dest[2] = {dest_cb, dest_cr}; if(transform_bypass){ if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ - h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize); + h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize); }else{ idct_add = s->dsp.add_pixels4; for(i=16; i<16+8; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift)) - idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) + idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize); } } }else{ if(is_h264){ if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<dequant4_coeff[IS_INTRA(mb_type) ? 
1:4][h->chroma_qp[0]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); h->h264dsp.h264_idct_add8(dest, block_offset, h->mb, uvlinesize, h->non_zero_count_cache); @@ -1370,9 +1376,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i /** * Process a macroblock; this case avoids checks for expensive uncommon cases. */ -static void hl_decode_mb_simple8(H264Context *h){ - hl_decode_mb_internal(h, 1, 0); +#define hl_decode_mb_simple(sh, bits) \ +static void hl_decode_mb_simple_ ## bits(H264Context *h){ \ + hl_decode_mb_internal(h, 1, sh); \ } +hl_decode_mb_simple(0, 8); +hl_decode_mb_simple(1, 16); /** * Process a macroblock; this handles edge cases, such as interlacing. @@ -1387,11 +1396,12 @@ void ff_h264_hl_decode_mb(H264Context *h){ const int mb_type= s->current_picture.mb_type[mb_xy]; int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; - if (is_complex || h->pixel_shift) + if (is_complex) { hl_decode_mb_complex(h); - else{ - hl_decode_mb_simple8(h); - } + } else if (h->pixel_shift) { + hl_decode_mb_simple_16(h); + } else + hl_decode_mb_simple_8(h); } static int pred_weight_table(H264Context *h){ @@ -2557,6 +2567,7 @@ static void loop_filter(H264Context *h){ const int end_mb_y= s->mb_y + FRAME_MBAFF; const int old_slice_type= h->slice_type; const int end_mb_x = s->mb_x; + const int pixel_shift = h->pixel_shift; if(h->deblocking_filter) { int start_x= s->resync_mb_y == s->mb_y ? 
s->resync_mb_x : 0; @@ -2573,9 +2584,9 @@ static void loop_filter(H264Context *h){ s->mb_x= mb_x; s->mb_y= mb_y; - dest_y = s->current_picture.data[0] + ((mb_x<pixel_shift) + mb_y * s->linesize ) * 16; - dest_cb = s->current_picture.data[1] + ((mb_x<pixel_shift) + mb_y * s->uvlinesize) * 8; - dest_cr = s->current_picture.data[2] + ((mb_x<pixel_shift) + mb_y * s->uvlinesize) * 8; + dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; + dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; + dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; //FIXME simplify above if (MB_FIELD) { @@ -3060,7 +3071,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) { if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) { avctx->bits_per_raw_sample = h->sps.bit_depth_luma; - h->pixel_shift = h->sps.bit_depth_luma/9; + h->pixel_shift = h->sps.bit_depth_luma > 8; ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); diff --git a/libavcodec/h264.h b/libavcodec/h264.h index a1db628bdc..04da701750 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -108,7 +108,7 @@ */ #define DELAYED_PIC_REF 4 -#define QP_MAX_MAX (51 + 2*6) // The maximum supported qp +#define QP_MAX_NUM (51 + 2*6) // The maximum supported qp /* NAL unit types */ enum { @@ -266,7 +266,7 @@ typedef struct MMCO{ typedef struct H264Context{ MpegEncContext s; H264DSPContext h264dsp; - int pixel_shift; + int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264 int chroma_qp[2]; //QPc int qp_thresh; ///< QP threshold to skip loopfilter @@ -355,8 +355,8 @@ typedef struct H264Context{ */ PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? - uint32_t dequant4_buffer[6][QP_MAX_MAX+1][16]; //FIXME should these be moved down? - uint32_t dequant8_buffer[2][QP_MAX_MAX+1][64]; + uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down? + uint32_t dequant8_buffer[2][QP_MAX_NUM+1][64]; uint32_t (*dequant4_coeff[6])[16]; uint32_t (*dequant8_coeff[2])[64]; @@ -597,7 +597,7 @@ typedef struct H264Context{ }H264Context; -extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_MAX+1]; ///< One chroma qp table for each supported bit depth (8, 9, 10). +extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM+1]; ///< One chroma qp table for each supported bit depth (8, 9, 10). 
/** * Decode SEI diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index 6017afc7aa..925ac44498 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -1103,10 +1103,11 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT #define STORE_BLOCK(type) \ - do {\ - uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;\ -\ - int j= scantable[index[--coeff_count]];\ + do { \ + uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base; \ + \ + int j= scantable[index[--coeff_count]]; \ + \ if( get_cabac( CC, ctx ) == 0 ) { \ node_ctx = coeff_abs_level_transition[0][node_ctx]; \ if( is_dc ) { \ @@ -1141,8 +1142,8 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT }else{ \ ((type*)block)[j] = ((int)(get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32)) >> 6; \ } \ - }\ - } while( coeff_count ); + } \ + } while ( coeff_count ); if (h->pixel_shift) { STORE_BLOCK(int32_t) @@ -1204,6 +1205,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) { int mb_xy; int mb_type, partition_count, cbp = 0; int dct8x8_allowed= h->pps.transform_8x8_mode; + const int pixel_shift = h->pixel_shift; mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride; @@ -1312,7 +1314,7 @@ decode_intra_mb: h->slice_table[ mb_xy ]= h->slice_num; if(IS_INTRA_PCM(mb_type)) { - const int mb_size = 384*h->sps.bit_depth_luma/8; + const int mb_size = (384*h->sps.bit_depth_luma) >> 3; const uint8_t *ptr; // We assume these blocks are very rare so we do not optimize it. @@ -1670,7 +1672,7 @@ decode_intra_mb: qmul = h->dequant4_coeff[0][s->qscale]; for( i = 0; i < 16; i++ ) { //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); - decode_cabac_residual_nondc(h, h->mb + (16*i<pixel_shift), 1, i, scan + 1, qmul, 15); + decode_cabac_residual_nondc(h, h->mb + (16*i << pixel_shift), 1, i, scan + 1, qmul, 15); } } else { fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); @@ -1680,7 +1682,7 @@ decode_intra_mb: for( i8x8 = 0; i8x8 < 4; i8x8++ ) { if( cbp & (1<mb + (64*i8x8<pixel_shift), 5, 4*i8x8, + decode_cabac_residual_nondc(h, h->mb + (64*i8x8 << pixel_shift), 5, 4*i8x8, scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64); } else { qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 
0:3][s->qscale]; @@ -1688,7 +1690,7 @@ decode_intra_mb: const int index = 4*i8x8 + i4x4; //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); //START_TIMER - decode_cabac_residual_nondc(h, h->mb + (16*index<<h->pixel_shift), 2, index, scan, qmul, 16); + decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 2, index, scan, qmul, 16); //STOP_TIMER("decode_residual") } } @@ -1703,7 +1705,7 @@ decode_intra_mb: int c; for( c = 0; c < 2; c++ ) { //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); - decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c)<<h->pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4); + decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4); } } @@ -1714,7 +1716,7 @@ decode_intra_mb: for( i = 0; i < 4; i++ ) { const int index = 16 + 4 * c + i; //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); - decode_cabac_residual_nondc(h, h->mb + (16*index<<h->pixel_shift), 4, index, scan + 1, qmul, 15); + decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15); } } } else { diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index 61b7436739..62e30f1311 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -542,6 +542,7 @@ int ff_h264_decode_mb_cavlc(H264Context *h){ int partition_count; unsigned int mb_type, cbp; int dct8x8_allowed= h->pps.transform_8x8_mode; + const int pixel_shift = h->pixel_shift; mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride; @@ -961,7 +962,7 @@ decode_intra_mb: for(i8x8=0; i8x8<4; i8x8++){ for(i4x4=0; i4x4<4; i4x4++){ const int index= i4x4 + 4*i8x8; - if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index<<h->pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){ + if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){ return -1; } } @@ -973,7 +974,7 @@ decode_intra_mb: for(i8x8=0; i8x8<4; i8x8++){ if(cbp & (1<<i8x8)){ if(IS_8x8DCT(mb_type)){ - DCTELEM *buf = &h->mb[64*i8x8<<h->pixel_shift]; + DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift]; uint8_t *nnz; for(i4x4=0; i4x4<4; i4x4++){ if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4, @@ -986,7 +987,7 @@ decode_intra_mb: for(i4x4=0; i4x4<4; i4x4++){ const int index= i4x4 + 4*i8x8; - if( decode_residual(h, gb, h->mb + (16*index<<h->pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){ + if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){ return -1; } } @@ -1000,7 +1001,7 @@ decode_intra_mb: if(cbp&0x30){ for(chroma_idx=0; chroma_idx<2; chroma_idx++) - if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx)<<h->pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){ + if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){ return -1; } } @@ -1010,7 +1011,7 @@ decode_intra_mb: const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ?
0:3)][h->chroma_qp[chroma_idx]]; for(i4x4=0; i4x4<4; i4x4++){ const int index= 16 + 4*chroma_idx + i4x4; - if( decode_residual(h, gb, h->mb + (16*index<<h->pixel_shift), index, scan + 1, qmul, 15) < 0){ + if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){ return -1; } } diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c index 325fd3cc61..2e61a3110a 100644 --- a/libavcodec/h264_loopfilter.c +++ b/libavcodec/h264_loopfilter.c @@ -101,197 +101,92 @@ static const uint8_t tc0_table[52*3][4] = { }; static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) { - const int bit_depth = h->sps.bit_depth_luma; - const int qp_bd_offset = 6*(bit_depth-8); + const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; - const int alpha = alpha_table[index_a] << (bit_depth-8); - const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8); + const int alpha = alpha_table[index_a]; + const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; if (alpha ==0 || beta == 0) return; if( bS[0] < 4 ) { int8_t tc[4]; - tc[0] = tc0_table[index_a][bS[0]] << (bit_depth-8); - tc[1] = tc0_table[index_a][bS[1]] << (bit_depth-8); - tc[2] = tc0_table[index_a][bS[2]] << (bit_depth-8); - tc[3] = tc0_table[index_a][bS[3]] << (bit_depth-8); + tc[0] = tc0_table[index_a][bS[0]]; + tc[1] = tc0_table[index_a][bS[1]]; + tc[2] = tc0_table[index_a][bS[2]]; + tc[3] = tc0_table[index_a][bS[3]]; h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); } else { h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); } } static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { - const int bit_depth = h->sps.bit_depth_luma; - const int qp_bd_offset = 6*(bit_depth-8); + const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; - const int alpha = alpha_table[index_a] << (bit_depth-8); - const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8); + const int alpha = alpha_table[index_a]; + const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; if (alpha ==0 || beta == 0) return; if( bS[0] < 4 ) { int8_t tc[4]; - tc[0] = (tc0_table[index_a][bS[0]] << (bit_depth-8))+1; - tc[1] = (tc0_table[index_a][bS[1]] << (bit_depth-8))+1; - tc[2] = (tc0_table[index_a][bS[2]] << (bit_depth-8))+1; - tc[3] = (tc0_table[index_a][bS[3]] << (bit_depth-8))+1; + tc[0] = tc0_table[index_a][bS[0]]+1; + tc[1] = tc0_table[index_a][bS[1]]+1; + tc[2] = tc0_table[index_a][bS[2]]+1; + tc[3] = tc0_table[index_a][bS[3]]+1; h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); } else { h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); } } -static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { - int i; - const int bit_depth = h->sps.bit_depth_luma; - const int qp_bd_offset = 6*(bit_depth-8); +static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) { + const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; - int alpha = alpha_table[index_a] << (bit_depth-8); - int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8);
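The deleted lines above scaled alpha, beta and tc0 to the stream bit depth at every h264_loopfilter.c call site; after this change the 8-bit table values are passed through unchanged and the per-bit-depth templates do the scaling once internally (h264dsp_template.c further down shifts alpha and beta by BIT_DEPTH - 8 and rebuilds the chroma tc as ((tc0[i] - 1) << (BIT_DEPTH - 8)) + 1). A small equivalence sketch, with hypothetical helper names:

#include <stdint.h>

/* Both helpers are hypothetical; they only show that scaling the 8-bit table
 * value at the call site (removed code) and inside the per-bit-depth template
 * (new code) yields the same threshold. */
static int alpha_scaled_at_call_site(const uint8_t *alpha_table, int index_a, int bit_depth)
{
    return alpha_table[index_a] << (bit_depth - 8);
}

static int alpha_scaled_in_template(const uint8_t *alpha_table, int index_a, int bit_depth)
{
    int alpha = alpha_table[index_a];   /* handed to the DSP function unscaled */
    alpha <<= bit_depth - 8;            /* done once inside the BIT_DEPTH template */
    return alpha;
}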
- for( i = 0; i < 8; i++, pix += stride) { - const int bS_index = (i >> 1) * bsi; - - if( bS[bS_index] == 0 ) { - continue; - } - - if( bS[bS_index] < 4 ) { - const int tc0 = tc0_table[index_a][bS[bS_index]] << (bit_depth-8); - const int p0 = pix[-1]; - const int p1 = pix[-2]; - const int p2 = pix[-3]; - const int q0 = pix[0]; - const int q1 = pix[1]; - const int q2 = pix[2]; - - if( FFABS( p0 - q0 ) < alpha && - FFABS( p1 - p0 ) < beta && - FFABS( q1 - q0 ) < beta ) { - int tc = tc0; - int i_delta; - - if( FFABS( p2 - p0 ) < beta ) { - if(tc0) - pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); - tc++; - } - if( FFABS( q2 - q0 ) < beta ) { - if(tc0) - pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); - tc++; - } - - i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); - pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ - pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ - tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); - } - }else{ - const int p0 = pix[-1]; - const int p1 = pix[-2]; - const int p2 = pix[-3]; - - const int q0 = pix[0]; - const int q1 = pix[1]; - const int q2 = pix[2]; - - if( FFABS( p0 - q0 ) < alpha && - FFABS( p1 - p0 ) < beta && - FFABS( q1 - q0 ) < beta ) { - - if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ - if( FFABS( p2 - p0 ) < beta) - { - const int p3 = pix[-4]; - /* p0', p1', p2' */ - pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; - pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; - pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; - } else { - /* p0' */ - pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; - } - if( FFABS( q2 - q0 ) < beta) - { - const int q3 = pix[3]; - /* q0', q1', q2' */ - pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; - pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; - pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; - } else { - /* q0' */ - pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; - } - }else{ - /* p0', q0' */ - pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; - pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; - } - tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); - } - } - } -} -static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { - int i; - const int bit_depth = h->sps.bit_depth_luma; - const int qp_bd_offset = 6*(bit_depth-8); - int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; - int alpha = alpha_table[index_a] << (bit_depth-8); - int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8); - for( i = 0; i < 4; i++, pix += stride) { - const int bS_index = i*bsi; - - if( bS[bS_index] == 0 ) { - continue; - } - - if( bS[bS_index] < 4 ) { - const int tc = (tc0_table[index_a][bS[bS_index]] << (bit_depth-8)) + 1; - const int p0 = pix[-1]; - const int p1 = pix[-2]; - const int q0 = pix[0]; - const int q1 = pix[1]; - - if( FFABS( p0 - q0 ) < alpha && - FFABS( p1 - p0 ) < beta && - FFABS( q1 - q0 ) < beta ) { - const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); - - pix[-1] = av_clip_uint8( 
p0 + i_delta ); /* p0' */ - pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ - tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); - } - }else{ - const int p0 = pix[-1]; - const int p1 = pix[-2]; - const int q0 = pix[0]; - const int q1 = pix[1]; - - if( FFABS( p0 - q0 ) < alpha && - FFABS( p1 - p0 ) < beta && - FFABS( q1 - q0 ) < beta ) { - - pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ - pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ - tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); - } - } - } -} - -static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { - const int bit_depth = h->sps.bit_depth_luma; - const int qp_bd_offset = 6*(bit_depth-8); - const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; - const int alpha = alpha_table[index_a] << (bit_depth-8); - const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8); + int alpha = alpha_table[index_a]; + int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; if (alpha ==0 || beta == 0) return; if( bS[0] < 4 ) { int8_t tc[4]; - tc[0] = tc0_table[index_a][bS[0]] << (bit_depth-8); - tc[1] = tc0_table[index_a][bS[1]] << (bit_depth-8); - tc[2] = tc0_table[index_a][bS[2]] << (bit_depth-8); - tc[3] = tc0_table[index_a][bS[3]] << (bit_depth-8); + tc[0] = tc0_table[index_a][bS[0*bsi]]; + tc[1] = tc0_table[index_a][bS[1*bsi]]; + tc[2] = tc0_table[index_a][bS[2*bsi]]; + tc[3] = tc0_table[index_a][bS[3*bsi]]; + h->h264dsp.h264_h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc); + } else { + h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta); + } +} +static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) { + const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); + int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; + int alpha = alpha_table[index_a]; + int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; + if (alpha ==0 || beta == 0) return; + + if( bS[0] < 4 ) { + int8_t tc[4]; + tc[0] = tc0_table[index_a][bS[0*bsi]] + 1; + tc[1] = tc0_table[index_a][bS[1*bsi]] + 1; + tc[2] = tc0_table[index_a][bS[2*bsi]] + 1; + tc[3] = tc0_table[index_a][bS[3*bsi]] + 1; + h->h264dsp.h264_h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc); + } else { + h->h264dsp.h264_h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta); + } +} + +static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { + const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); + const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; + const int alpha = alpha_table[index_a]; + const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; + if (alpha ==0 || beta == 0) return; + + if( bS[0] < 4 ) { + int8_t tc[4]; + tc[0] = tc0_table[index_a][bS[0]]; + tc[1] = tc0_table[index_a][bS[1]]; + tc[2] = tc0_table[index_a][bS[2]]; + tc[3] = tc0_table[index_a][bS[3]]; h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); } else { 
h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); @@ -299,19 +194,18 @@ static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t } static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { - const int bit_depth = h->sps.bit_depth_luma; - const int qp_bd_offset = 6*(bit_depth-8); + const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; - const int alpha = alpha_table[index_a] << (bit_depth-8); - const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8); + const int alpha = alpha_table[index_a]; + const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; if (alpha ==0 || beta == 0) return; if( bS[0] < 4 ) { int8_t tc[4]; - tc[0] = (tc0_table[index_a][bS[0]] << (bit_depth-8))+1; - tc[1] = (tc0_table[index_a][bS[1]] << (bit_depth-8))+1; - tc[2] = (tc0_table[index_a][bS[2]] << (bit_depth-8))+1; - tc[3] = (tc0_table[index_a][bS[3]] << (bit_depth-8))+1; + tc[0] = tc0_table[index_a][bS[0]]+1; + tc[1] = tc0_table[index_a][bS[1]]+1; + tc[2] = tc0_table[index_a][bS[2]]+1; + tc[3] = tc0_table[index_a][bS[3]]+1; h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); } else { h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); @@ -650,10 +544,10 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } if( dir == 0 ) { - filter_mb_edgev( &img_y[4*edge<<h->pixel_shift], linesize, bS, qp, h ); + filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h ); if( (edge&1) == 0 ) { - filter_mb_edgecv( &img_cb[2*edge<<h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h); - filter_mb_edgecv( &img_cr[2*edge<<h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h); + filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h); + filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h); } } else { filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h ); diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index ab20ecfeb7..f77a013112 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -70,7 +70,7 @@ static const AVRational pixel_aspect[17]={ QP(37,d), QP(37,d), QP(37,d), QP(38,d), QP(38,d), QP(38,d),\ QP(39,d), QP(39,d), QP(39,d), QP(39,d) -const uint8_t ff_h264_chroma_qp[3][QP_MAX_MAX+1] = { +const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM+1] = { { CHROMA_QP_TABLE_END(8) }, diff --git a/libavcodec/h264_refs.c b/libavcodec/h264_refs.c index a2058b5aec..9554201522 100644 --- a/libavcodec/h264_refs.c +++ b/libavcodec/h264_refs.c @@ -629,8 +629,9 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){ * short_ref and long_ref buffers.
*/ av_log(h->s.avctx, AV_LOG_ERROR, - "number of reference frames exceeds max (probably " - "corrupt input), discarding one long:%d short:%d max:%d\n", h->long_ref_count, h->short_ref_count, h->sps.ref_frame_count); + "number of reference frames (%d+%d) exceeds max (%d; probably " + "corrupt input), discarding one\n", + h->long_ref_count, h->short_ref_count, h->sps.ref_frame_count); if (h->long_ref_count && !h->short_ref_count) { for (i = 0; i < 16; ++i) diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c index 04c6ea6df4..96a38ff77d 100644 --- a/libavcodec/h264dsp.c +++ b/libavcodec/h264dsp.c @@ -30,15 +30,15 @@ #include "h264dsp.h" #define BIT_DEPTH 8 -#include "h264dsp_internal.h" +#include "h264dsp_template.c" #undef BIT_DEPTH #define BIT_DEPTH 9 -#include "h264dsp_internal.h" +#include "h264dsp_template.c" #undef BIT_DEPTH #define BIT_DEPTH 10 -#include "h264dsp_internal.h" +#include "h264dsp_template.c" #undef BIT_DEPTH void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) @@ -47,58 +47,62 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) #define FUNC(a, depth) a ## _ ## depth ## _c #define H264_DSP(depth) \ - c->h264_idct_add = FUNC(ff_h264_idct_add , depth);\ - c->h264_idct8_add = FUNC(ff_h264_idct8_add , depth);\ - c->h264_idct_dc_add = FUNC(ff_h264_idct_dc_add , depth);\ - c->h264_idct8_dc_add = FUNC(ff_h264_idct8_dc_add , depth);\ - c->h264_idct_add16 = FUNC(ff_h264_idct_add16 , depth);\ - c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4 , depth);\ - c->h264_idct_add8 = FUNC(ff_h264_idct_add8 , depth);\ - c->h264_idct_add16intra = FUNC(ff_h264_idct_add16intra , depth);\ - c->h264_luma_dc_dequant_idct = FUNC(ff_h264_luma_dc_dequant_idct , depth);\ - c->h264_chroma_dc_dequant_idct = FUNC(ff_h264_chroma_dc_dequant_idct , depth);\ + c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\ + c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\ + c->h264_idct_dc_add= FUNC(ff_h264_idct_dc_add, depth);\ + c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\ + c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\ + c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\ + c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\ + c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\ + c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\ + c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\ \ - c->weight_h264_pixels_tab[0] = FUNC( weight_h264_pixels16x16 , depth);\ - c->weight_h264_pixels_tab[1] = FUNC( weight_h264_pixels16x8 , depth);\ - c->weight_h264_pixels_tab[2] = FUNC( weight_h264_pixels8x16 , depth);\ - c->weight_h264_pixels_tab[3] = FUNC( weight_h264_pixels8x8 , depth);\ - c->weight_h264_pixels_tab[4] = FUNC( weight_h264_pixels8x4 , depth);\ - c->weight_h264_pixels_tab[5] = FUNC( weight_h264_pixels4x8 , depth);\ - c->weight_h264_pixels_tab[6] = FUNC( weight_h264_pixels4x4 , depth);\ - c->weight_h264_pixels_tab[7] = FUNC( weight_h264_pixels4x2 , depth);\ - c->weight_h264_pixels_tab[8] = FUNC( weight_h264_pixels2x4 , depth);\ - c->weight_h264_pixels_tab[9] = FUNC( weight_h264_pixels2x2 , depth);\ - c->biweight_h264_pixels_tab[0] = FUNC(biweight_h264_pixels16x16 , depth);\ - c->biweight_h264_pixels_tab[1] = FUNC(biweight_h264_pixels16x8 , depth);\ - c->biweight_h264_pixels_tab[2] = FUNC(biweight_h264_pixels8x16 , depth);\ - c->biweight_h264_pixels_tab[3] = FUNC(biweight_h264_pixels8x8 , depth);\ - c->biweight_h264_pixels_tab[4] = FUNC(biweight_h264_pixels8x4 , depth);\ - c->biweight_h264_pixels_tab[5] = 
FUNC(biweight_h264_pixels4x8 , depth);\ - c->biweight_h264_pixels_tab[6] = FUNC(biweight_h264_pixels4x4 , depth);\ - c->biweight_h264_pixels_tab[7] = FUNC(biweight_h264_pixels4x2 , depth);\ - c->biweight_h264_pixels_tab[8] = FUNC(biweight_h264_pixels2x4 , depth);\ - c->biweight_h264_pixels_tab[9] = FUNC(biweight_h264_pixels2x2 , depth);\ + c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\ + c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\ + c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\ + c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\ + c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\ + c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\ + c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\ + c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\ + c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\ + c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\ + c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\ + c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\ + c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\ + c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\ + c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\ + c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\ + c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\ + c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\ + c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\ + c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\ \ - c->h264_v_loop_filter_luma = FUNC(h264_v_loop_filter_luma , depth);\ - c->h264_h_loop_filter_luma = FUNC(h264_h_loop_filter_luma , depth);\ - c->h264_v_loop_filter_luma_intra = FUNC(h264_v_loop_filter_luma_intra , depth);\ - c->h264_h_loop_filter_luma_intra = FUNC(h264_h_loop_filter_luma_intra , depth);\ - c->h264_v_loop_filter_chroma = FUNC(h264_v_loop_filter_chroma , depth);\ - c->h264_h_loop_filter_chroma = FUNC(h264_h_loop_filter_chroma , depth);\ - c->h264_v_loop_filter_chroma_intra = FUNC(h264_v_loop_filter_chroma_intra, depth);\ - c->h264_h_loop_filter_chroma_intra = FUNC(h264_h_loop_filter_chroma_intra, depth);\ + c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\ + c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\ + c->h264_h_loop_filter_luma_mbaff= FUNC(h264_h_loop_filter_luma_mbaff, depth);\ + c->h264_v_loop_filter_luma_intra= FUNC(h264_v_loop_filter_luma_intra, depth);\ + c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\ + c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\ + c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\ + c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\ + c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\ + c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\ + c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\ + c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\ c->h264_loop_filter_strength= NULL; switch (bit_depth) { - case 9: - H264_DSP(9); - break; - case 10: - H264_DSP(10); - break; - default: - H264_DSP(8); - break; + case 9: + 
H264_DSP(9); + break; + case 10: + H264_DSP(10); + break; + default: + H264_DSP(8); + break; } if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth); diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index 8a0b9ae72b..4b606efa17 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -45,13 +45,17 @@ typedef struct H264DSPContext{ /* loop filter */ void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); /* v/h_loop_filter_luma_intra: align 16 */ void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); + void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta); void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); // h264_loop_filter_strength: simd only. the C version is inlined in h264.c void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c new file mode 100644 index 0000000000..91162ea900 --- /dev/null +++ b/libavcodec/h264dsp_template.c @@ -0,0 +1,313 @@ +/* + * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder + * Copyright (c) 2003-2010 Michael Niedermayer + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * H.264 / AVC / MPEG4 part10 DSP functions. 
+ * @author Michael Niedermayer + */ + +#include "high_bit_depth.h" + +#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) +#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) +#define H264_WEIGHT(W,H) \ +static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *_block, int stride, int log2_denom, int weight, int offset){ \ + int y; \ + pixel *block = (pixel*)_block; \ + stride /= sizeof(pixel); \ + offset <<= (log2_denom + (BIT_DEPTH-8)); \ + if(log2_denom) offset += 1<<(log2_denom-1); \ + for(y=0; y> 1 ) ) >> 1) - p1, -tc_orig, tc_orig ); + tc++; + } + if( FFABS( q2 - q0 ) < beta ) { + if(tc_orig) + pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig ); + tc++; + } + + i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + pix[-xstride] = av_clip_pixel( p0 + i_delta ); /* p0' */ + pix[0] = av_clip_pixel( q0 - i_delta ); /* q0' */ + } + pix += ystride; + } + } +} +static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0); +} +static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); +} +static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); +} + +static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta) +{ + pixel *pix = (pixel*)_pix; + int d; + xstride /= sizeof(pixel); + ystride /= sizeof(pixel); + alpha <<= BIT_DEPTH - 8; + beta <<= BIT_DEPTH - 8; + for( d = 0; d < 4 * inner_iters; d++ ) { + const int p2 = pix[-3*xstride]; + const int p1 = pix[-2*xstride]; + const int p0 = pix[-1*xstride]; + + const int q0 = pix[ 0*xstride]; + const int q1 = pix[ 1*xstride]; + const int q2 = pix[ 2*xstride]; + + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { + + if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ + if( FFABS( p2 - p0 ) < beta) + { + const int p3 = pix[-4*xstride]; + /* p0', p1', p2' */ + pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; + pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; + pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; + } else { + /* p0' */ + pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + } + if( FFABS( q2 - q0 ) < beta) + { + const int q3 = pix[3*xstride]; + /* q0', q1', q2' */ + pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; + pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; + pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; + } else { + /* q0' */ + pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + }else{ + /* p0', q0' */ + pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + } + pix += ystride; + } +} +static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta) +{ + FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta); +} +static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta) +{ + FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); +} +static void 
FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) +{ + FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); +} + +static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0) +{ + pixel *pix = (pixel*)_pix; + int i, d; + xstride /= sizeof(pixel); + ystride /= sizeof(pixel); + alpha <<= BIT_DEPTH - 8; + beta <<= BIT_DEPTH - 8; + for( i = 0; i < 4; i++ ) { + const int tc = ((tc0[i] - 1) << (BIT_DEPTH - 8)) + 1; + if( tc <= 0 ) { + pix += inner_iters*ystride; + continue; + } + for( d = 0; d < inner_iters; d++ ) { + const int p0 = pix[-1*xstride]; + const int p1 = pix[-2*xstride]; + const int q0 = pix[0]; + const int q1 = pix[1*xstride]; + + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { + + int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + + pix[-xstride] = av_clip_pixel( p0 + delta ); /* p0' */ + pix[0] = av_clip_pixel( q0 - delta ); /* q0' */ + } + pix += ystride; + } + } +} +static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0); +} +static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); +} +static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); +} + +static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta) +{ + pixel *pix = (pixel*)_pix; + int d; + xstride /= sizeof(pixel); + ystride /= sizeof(pixel); + alpha <<= BIT_DEPTH - 8; + beta <<= BIT_DEPTH - 8; + for( d = 0; d < 4 * inner_iters; d++ ) { + const int p0 = pix[-1*xstride]; + const int p1 = pix[-2*xstride]; + const int q0 = pix[0]; + const int q1 = pix[1*xstride]; + + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { + + pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ + } + pix += ystride; + } +} +static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta) +{ + FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta); +} +static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta) +{ + FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); +} +static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) +{ + FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); +} diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c index 920356d01f..7d1ee007bc 100644 --- a/libavcodec/h264idct.c +++ b/libavcodec/h264idct.c @@ -26,13 +26,13 @@ */ #define BIT_DEPTH 8 -#include "h264idct_internal.h" +#include "h264idct_template.c" #undef BIT_DEPTH #define BIT_DEPTH 9 -#include "h264idct_internal.h" +#include "h264idct_template.c" #undef BIT_DEPTH #define BIT_DEPTH 10 -#include "h264idct_internal.h" +#include "h264idct_template.c" #undef BIT_DEPTH diff --git a/libavcodec/h264idct_template.c 
b/libavcodec/h264idct_template.c new file mode 100644 index 0000000000..39c9a1c9eb --- /dev/null +++ b/libavcodec/h264idct_template.c @@ -0,0 +1,291 @@ +/* + * H.264 IDCT + * Copyright (c) 2004-2011 Michael Niedermayer + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * H.264 IDCT. + * @author Michael Niedermayer + */ + +#include "high_bit_depth.h" + +#ifndef AVCODEC_H264IDCT_INTERNAL_H +#define AVCODEC_H264IDCT_INTERNAL_H +//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split +static const uint8_t scan8[16 + 2*4]={ + 4+1*8, 5+1*8, 4+2*8, 5+2*8, + 6+1*8, 7+1*8, 6+2*8, 7+2*8, + 4+3*8, 5+3*8, 4+4*8, 5+4*8, + 6+3*8, 7+3*8, 6+4*8, 7+4*8, + 1+1*8, 2+1*8, + 1+2*8, 2+2*8, + 1+4*8, 2+4*8, + 1+5*8, 2+5*8, +}; +#endif + +static av_always_inline void FUNCC(idct_internal)(uint8_t *_dst, DCTELEM *_block, int stride, int block_stride, int shift, int add){ + int i; + INIT_CLIP + pixel *dst = (pixel*)_dst; + dctcoef *block = (dctcoef*)_block; + stride /= sizeof(pixel); + + block[0] += 1<<(shift-1); + + for(i=0; i<4; i++){ + const int z0= block[i + block_stride*0] + block[i + block_stride*2]; + const int z1= block[i + block_stride*0] - block[i + block_stride*2]; + const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3]; + const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1); + + block[i + block_stride*0]= z0 + z3; + block[i + block_stride*1]= z1 + z2; + block[i + block_stride*2]= z1 - z2; + block[i + block_stride*3]= z0 - z3; + } + + for(i=0; i<4; i++){ + const int z0= block[0 + block_stride*i] + block[2 + block_stride*i]; + const int z1= block[0 + block_stride*i] - block[2 + block_stride*i]; + const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i]; + const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1); + + dst[i + 0*stride]= CLIP(add*dst[i + 0*stride] + ((z0 + z3) >> shift)); + dst[i + 1*stride]= CLIP(add*dst[i + 1*stride] + ((z1 + z2) >> shift)); + dst[i + 2*stride]= CLIP(add*dst[i + 2*stride] + ((z1 - z2) >> shift)); + dst[i + 3*stride]= CLIP(add*dst[i + 3*stride] + ((z0 - z3) >> shift)); + } +} + +void FUNCC(ff_h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride){ + FUNCC(idct_internal)(dst, block, stride, 4, 6, 1); +} + +void FUNCC(ff_h264_lowres_idct_add)(uint8_t *dst, int stride, DCTELEM *block){ + FUNCC(idct_internal)(dst, block, stride, 8, 3, 1); +} + +void FUNCC(ff_h264_lowres_idct_put)(uint8_t *dst, int stride, DCTELEM *block){ + FUNCC(idct_internal)(dst, block, stride, 8, 3, 0); +} + +void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){ + int i; + INIT_CLIP + pixel *dst = (pixel*)_dst; + dctcoef *block = (dctcoef*)_block; + stride /= sizeof(pixel); + + block[0] += 32; + + for( i = 0; i < 8; i++ ) + { + 
const int a0 = block[i+0*8] + block[i+4*8]; + const int a2 = block[i+0*8] - block[i+4*8]; + const int a4 = (block[i+2*8]>>1) - block[i+6*8]; + const int a6 = (block[i+6*8]>>1) + block[i+2*8]; + + const int b0 = a0 + a6; + const int b2 = a2 + a4; + const int b4 = a2 - a4; + const int b6 = a0 - a6; + + const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1); + const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1); + const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1); + const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1); + + const int b1 = (a7>>2) + a1; + const int b3 = a3 + (a5>>2); + const int b5 = (a3>>2) - a5; + const int b7 = a7 - (a1>>2); + + block[i+0*8] = b0 + b7; + block[i+7*8] = b0 - b7; + block[i+1*8] = b2 + b5; + block[i+6*8] = b2 - b5; + block[i+2*8] = b4 + b3; + block[i+5*8] = b4 - b3; + block[i+3*8] = b6 + b1; + block[i+4*8] = b6 - b1; + } + for( i = 0; i < 8; i++ ) + { + const int a0 = block[0+i*8] + block[4+i*8]; + const int a2 = block[0+i*8] - block[4+i*8]; + const int a4 = (block[2+i*8]>>1) - block[6+i*8]; + const int a6 = (block[6+i*8]>>1) + block[2+i*8]; + + const int b0 = a0 + a6; + const int b2 = a2 + a4; + const int b4 = a2 - a4; + const int b6 = a0 - a6; + + const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1); + const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1); + const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1); + const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1); + + const int b1 = (a7>>2) + a1; + const int b3 = a3 + (a5>>2); + const int b5 = (a3>>2) - a5; + const int b7 = a7 - (a1>>2); + + dst[i + 0*stride] = CLIP( dst[i + 0*stride] + ((b0 + b7) >> 6) ); + dst[i + 1*stride] = CLIP( dst[i + 1*stride] + ((b2 + b5) >> 6) ); + dst[i + 2*stride] = CLIP( dst[i + 2*stride] + ((b4 + b3) >> 6) ); + dst[i + 3*stride] = CLIP( dst[i + 3*stride] + ((b6 + b1) >> 6) ); + dst[i + 4*stride] = CLIP( dst[i + 4*stride] + ((b6 - b1) >> 6) ); + dst[i + 5*stride] = CLIP( dst[i + 5*stride] + ((b4 - b3) >> 6) ); + dst[i + 6*stride] = CLIP( dst[i + 6*stride] + ((b2 - b5) >> 6) ); + dst[i + 7*stride] = CLIP( dst[i + 7*stride] + ((b0 - b7) >> 6) ); + } +} + +// assumes all AC coefs are 0 +void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){ + int i, j; + int dc = (((dctcoef*)block)[0] + 32) >> 6; + INIT_CLIP + pixel *dst = (pixel*)_dst; + stride /= sizeof(pixel); + for( j = 0; j < 4; j++ ) + { + for( i = 0; i < 4; i++ ) + dst[i] = CLIP( dst[i] + dc ); + dst += stride; + } +} + +void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){ + int i, j; + int dc = (((dctcoef*)block)[0] + 32) >> 6; + INIT_CLIP + pixel *dst = (pixel*)_dst; + stride /= sizeof(pixel); + for( j = 0; j < 8; j++ ) + { + for( i = 0; i < 8; i++ ) + dst[i] = CLIP( dst[i] + dc ); + dst += stride; + } +} + +void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=0; i<16; i++){ + int nnz = nnzc[ scan8[i] ]; + if(nnz){ + if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); + else FUNCC(idct_internal )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1); + } + } +} + +void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + 
for(i=0; i<16; i++){ + if(nnzc[ scan8[i] ]) FUNCC(idct_internal )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1); + else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); + } +} + +void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=0; i<16; i+=4){ + int nnz = nnzc[ scan8[i] ]; + if(nnz){ + if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); + else FUNCC(ff_h264_idct8_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); + } + } +} + +void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=16; i<16+8; i++){ + if(nnzc[ scan8[i] ]) + FUNCC(ff_h264_idct_add )(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride); + else if(((dctcoef*)block)[i*16]) + FUNCC(ff_h264_idct_dc_add)(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride); + } +} +/** + * IDCT transforms the 16 dc values and dequantizes them. + * @param qp quantization parameter + */ +void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int qmul){ +#define stride 16 + int i; + int temp[16]; + static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride}; + dctcoef *input = (dctcoef*)_input; + dctcoef *output = (dctcoef*)_output; + + for(i=0; i<4; i++){ + const int z0= input[4*i+0] + input[4*i+1]; + const int z1= input[4*i+0] - input[4*i+1]; + const int z2= input[4*i+2] - input[4*i+3]; + const int z3= input[4*i+2] + input[4*i+3]; + + temp[4*i+0]= z0+z3; + temp[4*i+1]= z0-z3; + temp[4*i+2]= z1-z2; + temp[4*i+3]= z1+z2; + } + + for(i=0; i<4; i++){ + const int offset= x_offset[i]; + const int z0= temp[4*0+i] + temp[4*2+i]; + const int z1= temp[4*0+i] - temp[4*2+i]; + const int z2= temp[4*1+i] - temp[4*3+i]; + const int z3= temp[4*1+i] + temp[4*3+i]; + + output[stride* 0+offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); + output[stride* 1+offset]= ((((z1 + z2)*qmul + 128 ) >> 8)); + output[stride* 4+offset]= ((((z1 - z2)*qmul + 128 ) >> 8)); + output[stride* 5+offset]= ((((z0 - z3)*qmul + 128 ) >> 8)); + } +#undef stride +} + +void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){ + const int stride= 16*2; + const int xStride= 16; + int a,b,c,d,e; + dctcoef *block = (dctcoef*)_block; + + a= block[stride*0 + xStride*0]; + b= block[stride*0 + xStride*1]; + c= block[stride*1 + xStride*0]; + d= block[stride*1 + xStride*1]; + + e= a-b; + a= a+b; + b= c-d; + c= c+d; + + block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7; + block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7; + block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7; + block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7; +} diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c index 8d2c6f0355..f6533cf9ba 100644 --- a/libavcodec/h264pred.c +++ b/libavcodec/h264pred.c @@ -28,17 +28,338 @@ #include "h264pred.h" #define BIT_DEPTH 8 -#include "h264pred_internal.h" +#include "h264pred_template.c" #undef BIT_DEPTH #define BIT_DEPTH 9 -#include "h264pred_internal.h" +#include "h264pred_template.c" #undef BIT_DEPTH #define BIT_DEPTH 10 -#include "h264pred_internal.h" +#include "h264pred_template.c" #undef BIT_DEPTH +static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ + const int lt= src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + uint32_t v = 
PACK_4U8((lt + 2*t0 + t1 + 2) >> 2, + (t0 + 2*t1 + t2 + 2) >> 2, + (t1 + 2*t2 + t3 + 2) >> 2, + (t2 + 2*t3 + t4 + 2) >> 2); + + AV_WN32A(src+0*stride, v); + AV_WN32A(src+1*stride, v); + AV_WN32A(src+2*stride, v); + AV_WN32A(src+3*stride, v); +} + +static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ + const int lt= src[-1-1*stride]; + LOAD_LEFT_EDGE + + AV_WN32A(src+0*stride, ((lt + 2*l0 + l1 + 2) >> 2)*0x01010101); + AV_WN32A(src+1*stride, ((l0 + 2*l1 + l2 + 2) >> 2)*0x01010101); + AV_WN32A(src+2*stride, ((l1 + 2*l2 + l3 + 2) >> 2)*0x01010101); + AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101); +} + +static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int stride){ + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + const av_unused int unu0= t0; + const av_unused int unu1= l0; + + src[0+0*stride]=(l1 + t1)>>1; + src[1+0*stride]= + src[0+1*stride]=(l2 + t2)>>1; + src[2+0*stride]= + src[1+1*stride]= + src[0+2*stride]= + src[3+0*stride]= + src[2+1*stride]= + src[1+2*stride]= + src[0+3*stride]= + src[3+1*stride]= + src[2+2*stride]= + src[1+3*stride]= + src[3+2*stride]= + src[2+3*stride]= + src[3+3*stride]=(l3 + t3)>>1; +} + +static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + LOAD_LEFT_EDGE + LOAD_DOWN_LEFT_EDGE + + src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3; + src[1+0*stride]= + src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3; + src[2+0*stride]= + src[1+1*stride]= + src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + l4 + 2*l3 + 2)>>3; + src[3+0*stride]= + src[2+1*stride]= + src[1+2*stride]= + src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3 + l5 + 2*l4 + 2)>>3; + src[3+1*stride]= + src[2+2*stride]= + src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l4 + l6 + 2*l5 + 2)>>3; + src[3+2*stride]= + src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l5 + l7 + 2*l6 + 2)>>3; + src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2; +} + +static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + LOAD_LEFT_EDGE + + src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3; + src[1+0*stride]= + src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3; + src[2+0*stride]= + src[1+1*stride]= + src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + 3*l3 + 2)>>3; + src[3+0*stride]= + src[2+1*stride]= + src[1+2*stride]= + src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3*4 + 2)>>3; + src[3+1*stride]= + src[2+2*stride]= + src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l3*4 + 2)>>3; + src[3+2*stride]= + src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l3*4 + 2)>>3; + src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2; +} + +static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, int stride, + const int l0, const int l1, const int l2, const int l3, const int l4){ + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + + src[0+0*stride]=(2*t0 + 2*t1 + l1 + 2*l2 + l3 + 4)>>3; + src[1+0*stride]= + src[0+2*stride]=(t1 + t2 + 1)>>1; + src[2+0*stride]= + src[1+2*stride]=(t2 + t3 + 1)>>1; + src[3+0*stride]= + src[2+2*stride]=(t3 + t4+ 1)>>1; + src[3+2*stride]=(t4 + t5+ 1)>>1; + src[0+1*stride]=(t0 + 2*t1 + t2 + l2 + 2*l3 + l4 + 4)>>3; + src[1+1*stride]= + src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; + src[2+1*stride]= + src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; + src[3+1*stride]= + src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; + src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; +} + +static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ + 
LOAD_LEFT_EDGE + LOAD_DOWN_LEFT_EDGE + + pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4); +} + +static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ + LOAD_LEFT_EDGE + + pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3); +} + +static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + + src[0+0*stride]=(t0 + t1 + 1)>>1; + src[1+0*stride]= + src[0+2*stride]=(t1 + t2 + 1)>>1; + src[2+0*stride]= + src[1+2*stride]=(t2 + t3 + 1)>>1; + src[3+0*stride]= + src[2+2*stride]=(t3 + t4 + 1)>>1; + src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[1+1*stride]= + src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; + src[2+1*stride]= + src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; + src[3+1*stride]= + src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; + src[3+2*stride]=(t4 + 2*t5 + t6 + 2)>>2; + src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2; +} + +static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ + LOAD_LEFT_EDGE + LOAD_DOWN_LEFT_EDGE + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + + src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3; + src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3; + src[2+0*stride]= + src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3; + src[3+0*stride]= + src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3; + src[2+1*stride]= + src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3; + src[3+1*stride]= + src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3; + src[3+2*stride]= + src[1+3*stride]=(l3 + 2*l4 + l5 + 2)>>2; + src[0+3*stride]= + src[2+2*stride]=(t6 + t7 + l3 + l4 + 2)>>2; + src[2+3*stride]=(l4 + l5 + 1)>>1; + src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2; +} + +static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ + LOAD_LEFT_EDGE + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + + src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3; + src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3; + src[2+0*stride]= + src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3; + src[3+0*stride]= + src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3; + src[2+1*stride]= + src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3; + src[3+1*stride]= + src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3; + src[3+2*stride]= + src[1+3*stride]=l3; + src[0+3*stride]= + src[2+2*stride]=(t6 + t7 + 2*l3 + 2)>>2; + src[2+3*stride]= + src[3+3*stride]=l3; +} + +static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; + uint8_t *top = src-stride; + int y; + + for (y = 0; y < 4; y++) { + uint8_t *cm_in = cm + src[-1]; + src[0] = cm_in[top[0]]; + src[1] = cm_in[top[1]]; + src[2] = cm_in[top[2]]; + src[3] = cm_in[top[3]]; + src += stride; + } +} + +static void pred16x16_plane_svq3_c(uint8_t *src, int stride){ + pred16x16_plane_compat_8_c(src, stride, 1, 0); +} + +static void pred16x16_plane_rv40_c(uint8_t *src, int stride){ + pred16x16_plane_compat_8_c(src, stride, 0, 1); +} + +static void pred16x16_tm_vp8_c(uint8_t *src, int stride){ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; + uint8_t *top = src-stride; + int y; + + for (y = 0; y < 16; y++) { + uint8_t *cm_in = cm + src[-1]; + src[0] = cm_in[top[0]]; + src[1] = cm_in[top[1]]; + src[2] = cm_in[top[2]]; + src[3] = cm_in[top[3]]; + src[4] = cm_in[top[4]]; + src[5] = cm_in[top[5]]; + src[6] = cm_in[top[6]]; + src[7] = cm_in[top[7]]; + src[8] = 
cm_in[top[8]]; + src[9] = cm_in[top[9]]; + src[10] = cm_in[top[10]]; + src[11] = cm_in[top[11]]; + src[12] = cm_in[top[12]]; + src[13] = cm_in[top[13]]; + src[14] = cm_in[top[14]]; + src[15] = cm_in[top[15]]; + src += stride; + } +} + +static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){ + int i; + int dc0; + + dc0=0; + for(i=0;i<8; i++) + dc0+= src[-1+i*stride]; + dc0= 0x01010101*((dc0 + 4)>>3); + + for(i=0; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= dc0; + } +} + +static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){ + int i; + int dc0; + + dc0=0; + for(i=0;i<8; i++) + dc0+= src[i-stride]; + dc0= 0x01010101*((dc0 + 4)>>3); + + for(i=0; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= dc0; + } +} + +static void pred8x8_dc_rv40_c(uint8_t *src, int stride){ + int i; + int dc0=0; + + for(i=0;i<4; i++){ + dc0+= src[-1+i*stride] + src[i-stride]; + dc0+= src[4+i-stride]; + dc0+= src[-1+(i+4)*stride]; + } + dc0= 0x01010101*((dc0 + 8)>>4); + + for(i=0; i<4; i++){ + ((uint32_t*)(src+i*stride))[0]= dc0; + ((uint32_t*)(src+i*stride))[1]= dc0; + } + for(i=4; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= dc0; + ((uint32_t*)(src+i*stride))[1]= dc0; + } +} + +static void pred8x8_tm_vp8_c(uint8_t *src, int stride){ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; + uint8_t *top = src-stride; + int y; + + for (y = 0; y < 8; y++) { + uint8_t *cm_in = cm + src[-1]; + src[0] = cm_in[top[0]]; + src[1] = cm_in[top[1]]; + src[2] = cm_in[top[2]]; + src[3] = cm_in[top[3]]; + src[4] = cm_in[top[4]]; + src[5] = cm_in[top[5]]; + src[6] = cm_in[top[6]]; + src[7] = cm_in[top[7]]; + src += stride; + } +} + /** * Set the intra prediction function pointers. */ @@ -49,26 +370,27 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ #undef FUNCC #define FUNC(a, depth) a ## _ ## depth #define FUNCC(a, depth) a ## _ ## depth ## _c +#define FUNCD(a) a ## _c #define H264_PRED(depth) \ if(codec_id != CODEC_ID_RV40){\ if(codec_id == CODEC_ID_VP8) {\ - h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical_vp8 , depth);\ - h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal_vp8 , depth);\ + h->pred4x4[VERT_PRED ]= FUNCD(pred4x4_vertical_vp8);\ + h->pred4x4[HOR_PRED ]= FUNCD(pred4x4_horizontal_vp8);\ } else {\ h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\ h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\ }\ h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\ if(codec_id == CODEC_ID_SVQ3)\ - h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left_svq3, depth);\ + h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\ else\ h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left , depth);\ h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\ h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\ h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\ if (codec_id == CODEC_ID_VP8) {\ - h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left_vp8 , depth);\ + h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_vp8);\ } else\ h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left , depth);\ h->pred4x4[HOR_UP_PRED ]= FUNCC(pred4x4_horizontal_up , depth);\ @@ -77,7 +399,7 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\ h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\ } else {\ - h->pred4x4[TM_VP8_PRED ]= FUNCC(pred4x4_tm_vp8 , depth);\ + 
h->pred4x4[TM_VP8_PRED ]= FUNCD(pred4x4_tm_vp8);\ h->pred4x4[DC_127_PRED ]= FUNCC(pred4x4_127_dc , depth);\ h->pred4x4[DC_129_PRED ]= FUNCC(pred4x4_129_dc , depth);\ h->pred4x4[VERT_VP8_PRED ]= FUNCC(pred4x4_vertical , depth);\ @@ -87,18 +409,18 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\ h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\ h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\ - h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left_rv40 , depth);\ + h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_rv40);\ h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\ h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\ h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\ - h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left_rv40, depth);\ - h->pred4x4[HOR_UP_PRED ]= FUNCC(pred4x4_horizontal_up_rv40, depth);\ + h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_rv40);\ + h->pred4x4[HOR_UP_PRED ]= FUNCD(pred4x4_horizontal_up_rv40);\ h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\ h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\ h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\ - h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCC(pred4x4_down_left_rv40_nodown, depth);\ - h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCC(pred4x4_horizontal_up_rv40_nodown , depth);\ - h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCC(pred4x4_vertical_left_rv40_nodown , depth);\ + h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_down_left_rv40_nodown);\ + h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCD(pred4x4_horizontal_up_rv40_nodown);\ + h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_vertical_left_rv40_nodown);\ }\ \ h->pred8x8l[VERT_PRED ]= FUNCC(pred8x8l_vertical , depth);\ @@ -119,7 +441,7 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ if (codec_id != CODEC_ID_VP8) {\ h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\ } else\ - h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_tm_vp8 , depth);\ + h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\ if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\ h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\ h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\ @@ -129,9 +451,9 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ }else{\ - h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc_rv40 , depth);\ - h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc_rv40 , depth);\ - h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc_rv40 , depth);\ + h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ + h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\ + h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\ if (codec_id == CODEC_ID_VP8) {\ h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc , depth);\ h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\ @@ -144,13 +466,13 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ h->pred16x16[HOR_PRED8x8 ]= FUNCC(pred16x16_horizontal , depth);\ switch(codec_id){\ case CODEC_ID_SVQ3:\ - h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_plane_svq3 , depth);\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_svq3);\ break;\ case CODEC_ID_RV40:\ - 
h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_plane_rv40 , depth);\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_rv40);\ break;\ case CODEC_ID_VP8:\ - h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_tm_vp8 , depth);\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_tm_vp8);\ h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc , depth);\ h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc , depth);\ break;\ diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c new file mode 100644 index 0000000000..066e837cdf --- /dev/null +++ b/libavcodec/h264pred_template.c @@ -0,0 +1,975 @@ +/* + * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder + * Copyright (c) 2003-2011 Michael Niedermayer + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * H.264 / AVC / MPEG4 part10 prediction functions. + * @author Michael Niedermayer + */ + +#include "mathops.h" +#include "high_bit_depth.h" + +static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + const pixel4 a= ((pixel4*)(src-stride))[0]; + ((pixel4*)(src+0*stride))[0]= a; + ((pixel4*)(src+1*stride))[0]= a; + ((pixel4*)(src+2*stride))[0]= a; + ((pixel4*)(src+3*stride))[0]= a; +} + +static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + ((pixel4*)(src+0*stride))[0]= PIXEL_SPLAT_X4(src[-1+0*stride]); + ((pixel4*)(src+1*stride))[0]= PIXEL_SPLAT_X4(src[-1+1*stride]); + ((pixel4*)(src+2*stride))[0]= PIXEL_SPLAT_X4(src[-1+2*stride]); + ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(src[-1+3*stride]); +} + +static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3; + + ((pixel4*)(src+0*stride))[0]= + ((pixel4*)(src+1*stride))[0]= + ((pixel4*)(src+2*stride))[0]= + ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc); +} + +static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2; + + ((pixel4*)(src+0*stride))[0]= + ((pixel4*)(src+1*stride))[0]= + ((pixel4*)(src+2*stride))[0]= + ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc); +} + +static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + 
src[3-stride] + 2) >>2; + + ((pixel4*)(src+0*stride))[0]= + ((pixel4*)(src+1*stride))[0]= + ((pixel4*)(src+2*stride))[0]= + ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc); +} + +static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + ((pixel4*)(src+0*stride))[0]= + ((pixel4*)(src+1*stride))[0]= + ((pixel4*)(src+2*stride))[0]= + ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)); +} + +static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + ((pixel4*)(src+0*stride))[0]= + ((pixel4*)(src+1*stride))[0]= + ((pixel4*)(src+2*stride))[0]= + ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1); +} + +static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + ((pixel4*)(src+0*stride))[0]= + ((pixel4*)(src+1*stride))[0]= + ((pixel4*)(src+2*stride))[0]= + ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1); +} + + +#define LOAD_TOP_RIGHT_EDGE\ + const int av_unused t4= topright[0];\ + const int av_unused t5= topright[1];\ + const int av_unused t6= topright[2];\ + const int av_unused t7= topright[3];\ + +#define LOAD_DOWN_LEFT_EDGE\ + const int av_unused l4= src[-1+4*stride];\ + const int av_unused l5= src[-1+5*stride];\ + const int av_unused l6= src[-1+6*stride];\ + const int av_unused l7= src[-1+7*stride];\ + +#define LOAD_LEFT_EDGE\ + const int av_unused l0= src[-1+0*stride];\ + const int av_unused l1= src[-1+1*stride];\ + const int av_unused l2= src[-1+2*stride];\ + const int av_unused l3= src[-1+3*stride];\ + +#define LOAD_TOP_EDGE\ + const int av_unused t0= src[ 0-1*stride];\ + const int av_unused t1= src[ 1-1*stride];\ + const int av_unused t2= src[ 2-1*stride];\ + const int av_unused t3= src[ 3-1*stride];\ + +static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + const int lt= src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + + src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; + src[0+2*stride]= + src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; + src[0+1*stride]= + src[1+2*stride]= + src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; + src[0+0*stride]= + src[1+1*stride]= + src[2+2*stride]= + src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; + src[1+0*stride]= + src[2+1*stride]= + src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2; + src[2+0*stride]= + src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2; +} + +static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){ + pixel *src = (pixel*)_src; + const pixel *topright = (const pixel*)_topright; + int stride = _stride/sizeof(pixel); + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE +// LOAD_LEFT_EDGE + + src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2; + src[1+0*stride]= + src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2; + src[2+0*stride]= + src[1+1*stride]= + src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2; + src[3+0*stride]= + src[2+1*stride]= + src[1+2*stride]= + src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2; + src[3+1*stride]= + src[2+2*stride]= + src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2; + src[3+2*stride]= + src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2; + src[3+3*stride]=(t6 + 3*t7 + 2)>>2; +} + +static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int 
stride = _stride/sizeof(pixel); + const int lt= src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + + src[0+0*stride]= + src[1+2*stride]=(lt + t0 + 1)>>1; + src[1+0*stride]= + src[2+2*stride]=(t0 + t1 + 1)>>1; + src[2+0*stride]= + src[3+2*stride]=(t1 + t2 + 1)>>1; + src[3+0*stride]=(t2 + t3 + 1)>>1; + src[0+1*stride]= + src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2; + src[1+1*stride]= + src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2; + src[2+1*stride]= + src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2; + src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2; + src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; +} + +static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){ + pixel *src = (pixel*)_src; + const pixel *topright = (const pixel*)_topright; + int stride = _stride/sizeof(pixel); + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + + src[0+0*stride]=(t0 + t1 + 1)>>1; + src[1+0*stride]= + src[0+2*stride]=(t1 + t2 + 1)>>1; + src[2+0*stride]= + src[1+2*stride]=(t2 + t3 + 1)>>1; + src[3+0*stride]= + src[2+2*stride]=(t3 + t4+ 1)>>1; + src[3+2*stride]=(t4 + t5+ 1)>>1; + src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[1+1*stride]= + src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; + src[2+1*stride]= + src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; + src[3+1*stride]= + src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; + src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; +} + +static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + LOAD_LEFT_EDGE + + src[0+0*stride]=(l0 + l1 + 1)>>1; + src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2; + src[2+0*stride]= + src[0+1*stride]=(l1 + l2 + 1)>>1; + src[3+0*stride]= + src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2; + src[2+1*stride]= + src[0+2*stride]=(l2 + l3 + 1)>>1; + src[3+1*stride]= + src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2; + src[3+2*stride]= + src[1+3*stride]= + src[0+3*stride]= + src[2+2*stride]= + src[2+3*stride]= + src[3+3*stride]=l3; +} + +static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + const int lt= src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + + src[0+0*stride]= + src[2+1*stride]=(lt + l0 + 1)>>1; + src[1+0*stride]= + src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2; + src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2; + src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[0+1*stride]= + src[2+2*stride]=(l0 + l1 + 1)>>1; + src[1+1*stride]= + src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2; + src[0+2*stride]= + src[2+3*stride]=(l1 + l2+ 1)>>1; + src[1+2*stride]= + src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; + src[0+3*stride]=(l2 + l3 + 1)>>1; + src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; +} + +static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){ + int i; + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + const pixel4 a = ((pixel4*)(src-stride))[0]; + const pixel4 b = ((pixel4*)(src-stride))[1]; + const pixel4 c = ((pixel4*)(src-stride))[2]; + const pixel4 d = ((pixel4*)(src-stride))[3]; + + for(i=0; i<16; i++){ + ((pixel4*)(src+i*stride))[0] = a; + ((pixel4*)(src+i*stride))[1] = b; + ((pixel4*)(src+i*stride))[2] = c; + ((pixel4*)(src+i*stride))[3] = d; + } +} + +static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){ + int i; + pixel *src = (pixel*)_src; + stride /= sizeof(pixel); + + for(i=0; i<16; i++){ + ((pixel4*)(src+i*stride))[0] = + ((pixel4*)(src+i*stride))[1] = + ((pixel4*)(src+i*stride))[2] = + 
((pixel4*)(src+i*stride))[3] = PIXEL_SPLAT_X4(src[-1+i*stride]); + } +} + +#define PREDICT_16x16_DC(v)\ + for(i=0; i<16; i++){\ + AV_WN4P(src+ 0, v);\ + AV_WN4P(src+ 4, v);\ + AV_WN4P(src+ 8, v);\ + AV_WN4P(src+12, v);\ + src += stride;\ + } + +static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){ + int i, dc=0; + pixel *src = (pixel*)_src; + pixel4 dcsplat; + stride /= sizeof(pixel); + + for(i=0;i<16; i++){ + dc+= src[-1+i*stride]; + } + + for(i=0;i<16; i++){ + dc+= src[i-stride]; + } + + dcsplat = PIXEL_SPLAT_X4((dc+16)>>5); + PREDICT_16x16_DC(dcsplat); +} + +static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){ + int i, dc=0; + pixel *src = (pixel*)_src; + pixel4 dcsplat; + stride /= sizeof(pixel); + + for(i=0;i<16; i++){ + dc+= src[-1+i*stride]; + } + + dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); + PREDICT_16x16_DC(dcsplat); +} + +static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){ + int i, dc=0; + pixel *src = (pixel*)_src; + pixel4 dcsplat; + stride /= sizeof(pixel); + + for(i=0;i<16; i++){ + dc+= src[i-stride]; + } + + dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); + PREDICT_16x16_DC(dcsplat); +} + +#define PRED16x16_X(n, v) \ +static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\ + int i;\ + pixel *src = (pixel*)_src;\ + stride /= sizeof(pixel);\ + PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\ +} + +PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1); +PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0); +PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1); + +static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, int _stride, const int svq3, const int rv40){ + int i, j, k; + int a; + INIT_CLIP + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + const pixel * const src0 = src +7-stride; + const pixel * src1 = src +8*stride-1; + const pixel * src2 = src1-2*stride; // == src+6*stride-1; + int H = src0[1] - src0[-1]; + int V = src1[0] - src2[ 0]; + for(k=2; k<=8; ++k) { + src1 += stride; src2 -= stride; + H += k*(src0[k] - src0[-k]); + V += k*(src1[0] - src2[ 0]); + } + if(svq3){ + H = ( 5*(H/4) ) / 16; + V = ( 5*(V/4) ) / 16; + + /* required for 100% accuracy */ + i = H; H = V; V = i; + }else if(rv40){ + H = ( H + (H>>2) ) >> 4; + V = ( V + (V>>2) ) >> 4; + }else{ + H = ( 5*H+32 ) >> 6; + V = ( 5*V+32 ) >> 6; + } + + a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); + for(j=16; j>0; --j) { + int b = a; + a += V; + for(i=-16; i<0; i+=4) { + src[16+i] = CLIP((b ) >> 5); + src[17+i] = CLIP((b+ H) >> 5); + src[18+i] = CLIP((b+2*H) >> 5); + src[19+i] = CLIP((b+3*H) >> 5); + b += 4*H; + } + src += stride; + } +} + +static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){ + FUNCC(pred16x16_plane_compat)(src, stride, 0, 0); +} + +static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){ + int i; + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + const pixel4 a= ((pixel4*)(src-stride))[0]; + const pixel4 b= ((pixel4*)(src-stride))[1]; + + for(i=0; i<8; i++){ + ((pixel4*)(src+i*stride))[0]= a; + ((pixel4*)(src+i*stride))[1]= b; + } +} + +static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ + int i; + pixel *src = (pixel*)_src; + stride /= sizeof(pixel); + + for(i=0; i<8; i++){ + ((pixel4*)(src+i*stride))[0]= + ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(src[-1+i*stride]); + } +} + +#define PRED8x8_X(n, v)\ +static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\ + int i;\ + pixel *src = (pixel*)_src;\ + stride /= sizeof(pixel);\ + for(i=0; i<8; i++){\ + ((pixel4*)(src+i*stride))[0]=\ + ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(v);\ + }\ +} + 
+PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1); +PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0); +PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1); + +static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ + int i; + int dc0, dc2; + pixel4 dc0splat, dc2splat; + pixel *src = (pixel*)_src; + stride /= sizeof(pixel); + + dc0=dc2=0; + for(i=0;i<4; i++){ + dc0+= src[-1+i*stride]; + dc2+= src[-1+(i+4)*stride]; + } + dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); + dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); + + for(i=0; i<4; i++){ + ((pixel4*)(src+i*stride))[0]= + ((pixel4*)(src+i*stride))[1]= dc0splat; + } + for(i=4; i<8; i++){ + ((pixel4*)(src+i*stride))[0]= + ((pixel4*)(src+i*stride))[1]= dc2splat; + } +} + +static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ + int i; + int dc0, dc1; + pixel4 dc0splat, dc1splat; + pixel *src = (pixel*)_src; + stride /= sizeof(pixel); + + dc0=dc1=0; + for(i=0;i<4; i++){ + dc0+= src[i-stride]; + dc1+= src[4+i-stride]; + } + dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); + dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); + + for(i=0; i<4; i++){ + ((pixel4*)(src+i*stride))[0]= dc0splat; + ((pixel4*)(src+i*stride))[1]= dc1splat; + } + for(i=4; i<8; i++){ + ((pixel4*)(src+i*stride))[0]= dc0splat; + ((pixel4*)(src+i*stride))[1]= dc1splat; + } +} + +static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ + int i; + int dc0, dc1, dc2; + pixel4 dc0splat, dc1splat, dc2splat, dc3splat; + pixel *src = (pixel*)_src; + stride /= sizeof(pixel); + + dc0=dc1=dc2=0; + for(i=0;i<4; i++){ + dc0+= src[-1+i*stride] + src[i-stride]; + dc1+= src[4+i-stride]; + dc2+= src[-1+(i+4)*stride]; + } + dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3); + dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); + dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); + dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); + + for(i=0; i<4; i++){ + ((pixel4*)(src+i*stride))[0]= dc0splat; + ((pixel4*)(src+i*stride))[1]= dc1splat; + } + for(i=4; i<8; i++){ + ((pixel4*)(src+i*stride))[0]= dc2splat; + ((pixel4*)(src+i*stride))[1]= dc3splat; + } +} + +//the following 4 function should not be optimized! 
+static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ + FUNCC(pred8x8_top_dc)(src, stride); + FUNCC(pred4x4_dc)(src, NULL, stride); +} + +static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){ + FUNCC(pred8x8_dc)(src, stride); + FUNCC(pred4x4_top_dc)(src, NULL, stride); +} + +static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){ + FUNCC(pred8x8_left_dc)(src, stride); + FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); + FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); +} + +static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){ + FUNCC(pred8x8_left_dc)(src, stride); + FUNCC(pred4x4_128_dc)(src , NULL, stride); + FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); +} + +static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ + int j, k; + int a; + INIT_CLIP + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + const pixel * const src0 = src +3-stride; + const pixel * src1 = src +4*stride-1; + const pixel * src2 = src1-2*stride; // == src+2*stride-1; + int H = src0[1] - src0[-1]; + int V = src1[0] - src2[ 0]; + for(k=2; k<=4; ++k) { + src1 += stride; src2 -= stride; + H += k*(src0[k] - src0[-k]); + V += k*(src1[0] - src2[ 0]); + } + H = ( 17*H+16 ) >> 5; + V = ( 17*V+16 ) >> 5; + + a = 16*(src1[0] + src2[8]+1) - 3*(V+H); + for(j=8; j>0; --j) { + int b = a; + a += V; + src[0] = CLIP((b ) >> 5); + src[1] = CLIP((b+ H) >> 5); + src[2] = CLIP((b+2*H) >> 5); + src[3] = CLIP((b+3*H) >> 5); + src[4] = CLIP((b+4*H) >> 5); + src[5] = CLIP((b+5*H) >> 5); + src[6] = CLIP((b+6*H) >> 5); + src[7] = CLIP((b+7*H) >> 5); + src += stride; + } +} + +#define SRC(x,y) src[(x)+(y)*stride] +#define PL(y) \ + const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; +#define PREDICT_8x8_LOAD_LEFT \ + const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \ + + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \ + PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \ + const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2 + +#define PT(x) \ + const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; +#define PREDICT_8x8_LOAD_TOP \ + const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \ + + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \ + PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \ + const int t7 av_unused = ((has_topright ? 
SRC(8,-1) : SRC(7,-1)) \ + + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2 + +#define PTR(x) \ + t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; +#define PREDICT_8x8_LOAD_TOPRIGHT \ + int t8, t9, t10, t11, t12, t13, t14, t15; \ + if(has_topright) { \ + PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \ + t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \ + } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1); + +#define PREDICT_8x8_LOAD_TOPLEFT \ + const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2 + +#define PREDICT_8x8_DC(v) \ + int y; \ + for( y = 0; y < 8; y++ ) { \ + ((pixel4*)src)[0] = \ + ((pixel4*)src)[1] = v; \ + src += stride; \ + } + +static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + + PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1))); +} +static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + + PREDICT_8x8_LOAD_LEFT; + const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3); + PREDICT_8x8_DC(dc); +} +static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + + PREDICT_8x8_LOAD_TOP; + const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3); + PREDICT_8x8_DC(dc); +} +static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOP; + const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7 + +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4); + PREDICT_8x8_DC(dc); +} +static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + + PREDICT_8x8_LOAD_LEFT; +#define ROW(y) ((pixel4*)(src+y*stride))[0] =\ + ((pixel4*)(src+y*stride))[1] = PIXEL_SPLAT_X4(l##y) + ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); +#undef ROW +} +static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + int y; + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + + PREDICT_8x8_LOAD_TOP; + src[0] = t0; + src[1] = t1; + src[2] = t2; + src[3] = t3; + src[4] = t4; + src[5] = t5; + src[6] = t6; + src[7] = t7; + for( y = 1; y < 8; y++ ) { + ((pixel4*)(src+y*stride))[0] = ((pixel4*)src)[0]; + ((pixel4*)(src+y*stride))[1] = ((pixel4*)src)[1]; + } +} +static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_TOPRIGHT; + SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2; + SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2; + SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2; + 
SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2; + SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2; + SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2; + SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2; + SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2; + SRC(7,7)= (t14 + 3*t15 + 2) >> 2; +} +static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2; + SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2; + SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2; + SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2; + SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2; + SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2; + SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2; + SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2; + SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2; +} +static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2; + SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2; + SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2; + SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2; + SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2; + SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2; + SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1; + SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2; + SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1; + SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1; + SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1; + SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1; + SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1; + SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1; + SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(7,0)= (t6 + t7 + 1) >> 1; +} +static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,7)= (l6 + l7 + 1) >> 1; + SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2; + SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1; + SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2; + SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 
1) >> 1; + SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2; + SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1; + SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2; + SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1; + SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2; + SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1; + SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2; + SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1; + SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2; + SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1; + SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2; + SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2; + SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2; + SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2; + SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2; + SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2; +} +static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_TOPRIGHT; + SRC(0,0)= (t0 + t1 + 1) >> 1; + SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1; + SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1; + SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1; + SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1; + SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1; + SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1; + SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2; + SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1; + SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2; + SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1; + SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2; + SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1; + SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2; + SRC(7,6)= (t10 + t11 + 1) >> 1; + SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; +} +static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, int has_topright, int _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride/sizeof(pixel); + PREDICT_8x8_LOAD_LEFT; + SRC(0,0)= (l0 + l1 + 1) >> 1; + SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2; + SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1; + SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2; + SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1; + SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2; + SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1; + SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2; + SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1; + SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2; + SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1; + SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2; + SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1; + SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2; + SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)= + SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)= + SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= + SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; +} +#undef 
PREDICT_8x8_LOAD_LEFT +#undef PREDICT_8x8_LOAD_TOP +#undef PREDICT_8x8_LOAD_TOPLEFT +#undef PREDICT_8x8_LOAD_TOPRIGHT +#undef PREDICT_8x8_DC +#undef PTR +#undef PT +#undef PL +#undef SRC + +static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ + int i; + pixel *pix = (pixel*)_pix; + const dctcoef *block = (const dctcoef*)_block; + stride /= sizeof(pixel); + pix -= stride; + for(i=0; i<4; i++){ + pixel v = pix[0]; + pix[1*stride]= v += block[0]; + pix[2*stride]= v += block[4]; + pix[3*stride]= v += block[8]; + pix[4*stride]= v + block[12]; + pix++; + block++; + } +} + +static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ + int i; + pixel *pix = (pixel*)_pix; + const dctcoef *block = (const dctcoef*)_block; + stride /= sizeof(pixel); + for(i=0; i<4; i++){ + pixel v = pix[-1]; + pix[0]= v += block[0]; + pix[1]= v += block[1]; + pix[2]= v += block[2]; + pix[3]= v + block[3]; + pix+= stride; + block+= 4; + } +} + +static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ + int i; + pixel *pix = (pixel*)_pix; + const dctcoef *block = (const dctcoef*)_block; + stride /= sizeof(pixel); + pix -= stride; + for(i=0; i<8; i++){ + pixel v = pix[0]; + pix[1*stride]= v += block[0]; + pix[2*stride]= v += block[8]; + pix[3*stride]= v += block[16]; + pix[4*stride]= v += block[24]; + pix[5*stride]= v += block[32]; + pix[6*stride]= v += block[40]; + pix[7*stride]= v += block[48]; + pix[8*stride]= v + block[56]; + pix++; + block++; + } +} + +static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ + int i; + pixel *pix = (pixel*)_pix; + const dctcoef *block = (const dctcoef*)_block; + stride /= sizeof(pixel); + for(i=0; i<8; i++){ + pixel v = pix[-1]; + pix[0]= v += block[0]; + pix[1]= v += block[1]; + pix[2]= v += block[2]; + pix[3]= v += block[3]; + pix[4]= v += block[4]; + pix[5]= v += block[5]; + pix[6]= v += block[6]; + pix[7]= v + block[7]; + pix+= stride; + block+= 8; + } +} + +static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ + int i; + for(i=0; i<16; i++) + FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); +} + +static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ + int i; + for(i=0; i<16; i++) + FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); +} + +static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ + int i; + for(i=0; i<4; i++) + FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); +} + +static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ + int i; + for(i=0; i<4; i++) + FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); +} diff --git a/libavcodec/high_bit_depth.h b/libavcodec/high_bit_depth.h new file mode 100644 index 0000000000..6f2b6a74f4 --- /dev/null +++ b/libavcodec/high_bit_depth.h @@ -0,0 +1,85 @@ +#include "dsputil.h" + +#ifndef BIT_DEPTH +#define BIT_DEPTH 8 +#endif + +#ifdef AVCODEC_H264_HIGH_DEPTH_H +# undef pixel +# undef pixel2 +# undef pixel4 +# undef dctcoef +# undef INIT_CLIP +# undef no_rnd_avg_pixel4 +# undef rnd_avg_pixel4 +# undef AV_RN2P +# undef AV_RN4P +# undef AV_WN2P +# undef AV_WN4P +# undef AV_WN4PA +# undef CLIP +# undef 
FUNC +# undef FUNCC +# undef av_clip_pixel +# undef PIXEL_SPLAT_X4 +#else +# define AVCODEC_H264_HIGH_DEPTH_H +# define CLIP_PIXEL(depth)\ + static inline uint16_t av_clip_pixel_ ## depth (int p)\ + {\ + const int pixel_max = (1 << depth)-1;\ + return (p & ~pixel_max) ? (-p)>>31 & pixel_max : p;\ + } + +CLIP_PIXEL( 9) +CLIP_PIXEL(10) +#endif + +#if BIT_DEPTH > 8 +# define pixel uint16_t +# define pixel2 uint32_t +# define pixel4 uint64_t +# define dctcoef int32_t + +# define INIT_CLIP +# define no_rnd_avg_pixel4 no_rnd_avg64 +# define rnd_avg_pixel4 rnd_avg64 +# define AV_RN2P AV_RN32 +# define AV_RN4P AV_RN64 +# define AV_WN2P AV_WN32 +# define AV_WN4P AV_WN64 +# define AV_WN4PA AV_WN64A +# define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL) +#else +# define pixel uint8_t +# define pixel2 uint16_t +# define pixel4 uint32_t +# define dctcoef int16_t + +# define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; +# define no_rnd_avg_pixel4 no_rnd_avg32 +# define rnd_avg_pixel4 rnd_avg32 +# define AV_RN2P AV_RN16 +# define AV_RN4P AV_RN32 +# define AV_WN2P AV_WN16 +# define AV_WN4P AV_WN32 +# define AV_WN4PA AV_WN32A +# define PIXEL_SPLAT_X4(x) ((x)*0x01010101U) +#endif + +#if BIT_DEPTH == 8 +# define av_clip_pixel(a) av_clip_uint8(a) +# define CLIP(a) cm[a] +# define FUNC(a) a ## _8 +# define FUNCC(a) a ## _8_c +#elif BIT_DEPTH == 9 +# define av_clip_pixel(a) av_clip_pixel_9(a) +# define CLIP(a) av_clip_pixel_9(a) +# define FUNC(a) a ## _9 +# define FUNCC(a) a ## _9_c +#elif BIT_DEPTH == 10 +# define av_clip_pixel(a) av_clip_pixel_10(a) +# define CLIP(a) av_clip_pixel_10(a) +# define FUNC(a) a ## _10 +# define FUNCC(a) a ## _10_c +#endif diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c index 6d7671c81f..d041cafe85 100644 --- a/libavcodec/lpc.c +++ b/libavcodec/lpc.c @@ -158,7 +158,7 @@ int ff_lpc_calc_coefs(LPCContext *s, const int32_t *samples, int blocksize, int min_order, int max_order, int precision, int32_t coefs[][MAX_LPC_ORDER], int *shift, - enum AVLPCType lpc_type, int lpc_passes, + enum FFLPCType lpc_type, int lpc_passes, int omethod, int max_shift, int zero_shift) { double autoc[MAX_LPC_ORDER+1]; @@ -168,7 +168,7 @@ int ff_lpc_calc_coefs(LPCContext *s, int opt_order; assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER && - lpc_type > AV_LPC_TYPE_FIXED); + lpc_type > FF_LPC_TYPE_FIXED); /* reinit LPC context if parameters have changed */ if (blocksize != s->blocksize || max_order != s->max_order || @@ -177,7 +177,7 @@ int ff_lpc_calc_coefs(LPCContext *s, ff_lpc_init(s, blocksize, max_order, lpc_type); } - if (lpc_type == AV_LPC_TYPE_LEVINSON) { + if (lpc_type == FF_LPC_TYPE_LEVINSON) { double *windowed_samples = s->windowed_samples + max_order; s->lpc_apply_welch_window(samples, blocksize, windowed_samples); @@ -188,7 +188,7 @@ int ff_lpc_calc_coefs(LPCContext *s, for(i=0; iblocksize = blocksize; s->max_order = max_order; s->lpc_type = lpc_type; - if (lpc_type == AV_LPC_TYPE_LEVINSON) { + if (lpc_type == FF_LPC_TYPE_LEVINSON) { s->windowed_samples = av_mallocz((blocksize + max_order + 2) * sizeof(*s->windowed_samples)); if (!s->windowed_samples) diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h index 96b66df909..9db5dbac30 100644 --- a/libavcodec/lpc.h +++ b/libavcodec/lpc.h @@ -35,11 +35,22 @@ #define MIN_LPC_ORDER 1 #define MAX_LPC_ORDER 32 +/** + * LPC analysis type + */ +enum FFLPCType { + FF_LPC_TYPE_DEFAULT = -1, ///< use the codec default LPC type + FF_LPC_TYPE_NONE = 0, ///< do not use LPC prediction or use all zero coefficients + FF_LPC_TYPE_FIXED = 1, ///< 
fixed LPC coefficients + FF_LPC_TYPE_LEVINSON = 2, ///< Levinson-Durbin recursion + FF_LPC_TYPE_CHOLESKY = 3, ///< Cholesky factorization + FF_LPC_TYPE_NB , ///< Not part of ABI +}; typedef struct LPCContext { int blocksize; int max_order; - enum AVLPCType lpc_type; + enum FFLPCType lpc_type; double *windowed_samples; /** @@ -77,14 +88,14 @@ int ff_lpc_calc_coefs(LPCContext *s, const int32_t *samples, int blocksize, int min_order, int max_order, int precision, int32_t coefs[][MAX_LPC_ORDER], int *shift, - enum AVLPCType lpc_type, int lpc_passes, + enum FFLPCType lpc_type, int lpc_passes, int omethod, int max_shift, int zero_shift); /** * Initialize LPCContext. */ int ff_lpc_init(LPCContext *s, int blocksize, int max_order, - enum AVLPCType lpc_type); + enum FFLPCType lpc_type); void ff_lpc_init_x86(LPCContext *s); /** diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c index 3b2d693d88..1a18a8a223 100644 --- a/libavcodec/mlib/dsputil_mlib.c +++ b/libavcodec/mlib/dsputil_mlib.c @@ -421,13 +421,13 @@ static void ff_fdct_mlib(DCTELEM *data) void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx) { - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; c->get_pixels = get_pixels_mlib; c->diff_pixels = diff_pixels_mlib; c->add_pixels_clamped = add_pixels_clamped_mlib; - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_mlib; c->put_pixels_tab[0][1] = put_pixels16_x2_mlib; c->put_pixels_tab[0][2] = put_pixels16_y2_mlib; diff --git a/libavcodec/options.c b/libavcodec/options.c index c0eb790b59..de25635161 100644 --- a/libavcodec/options.c +++ b/libavcodec/options.c @@ -103,7 +103,7 @@ static const AVOption options[]={ {"umh", "umh motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_UMH }, INT_MIN, INT_MAX, V|E, "me_method" }, {"iter", "iter motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_ITER }, INT_MIN, INT_MAX, V|E, "me_method" }, {"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX}, -{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, {.dbl=0}, INT_MIN, INT_MAX}, +{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, {.dbl = 0}, INT_MIN, INT_MAX}, {"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, {.dbl = 12 }, INT_MIN, INT_MAX, V|E}, {"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX}, {"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX}, @@ -224,7 +224,7 @@ static const AVOption options[]={ {"left", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PRED_LEFT }, INT_MIN, INT_MAX, V|E, "pred"}, {"plane", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PRED_PLANE }, INT_MIN, INT_MAX, V|E, "pred"}, {"median", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PRED_MEDIAN }, INT_MIN, INT_MAX, V|E, "pred"}, -{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, {.dbl=0}, 0, 10, V|E}, +{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, 10, V|E}, {"debug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, 0, INT_MAX, V|A|S|E|D, "debug"}, {"pict", "picture info", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_PICT_INFO }, INT_MIN, INT_MAX, V|D, "debug"}, {"rc", "rate control", 0, FF_OPT_TYPE_CONST, {.dbl = 
FF_DEBUG_RC }, INT_MIN, INT_MAX, V|E, "debug"}, @@ -380,12 +380,14 @@ static const AVOption options[]={ {"ivlc", "intra vlc table", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_INTRA_VLC }, INT_MIN, INT_MAX, V|E, "flags2"}, {"b_sensitivity", "adjusts sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), FF_OPT_TYPE_INT, {.dbl = 40 }, 1, INT_MAX, V|E}, {"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, {.dbl = FF_COMPRESSION_DEFAULT }, INT_MIN, INT_MAX, V|A|E}, -{"lpc_coeff_precision", "LPC coefficient precision (FLAC)", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, A|E}, {"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E}, {"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E}, -{"prediction_order_method", "search method for selecting prediction order", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E}, -{"min_partition_order", NULL, OFFSET(min_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E}, -{"max_partition_order", NULL, OFFSET(max_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E}, +#if FF_API_FLAC_GLOBAL_OPTS +{"lpc_coeff_precision", "deprecated, use flac-specific options", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, A|E}, +{"prediction_order_method", "deprecated, use flac-specific options", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E}, +{"min_partition_order", "deprecated, use flac-specific options", OFFSET(min_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E}, +{"max_partition_order", "deprecated, use flac-specific options", OFFSET(max_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E}, +#endif {"timecode_frame_start", "GOP timecode frame start number, in non drop frame format", OFFSET(timecode_frame_start), FF_OPT_TYPE_INT64, {.dbl = 0 }, 0, INT64_MAX, V|E}, {"drop_frame_timecode", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_DROP_FRAME_TIMECODE }, INT_MIN, INT_MAX, V|E, "flags2"}, {"non_linear_q", "use non linear quantizer", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_NON_LINEAR_QUANT }, INT_MIN, INT_MAX, V|E, "flags2"}, @@ -416,12 +418,14 @@ static const AVOption options[]={ {"intra_refresh", "use periodic insertion of intra blocks instead of keyframes", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_INTRA_REFRESH }, INT_MIN, INT_MAX, V|E, "flags2"}, {"crf_max", "in crf mode, prevents vbv from lowering quality beyond this point", OFFSET(crf_max), FF_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, 0, 51, V|E}, {"log_level_offset", "set the log level offset", OFFSET(log_level_offset), FF_OPT_TYPE_INT, {.dbl = 0 }, INT_MIN, INT_MAX }, -{"lpc_type", "specify LPC algorithm", OFFSET(lpc_type), FF_OPT_TYPE_INT, {.dbl = AV_LPC_TYPE_DEFAULT }, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_NB-1, A|E}, +#if FF_API_FLAC_GLOBAL_OPTS +{"lpc_type", "deprecated, use flac-specific options", OFFSET(lpc_type), FF_OPT_TYPE_INT, {.dbl = AV_LPC_TYPE_DEFAULT }, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_NB-1, A|E}, {"none", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_NONE }, INT_MIN, INT_MAX, A|E, "lpc_type"}, {"fixed", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_FIXED }, INT_MIN, INT_MAX, A|E, "lpc_type"}, {"levinson", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_LEVINSON }, INT_MIN, INT_MAX, A|E, "lpc_type"}, {"cholesky", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = 
AV_LPC_TYPE_CHOLESKY }, INT_MIN, INT_MAX, A|E, "lpc_type"}, -{"lpc_passes", "number of passes to use for Cholesky factorization during LPC analysis", OFFSET(lpc_passes), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E}, +{"lpc_passes", "deprecated, use flac-specific options", OFFSET(lpc_passes), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E}, +#endif {"slices", "number of slices, used in parallelized decoding", OFFSET(slices), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, INT_MAX, V|E}, {"thread_type", "select multithreading type", OFFSET(thread_type), FF_OPT_TYPE_INT, {.dbl = FF_THREAD_SLICE|FF_THREAD_FRAME }, 0, INT_MAX, V|E|D, "thread_type"}, {"slice", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_THREAD_SLICE }, INT_MIN, INT_MAX, V|E|D, "thread_type"}, diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index a8d0a61a85..bd432beb87 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -1384,7 +1384,7 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) { - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; c->pix_abs[0][1] = sad16_x2_altivec; c->pix_abs[0][2] = sad16_y2_altivec; @@ -1399,10 +1399,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) c->pix_sum = pix_sum_altivec; c->diff_pixels = diff_pixels_altivec; c->get_pixels = get_pixels_altivec; - if (!h264_high_depth) + if (!high_bit_depth) c->clear_block = clear_block_altivec; c->add_bytes= add_bytes_altivec; - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_altivec; /* the two functions do the same thing, so use the same code */ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec; diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index 57f30ef703..327fe2c72f 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -153,11 +153,11 @@ static void prefetch_ppc(void *mem, int stride, int h) void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) { - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; // Common optimizations whether AltiVec is available or not c->prefetch = prefetch_ppc; - if (!h264_high_depth) { + if (!high_bit_depth) { switch (check_dcbzl_effect()) { case 32: c->clear_blocks = clear_blocks_dcbz32_ppc; diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c index 0e58846f51..9df18888ad 100644 --- a/libavcodec/ppc/h264_altivec.c +++ b/libavcodec/ppc/h264_altivec.c @@ -965,10 +965,10 @@ H264_WEIGHT( 8, 8) H264_WEIGHT( 8, 4) void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec; c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c index 4190f9da10..349583f1ba 100644 --- a/libavcodec/ps2/dsputil_mmi.c +++ 
b/libavcodec/ps2/dsputil_mmi.c @@ -142,9 +142,9 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx) { const int idct_algo= avctx->idct_algo; - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; - if (!h264_high_depth) { + if (!high_bit_depth) { c->clear_blocks = clear_blocks_mmi; c->put_pixels_tab[1][0] = put_pixels8_mmi; diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c index a0912056d7..351ba9e871 100644 --- a/libavcodec/ra144enc.c +++ b/libavcodec/ra144enc.c @@ -54,7 +54,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx) ractx->lpc_coef[1] = ractx->lpc_tables[1]; ractx->avctx = avctx; ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER, - AV_LPC_TYPE_LEVINSON); + FF_LPC_TYPE_LEVINSON); return ret; } @@ -461,7 +461,7 @@ static int ra144_encode_frame(AVCodecContext *avctx, uint8_t *frame, 32)]; ff_lpc_calc_coefs(&ractx->lpc_ctx, lpc_data, NBLOCKS * BLOCKSIZE, LPC_ORDER, - LPC_ORDER, 16, lpc_coefs, shift, AV_LPC_TYPE_LEVINSON, + LPC_ORDER, 16, lpc_coefs, shift, FF_LPC_TYPE_LEVINSON, 0, ORDER_METHOD_EST, 12, 0); for (i = 0; i < LPC_ORDER; i++) block_coefs[NBLOCKS - 1][i] = -(lpc_coefs[LPC_ORDER - 1][i] << diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c index 93b663894a..8be9318cdb 100644 --- a/libavcodec/sh4/dsputil_align.c +++ b/libavcodec/sh4/dsputil_align.c @@ -333,9 +333,9 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK) void dsputil_init_align(DSPContext* c, AVCodecContext *avctx) { - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_rnd_pixels16_o; c->put_pixels_tab[0][1] = put_rnd_pixels16_x; c->put_pixels_tab[0][2] = put_rnd_pixels16_y; @@ -405,7 +405,7 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx) dspfunc(avg_qpel, 1, 8); /* dspfunc(avg_no_rnd_qpel, 1, 8); */ - if (!h264_high_depth) { + if (!high_bit_depth) { dspfunc(put_h264_qpel, 0, 16); dspfunc(put_h264_qpel, 1, 8); dspfunc(put_h264_qpel, 2, 4); @@ -415,7 +415,7 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx) } #undef dspfunc - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4; c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4; c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4; diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c index 219bb4c353..d254e1db6b 100644 --- a/libavcodec/sh4/dsputil_sh4.c +++ b/libavcodec/sh4/dsputil_sh4.c @@ -92,10 +92,10 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block) void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx) { const int idct_algo= avctx->idct_algo; - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; dsputil_init_align(c,avctx); - if (!h264_high_depth) + if (!high_bit_depth) c->clear_blocks = clear_blocks_sh4; if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){ c->idct_put = idct_put; diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c index 
ba921ad772..e4236602f6 100644 --- a/libavcodec/sparc/dsputil_vis.c +++ b/libavcodec/sparc/dsputil_vis.c @@ -3953,7 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx) { /* VIS-specific optimizations */ int accel = vis_level (); - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (accel & ACCEL_SPARC_VIS) { if(avctx->idct_algo==FF_IDCT_SIMPLEVIS){ @@ -3963,7 +3963,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx) c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; } - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][0] = MC_put_o_16_vis; c->put_pixels_tab[0][1] = MC_put_x_16_vis; c->put_pixels_tab[0][2] = MC_put_y_16_vis; diff --git a/libavcodec/utils.c b/libavcodec/utils.c index c5d932d399..86a1b4957a 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -1297,9 +1297,9 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op)) unsigned int ff_toupper4(unsigned int x) { return toupper( x &0xFF) - + (toupper((x>>8 )&0xFF)<<8 ) - + (toupper((x>>16)&0xFF)<<16) - + (toupper((x>>24)&0xFF)<<24); + + (toupper((x>>8 )&0xFF)<<8 ) + + (toupper((x>>16)&0xFF)<<16) + + (toupper((x>>24)&0xFF)<<24); } #if !HAVE_PTHREADS diff --git a/libavcodec/version.h b/libavcodec/version.h index 56d1eac52c..067cf4af89 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -62,5 +62,8 @@ #ifndef FF_API_OLD_FF_PICT_TYPES #define FF_API_OLD_FF_PICT_TYPES (LIBAVCODEC_VERSION_MAJOR < 54) #endif +#ifndef FF_API_FLAC_GLOBAL_OPTS +#define FF_API_FLAC_GLOBAL_OPTS (LIBAVCODEC_VERSION_MAJOR < 54) +#endif #endif /* AVCODEC_VERSION_H */ diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 985a15d2f1..1a5413bda4 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2322,7 +2322,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); - const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (avctx->dsp_mask) { if (avctx->dsp_mask & AV_CPU_FLAG_FORCE) @@ -2404,7 +2404,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_pixels_clamped = ff_put_pixels_clamped_mmx; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; c->add_pixels_clamped = ff_add_pixels_clamped_mmx; - if (!h264_high_depth) { + if (!high_bit_depth) { c->clear_block = clear_block_mmx; c->clear_blocks = clear_blocks_mmx; if ((mm_flags & AV_CPU_FLAG_SSE) && @@ -2421,7 +2421,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU - if (!h264_high_depth) { + if (!high_bit_depth) { SET_HPEL_FUNCS(put, 0, 16, mmx); SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx); SET_HPEL_FUNCS(avg, 0, 16, mmx); @@ -2436,13 +2436,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->gmc= gmc_mmx; #endif #if ARCH_X86_32 && HAVE_YASM - if (!h264_high_depth) + if (!high_bit_depth) c->emulated_edge_mc = emulated_edge_mc_mmx; #endif c->add_bytes= add_bytes_mmx; - if (!h264_high_depth) + if (!high_bit_depth) c->draw_edges = draw_edges_mmx; if (CONFIG_H263_DECODER || 
CONFIG_H263_ENCODER) { @@ -2451,7 +2451,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } #if HAVE_YASM - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx; } @@ -2463,7 +2463,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) if (mm_flags & AV_CPU_FLAG_MMX2) { c->prefetch = prefetch_mmx2; - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; @@ -2480,7 +2480,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; @@ -2529,7 +2529,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2); - if (!h264_high_depth) { + if (!high_bit_depth) { SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2); SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2); @@ -2547,7 +2547,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2; c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2; - if (!h264_high_depth) { + if (!high_bit_depth) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2; c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2; @@ -2564,7 +2564,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } else if (mm_flags & AV_CPU_FLAG_3DNOW) { c->prefetch = prefetch_3dnow; - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; @@ -2602,7 +2602,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow); - if (!h264_high_depth) { + if (!high_bit_depth) { SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow); SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow); @@ -2617,7 +2617,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); #if HAVE_YASM - if (!h264_high_depth) { + if (!high_bit_depth) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow; } @@ -2635,7 +2635,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){ // these functions are slower than mmx on AMD, but faster on Intel - if (!h264_high_depth) { + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_sse2; c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2; c->avg_pixels_tab[0][0] = avg_pixels16_sse2; @@ -2643,7 +2643,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } } if(mm_flags & AV_CPU_FLAG_SSE2){ - if (!h264_high_depth) { + if (!high_bit_depth) { H264_QPEL_FUNCS(0, 1, sse2); H264_QPEL_FUNCS(0, 2, sse2); H264_QPEL_FUNCS(0, 3, sse2); @@ -2660,7 +2660,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } #if 
diff --git a/libavformat/utils.c b/libavformat/utils.c
index b4c51d135a..7624cb4f67 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -3048,12 +3048,12 @@ static int ff_interleave_compare_dts(AVFormatContext *s, AVPacket *next, AVPacke
 {
     AVStream *st = s->streams[ pkt ->stream_index];
     AVStream *st2= s->streams[ next->stream_index];
-    int64_t a= st2->time_base.num * (int64_t)st ->time_base.den;
-    int64_t b= st ->time_base.num * (int64_t)st2->time_base.den;
-    int64_t dts1 = av_rescale_rnd(pkt->dts, b, a, AV_ROUND_DOWN);
-    if (dts1==next->dts && dts1==av_rescale_rnd(pkt->dts, b, a, AV_ROUND_UP))
+    int comp = av_compare_ts(next->dts, st2->time_base, pkt->dts,
+                             st->time_base);
+
+    if (comp == 0)
         return pkt->stream_index < next->stream_index;
-    return dts1 < next->dts;
+    return comp > 0;
 }

 int av_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out, AVPacket *pkt, int flush){
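
The ff_interleave_compare_dts() change above replaces the hand-rolled rescale-and-compare with av_compare_ts(), which takes each timestamp together with its own time base and returns -1, 0 or 1. A minimal standalone sketch of that behaviour, with example values chosen purely for illustration:

    /* Illustration only: compare two DTS values that live in different time bases. */
    #include <stdio.h>
    #include <stdint.h>
    #include <libavutil/mathematics.h>

    int main(void)
    {
        AVRational tb_a = { 1, 48000 };  /* e.g. an audio stream time base   */
        AVRational tb_b = { 1, 90000 };  /* e.g. an MPEG-TS video time base  */
        int64_t    ts_a = 48001;         /* just after the 1 s mark          */
        int64_t    ts_b = 90000;         /* exactly the 1 s mark             */

        /* -1 if ts_a is earlier than ts_b, 1 if later, 0 if they represent
         * the same instant. */
        int comp = av_compare_ts(ts_a, tb_a, ts_b, tb_b);
        printf("comp = %d\n", comp);     /* prints 1 for these values */
        return 0;
    }

When av_compare_ts() reports equality, the new code still falls back to comparing stream indices, just as the old rounded-equality branch did.
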
diff --git a/libavutil/opt.h b/libavutil/opt.h
index 018d8a2b4f..b04c7905d6 100644
--- a/libavutil/opt.h
+++ b/libavutil/opt.h
@@ -67,6 +67,9 @@ typedef struct AVOption {
     union {
         double dbl;
         const char *str;
+        /* TODO those are unused now */
+        int64_t i64;
+        AVRational q;
     } default_val;
     double min;                 ///< minimum valid value for the option
     double max;                 ///< maximum valid value for the option
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 6713da23f6..69d28dd4cc 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -136,7 +136,7 @@ enum PixelFormat {
     PIX_FMT_BGR48BE,   ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as big-endian
     PIX_FMT_BGR48LE,   ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as little-endian

-    //the following 4 formats are deprecated and should be replaced by PIX_FMT_YUV420P16* with the bpp stored seperately
+    //the following 6 formats are deprecated and should be replaced by PIX_FMT_YUV420P16* with the bpp stored seperately
     PIX_FMT_YUV420P9BE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
     PIX_FMT_YUV420P9LE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
     PIX_FMT_YUV420P10BE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 2c6269487a..f00a45cf38 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -107,9 +107,13 @@ const char *swscale_license(void)
         || (x)==PIX_FMT_YUV440P     \
         || (x)==PIX_FMT_MONOWHITE   \
         || (x)==PIX_FMT_MONOBLACK   \
+        || (x)==PIX_FMT_YUV420P9LE  \
+        || (x)==PIX_FMT_YUV420P10LE \
         || (x)==PIX_FMT_YUV420P16LE \
         || (x)==PIX_FMT_YUV422P16LE \
         || (x)==PIX_FMT_YUV444P16LE \
+        || (x)==PIX_FMT_YUV420P9BE  \
+        || (x)==PIX_FMT_YUV420P10BE \
         || (x)==PIX_FMT_YUV420P16BE \
         || (x)==PIX_FMT_YUV422P16BE \
         || (x)==PIX_FMT_YUV444P16BE \
diff --git a/tests/fate/h264.mak b/tests/fate/h264.mak
index 259e1e0ea0..9cb669c1a0 100644
--- a/tests/fate/h264.mak
+++ b/tests/fate/h264.mak
@@ -127,6 +127,13 @@ FATE_H264 = aud_mw_e \
             frext-hpcvflnl_bcrm_a \
             frext-hpcvmolq_brcm_b \
             frext-hpcvnl_brcm_a \
+            frext-pph10i1_panasonic_a \
+            frext-pph10i2_panasonic_a \
+            frext-pph10i3_panasonic_a \
+            frext-pph10i4_panasonic_a \
+            frext-pph10i5_panasonic_a \
+            frext-pph10i6_panasonic_a \
+            frext-pph10i7_panasonic_a \
             hcbp2_hhi_a \
             hcmp1_hhi_a \
             ls_sva_d \
@@ -301,6 +308,13 @@ fate-h264-conformance-frext-hpcvfl_bcrm_a: CMD = framecrc -i $(SAMPLES)/h264-co
 fate-h264-conformance-frext-hpcvflnl_bcrm_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCVFLNL_BRCM_A.264 -vsync 0
 fate-h264-conformance-frext-hpcvmolq_brcm_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCVMOLQ_BRCM_B.264
 fate-h264-conformance-frext-hpcvnl_brcm_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCVNL_BRCM_A.264
+fate-h264-conformance-frext-pph10i1_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I1_Panasonic_A.264
+fate-h264-conformance-frext-pph10i2_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I2_Panasonic_A.264
+fate-h264-conformance-frext-pph10i3_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I3_Panasonic_A.264
+fate-h264-conformance-frext-pph10i4_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I4_Panasonic_A.264
+fate-h264-conformance-frext-pph10i5_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I5_Panasonic_A.264
+fate-h264-conformance-frext-pph10i6_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I6_Panasonic_A.264
+fate-h264-conformance-frext-pph10i7_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I7_Panasonic_A.264
 fate-h264-conformance-hcbp2_hhi_a: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/HCBP2_HHI_A.264
 fate-h264-conformance-hcmp1_hhi_a: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/HCMP1_HHI_A.264
 fate-h264-conformance-ls_sva_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/LS_SVA_D.264
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i1_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i1_panasonic_a
new file mode 100644
index 0000000000..1cfc313e32
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i1_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 2764800, 0xcc4df07d
+0, 3600, 2764800, 0x85f9e6d4
+0, 7200, 2764800, 0x23ffe90d
+0, 10800, 2764800, 0xf0a6d453
+0, 14400, 2764800, 0x913a6392
+0, 18000, 2764800, 0xcc5f9736
+0, 21600, 2764800, 0x43f9f9ce
+0, 25200, 2764800, 0xc874b44e
+0, 28800, 2764800, 0x83b665e6
+0, 32400, 2764800, 0x5ea2e31e
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i2_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i2_panasonic_a
new file mode 100644
index 0000000000..274bdaf2b1
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i2_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 2764800, 0x4f710132
+0, 3600, 2764800, 0x57e5b713
+0, 7200, 2764800, 0xcca01477
+0, 10800, 2764800, 0xa19a95cd
+0, 14400, 2764800, 0x700a757d
+0, 18000, 2764800, 0xd8c6f60f
+0, 21600, 2764800, 0x95a1bbc7
+0, 25200, 2764800, 0x0582077a
+0, 28800, 2764800, 0x91595f91
+0, 32400, 2764800, 0xf5fe034a
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i3_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i3_panasonic_a
new file mode 100644
index 0000000000..195e45a67b
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i3_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 2764800, 0xda69f69e
+0, 3600, 2764800, 0x29ed832f
+0, 7200, 2764800, 0xb3244cc4
+0, 10800, 2764800, 0xe41a312c
+0, 14400, 2764800, 0xac0b344b
+0, 18000, 2764800, 0xc585aa20
+0, 21600, 2764800, 0x0952054c
+0, 25200, 2764800, 0xd1a02f87
+0, 28800, 2764800, 0xfcbfe87c
+0, 32400, 2764800, 0xe4e9b8a2
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a
new file mode 100644
index 0000000000..d351a7eb1f
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a
@@ -0,0 +1,19 @@
+0, 0, 6220800, 0xca2a2a5e
+0, 3600, 6220800, 0x8009a65e
+0, 7200, 6220800, 0x63e72b3b
+0, 10800, 6220800, 0x7459a1cc
+0, 14400, 6220800, 0x02191aa9
+0, 18000, 6220800, 0x88dca590
+0, 21600, 6220800, 0x56dd150a
+0, 25200, 6220800, 0x5f56a56f
+0, 28800, 6220800, 0x67ada4b7
+0, 32400, 6220800, 0x88dca590
+0, 36000, 6220800, 0xd3b09fe5
+0, 39600, 6220800, 0x2223998c
+0, 43200, 6220800, 0x5e5b2da5
+0, 46800, 6220800, 0x88dca590
+0, 50400, 6220800, 0x5e5b2da5
+0, 54000, 6220800, 0x88dca590
+0, 57600, 6220800, 0x5e5b2da5
+0, 61200, 6220800, 0x88dca590
+0, 64800, 6220800, 0x26e1ec8b
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i5_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i5_panasonic_a
new file mode 100644
index 0000000000..1afbac01e7
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i5_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 6220800, 0x1df58ce9
+0, 3600, 6220800, 0x8f2859ce
+0, 7200, 6220800, 0x229cc7ff
+0, 10800, 6220800, 0x73e86984
+0, 14400, 6220800, 0xb6d4504b
+0, 18000, 6220800, 0x4e7d4883
+0, 21600, 6220800, 0xbec3f0f7
+0, 25200, 6220800, 0x1d9af065
+0, 28800, 6220800, 0x44851549
+0, 32400, 6220800, 0xfcf8728e
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i6_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i6_panasonic_a
new file mode 100644
index 0000000000..6d105466c9
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i6_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 6220800, 0x408daf70
+0, 3600, 6220800, 0x59b254a3
+0, 7200, 6220800, 0x4cf4279c
+0, 10800, 6220800, 0x5c9437ae
+0, 14400, 6220800, 0x986c3eb8
+0, 18000, 6220800, 0x23fd883e
+0, 21600, 6220800, 0x84f222fe
+0, 25200, 6220800, 0xe7f91107
+0, 28800, 6220800, 0xb544b31e
+0, 32400, 6220800, 0x1ebdde56
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i7_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i7_panasonic_a
new file mode 100644
index 0000000000..28825446f9
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i7_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 6220800, 0xf81873fe
+0, 3600, 6220800, 0x7b96fbdc
+0, 7200, 6220800, 0x75dbafc4
+0, 10800, 6220800, 0x7524301e
+0, 14400, 6220800, 0x0f3621ab
+0, 18000, 6220800, 0xa5e25b35
+0, 21600, 6220800, 0x063a8116
+0, 25200, 6220800, 0x48ebc8ff
+0, 28800, 6220800, 0x1f635df8
+0, 32400, 6220800, 0xe282c8bd
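
For context on the new reference files: each line is one decoded frame as recorded by the framecrc output used in the CMD lines above, giving the stream index, the timestamp (DTS) in stream time-base units, the frame size in bytes and a checksum of the frame data. The sizes are what planar 4:2:0 video stored with two bytes per sample works out to; the 1280x720 and 1920x1080 resolutions below are assumptions inferred from those sizes, shown only as an arithmetic cross-check:

    /* Illustration only: 4:2:0 with 2 bytes per sample reproduces the sizes
     * seen in the reference files (the resolutions are inferred, not taken
     * from the patch). */
    #include <stdio.h>

    static long frame_bytes_420_16(long w, long h)
    {
        long luma   = w * h * 2;                  /* one full-size 16-bit plane */
        long chroma = 2 * (w / 2) * (h / 2) * 2;  /* two quarter-size planes    */
        return luma + chroma;
    }

    int main(void)
    {
        printf("%ld\n", frame_bytes_420_16(1280, 720));   /* 2764800 */
        printf("%ld\n", frame_bytes_420_16(1920, 1080));  /* 6220800 */
        return 0;
    }
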