Moved dequant idct into common

These functions are now used by the encoder.
This is WIP with the goal of creating a common idct/add for
the encoder and decoder.  A boost of 1.8% was seen for
the HD rt test clip used.

[Tero] Added needed changes to ARM side.

Change-Id: Ibbb8000be09034203d7adffc457d3c3f8b06a5bf
This commit is contained in:
Scott LaVarnway 2011-12-15 14:23:36 -05:00
parent c8df1656bd
commit a53d5a4c44
39 changed files with 192 additions and 272 deletions

View File

@ -63,6 +63,12 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6;
rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6;
rtcd->recon.intra4x4_predict = vp8_intra4x4_predict_armv6;
rtcd->dequant.block = vp8_dequantize_b_v6;
rtcd->dequant.idct_add = vp8_dequant_idct_add_v6;
rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
}
#endif
@ -97,6 +103,12 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
vp8_build_intra_predictors_mby_neon;
rtcd->recon.build_intra_predictors_mby_s =
vp8_build_intra_predictors_mby_s_neon;
rtcd->dequant.block = vp8_dequantize_b_neon;
rtcd->dequant.idct_add = vp8_dequant_idct_add_neon;
rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
}
#endif

View File

@ -10,7 +10,7 @@
#include "vpx_config.h"
#include "vp8/common/idct.h"
#include "vp8/decoder/dequantize.h"
#include "vp8/common/dequantize.h"
void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,

View File

@ -10,9 +10,8 @@
#include "vpx_config.h"
#include "vp8/decoder/dequantize.h"
#include "vp8/common/dequantize.h"
#include "vp8/common/idct.h"
#include "vpx_mem/vpx_mem.h"
#if HAVE_ARMV7
extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);

View File

@ -22,13 +22,13 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_v6
#undef vp8_dequant_idct_add
#undef vp8_dequant_idct_add
#define vp8_dequant_idct_add vp8_dequant_idct_add_v6
#undef vp8_dequant_idct_add_y_block
#undef vp8_dequant_idct_add_y_block
#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6
#undef vp8_dequant_idct_add_uv_block
#undef vp8_dequant_idct_add_uv_block
#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
#endif
#endif
@ -44,13 +44,13 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_neon
#undef vp8_dequant_idct_add
#undef vp8_dequant_idct_add
#define vp8_dequant_idct_add vp8_dequant_idct_add_neon
#undef vp8_dequant_idct_add_y_block
#undef vp8_dequant_idct_add_y_block
#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon
#undef vp8_dequant_idct_add_uv_block
#undef vp8_dequant_idct_add_uv_block
#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
#endif

View File

@ -10,7 +10,7 @@
#include "vpx_config.h"
#include "vp8/common/idct.h"
#include "vp8/decoder/dequantize.h"
#include "vp8/common/dequantize.h"
/* place these declarations here because we don't want to maintain them
* outside of this scope

View File

@ -70,6 +70,14 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
#if CONFIG_RUNTIME_CPU_DETECT
VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
rtcd->dequant.block = vp8_dequantize_b_c;
rtcd->dequant.idct_add = vp8_dequant_idct_add_c;
rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c;
rtcd->dequant.idct_add_uv_block =
vp8_dequant_idct_add_uv_block_c;
rtcd->idct.idct16 = vp8_short_idct4x4llm_c;
rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c;

View File

@ -1,56 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "invtrans.h"
/*
 * Runs the inverse transform for a single block.
 *
 * rtcd  - idct dispatch table handed to IDCT_INVOKE.
 * b     - block descriptor; its eob, dqcoeff, predictor, base_dst, dst and
 *         dst_stride fields are read.
 * pitch - passed through to the idct routine together with b->predictor.
 *
 * When *b->eob is greater than 1 the idct16 entry point receives the whole
 * dqcoeff array; otherwise idct1_scalar_add receives only dqcoeff[0].
 */
void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b,
                             int pitch)
{
    if (*b->eob <= 1)
    {
        /* only the first coefficient is handed to the scalar variant */
        IDCT_INVOKE(rtcd, idct1_scalar_add)(b->dqcoeff[0], b->predictor, pitch,
                                            *(b->base_dst) + b->dst,
                                            b->dst_stride);
        return;
    }

    IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, pitch,
                              *(b->base_dst) + b->dst, b->dst_stride);
}
/*
 * Inverse-transforms blocks 0..15 of a macroblock.
 *
 * rtcd - idct dispatch table handed to IDCT_INVOKE and to
 *        vp8_inverse_transform_b.
 * x    - macroblock descriptor.
 *
 * Unless the macroblock mode is SPLITMV, the iwalsh16 entry point is first
 * invoked on block 24's dqcoeff with x->dqcoeff as its second argument.
 */
void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
{
    int blk = 0;

    if (x->mode_info_context->mbmi.mode != SPLITMV)
    {
        /* second-order transform on the dc block */
        IDCT_INVOKE(rtcd, iwalsh16)(x->block[24].dqcoeff, x->dqcoeff);
    }

    while (blk < 16)
    {
        vp8_inverse_transform_b(rtcd, x->block + blk, 16);
        ++blk;
    }
}
/*
 * Inverse-transforms blocks 16..23 of a macroblock, one block at a time,
 * using a pitch of 8 for each call to vp8_inverse_transform_b.
 *
 * rtcd - idct dispatch table forwarded to vp8_inverse_transform_b.
 * x    - macroblock descriptor whose block[] entries are processed.
 */
void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
{
    int blk = 16;

    while (blk < 24)
    {
        vp8_inverse_transform_b(rtcd, x->block + blk, 8);
        ++blk;
    }
}

View File

@ -15,9 +15,51 @@
#include "vpx_config.h"
#include "idct.h"
#include "blockd.h"
extern void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch);
extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
extern void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
#include "onyxc_int.h"
/*
 * Fixes up the eob entries of 16 consecutive blocks.
 *
 * eobs - array of at least 16 eob values, updated in place.
 * diff - coefficient data laid out as 16 values per block; only the first
 *        value of each block is examined.
 *
 * The idct can only skip a block if both its dc and its eob are zero, so an
 * eob of 0 is raised to 1 whenever that block's first coefficient is
 * non-zero. All other entries are left untouched.
 */
static void eob_adjust(char *eobs, short *diff)
{
    int blk;

    for (blk = 0; blk < 16; ++blk)
    {
        if (eobs[blk] == 0 && diff[blk * 16] != 0)
        {
            eobs[blk] = 1;
        }
    }
}
/*
 * Dequantizes and inverse-transforms the luma part of a macroblock through
 * the idct_add_y_block dispatch entry, which is handed xd->qcoeff, the
 * block 0 dequant table, the destination y buffer/stride and xd->eobs.
 *
 * xd   - macroblock descriptor; xd->qcoeff and xd->eobs may be modified.
 * rtcd - common run-time dispatch table providing the idct and dequant
 *        entry points.
 *
 * For every mode except SPLITMV, block 24's dqcoeff is first run through an
 * inverse Walsh entry point (output argument xd->qcoeff), the eobs are
 * adjusted, and entry 0 of the dequant table is forced to 1 for the duration
 * of the idct_add_y_block call. The original entry is always restored
 * before returning.
 * NOTE(review): the override presumably exists because the inverse Walsh
 * output is already dequantized - confirm against the idct_add_y_block
 * implementations.
 */
static void vp8_inverse_transform_mby(MACROBLOCKD *xd,
const VP8_COMMON_RTCD *rtcd)
{
short *DQC = xd->block[0].dequant;
/* save the dc dequant constant in case it is overridden */
short dc_dequant_temp = DQC[0];
if (xd->mode_info_context->mbmi.mode != SPLITMV)
{
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
/* eobs[24] > 1: use the iwalsh16 entry point */
IDCT_INVOKE(&rtcd->idct, iwalsh16)
(&xd->block[24].dqcoeff[0], xd->qcoeff);
}
else
{
/* eobs[24] <= 1: use the iwalsh1 entry point */
IDCT_INVOKE(&rtcd->idct, iwalsh1)
(&xd->block[24].dqcoeff[0], xd->qcoeff);
}
/* raise eob from 0 to 1 for blocks whose first qcoeff value is non-zero */
eob_adjust(xd->eobs, xd->qcoeff);
/* override the dc dequant constant */
DQC[0] = 1;
}
DEQUANT_INVOKE (&rtcd->dequant, idct_add_y_block)
(xd->qcoeff, xd->block[0].dequant,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
/* restore the dc dequant constant */
DQC[0] = dc_dequant_temp;
}
#endif

View File

@ -22,6 +22,7 @@
#if CONFIG_POSTPROC
#include "postproc.h"
#endif
#include "dequantize.h"
/*#ifdef PACKET_TESTING*/
#include "header.h"
@ -73,6 +74,7 @@ typedef enum
typedef struct VP8_COMMON_RTCD
{
#if CONFIG_RUNTIME_CPU_DETECT
vp8_dequant_rtcd_vtable_t dequant;
vp8_idct_rtcd_vtable_t idct;
vp8_recon_rtcd_vtable_t recon;
vp8_subpix_rtcd_vtable_t subpix;

View File

@ -334,11 +334,12 @@ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
/*encoder only*/
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
unsigned char *dst_y,
int dst_ystride)
{
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *pred_ptr = x->predictor;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->block[0].pre_stride;
@ -348,11 +349,13 @@ void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7,
dst_y, dst_ystride);
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y,
dst_ystride);
}
}
@ -596,69 +599,3 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *xd)
build_inter4x4_predictors_mb(xd);
}
}
/* encoder only*/
/*
 * Builds the inter predictors for all 24 blocks of a macroblock into each
 * block's own predictor pointer.
 *
 * x - macroblock descriptor; block[].bmi for blocks 0..15 is refreshed from
 *     x->mode_info_context->bmi before the predictors are built.
 *
 * Blocks 0..15 use a pitch of 16 and blocks 16..23 a pitch of 8.
 */
static void build_inter4x4_predictors_mb_e(MACROBLOCKD *x)
{
int i;
if (x->mode_info_context->mbmi.partitioning < 3)
{
/* partitioning < 3: only blocks 0, 2, 8 and 10 are processed, each with
 * one build_inter_predictors4b call.
 * NOTE(review): presumably each call covers a group of four blocks -
 * confirm against build_inter_predictors4b. */
x->block[ 0].bmi = x->mode_info_context->bmi[ 0];
x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
x->block[10].bmi = x->mode_info_context->bmi[10];
build_inter_predictors4b(x, &x->block[ 0], x->block[ 0].predictor, 16);
build_inter_predictors4b(x, &x->block[ 2], x->block[ 2].predictor, 16);
build_inter_predictors4b(x, &x->block[ 8], x->block[ 8].predictor, 16);
build_inter_predictors4b(x, &x->block[10], x->block[10].predictor, 16);
}
else
{
/* otherwise walk blocks 0..15 in adjacent pairs */
for (i = 0; i < 16; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
/* refresh both blocks' bmi before their motion vectors are compared */
x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
/* identical motion vectors: one two-block call handles the pair */
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, d0->predictor, 16);
else
{
build_inter_predictors_b(d0, d0->predictor, 16, x->subpixel_predict);
build_inter_predictors_b(d1, d1->predictor, 16, x->subpixel_predict);
}
}
}
/* blocks 16..23 are always handled in pairs with a pitch of 8; their bmi is
 * not copied here */
for (i = 16; i < 24; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, d0->predictor, 8);
else
{
build_inter_predictors_b(d0, d0->predictor, 8, x->subpixel_predict);
build_inter_predictors_b(d1, d1->predictor, 8, x->subpixel_predict);
}
}
}
/*
 * Builds the inter predictors for a whole macroblock into xd->predictor.
 *
 * SPLITMV macroblocks go through build_4x4uvmvs() followed by
 * build_inter4x4_predictors_mb_e(). Every other mode uses
 * vp8_build_inter16x16_predictors_mb() with xd->predictor, offset 256 and
 * offset 320 of that buffer as its three destinations and 16 / 8 as the
 * strides.
 */
void vp8_build_inter_predictors_mb_e(MACROBLOCKD *xd)
{
    if (xd->mode_info_context->mbmi.mode == SPLITMV)
    {
        build_4x4uvmvs(xd);
        build_inter4x4_predictors_mb_e(xd);
        return;
    }

    vp8_build_inter16x16_predictors_mb(xd, xd->predictor,
                                       &xd->predictor[256],
                                       &xd->predictor[320],
                                       16, 8);
}

View File

@ -21,11 +21,13 @@ extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
int dst_uvstride);
extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x);
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf);
extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
unsigned char *dst_y,
int dst_ystride);
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
vp8_subpix_fn_t sppf);
extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);
extern void vp8_build_inter_predictors_mb_e(MACROBLOCKD *xd);
#endif

View File

@ -10,7 +10,17 @@
#include "vpx_config.h"
#include "vp8/common/idct.h"
#include "vp8/decoder/dequantize.h"
#include "vp8/common/dequantize.h"
extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
/*
 * BLOCKD-based wrapper around vp8_dequantize_b_impl_mmx: forwards the
 * block's qcoeff, dqcoeff and dequant pointers, each cast to short *, in
 * that order.
 */
void vp8_dequantize_b_mmx(BLOCKD *d)
{
    vp8_dequantize_b_impl_mmx((short *) d->qcoeff,
                              (short *) d->dqcoeff,
                              (short *) d->dequant);
}
void vp8_dequant_idct_add_y_block_mmx
(short *q, short *dq,

View File

@ -10,7 +10,7 @@
#include "vpx_config.h"
#include "vp8/common/idct.h"
#include "vp8/decoder/dequantize.h"
#include "vp8/common/dequantize.h"
void vp8_idct_dequant_0_2x_sse2
(short *q, short *dq ,

View File

@ -37,6 +37,11 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
if (flags & HAS_MMX)
{
rtcd->dequant.block = vp8_dequantize_b_mmx;
rtcd->dequant.idct_add = vp8_dequant_idct_add_mmx;
rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx;
rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx;
rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx;
rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx;
@ -82,6 +87,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->recon.build_intra_predictors_mbuv_s =
vp8_build_intra_predictors_mbuv_s_sse2;
rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_sse2;
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_sse2;

View File

@ -11,9 +11,6 @@
#include "vpx_config.h"
#include "vpx_ports/arm.h"
#include "vp8/common/blockd.h"
#include "vp8/common/pragmas.h"
#include "vp8/decoder/dequantize.h"
#include "vp8/decoder/onyxd_int.h"
void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
@ -30,20 +27,12 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
#if HAVE_ARMV6
if (flags & HAS_MEDIA)
{
pbi->dequant.block = vp8_dequantize_b_v6;
pbi->dequant.idct_add = vp8_dequant_idct_add_v6;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
}
#endif
#if HAVE_ARMV7
if (flags & HAS_NEON)
{
pbi->dequant.block = vp8_dequantize_b_neon;
pbi->dequant.idct_add = vp8_dequant_idct_add_neon;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
}
#endif
#endif

View File

@ -15,7 +15,7 @@
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/recon.h"
#include "vp8/common/reconinter.h"
#include "dequantize.h"
#include "vp8/common/dequantize.h"
#include "detokenize.h"
#include "vp8/common/invtrans.h"
#include "vp8/common/alloccommon.h"
@ -32,7 +32,7 @@
#endif
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/idct.h"
#include "dequantize.h"
#include "vp8/common/threading.h"
#include "decoderthreading.h"
#include "dboolhuff.h"
@ -218,7 +218,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
{
if (xd->eobs[i] > 1)
{
DEQUANT_INVOKE(&pbi->dequant, idct_add)
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
(b->qcoeff, b->dequant,
*(b->base_dst) + b->dst, b->dst_stride);
}
@ -247,7 +247,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
DEQUANT_INVOKE(&pbi->dequant, block)(b);
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
xd->qcoeff);
@ -272,7 +272,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
DQC[0] = 1;
}
DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
(xd->qcoeff, xd->block[0].dequant,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
@ -281,7 +281,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
DQC[0] = dc_dequant_temp;
}
DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);

View File

@ -10,7 +10,7 @@
#include "vpx_config.h"
#include "vp8/decoder/dequantize.h"
#include "vp8/common/dequantize.h"
#include "vp8/decoder/onyxd_int.h"
extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
@ -20,11 +20,7 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
{
/* Pure C: */
#if CONFIG_RUNTIME_CPU_DETECT
pbi->mb.rtcd = &pbi->common.rtcd;
pbi->dequant.block = vp8_dequantize_b_c;
pbi->dequant.idct_add = vp8_dequant_idct_add_c;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
pbi->mb.rtcd = &pbi->common.rtcd;
#endif
#if ARCH_X86 || ARCH_X86_64

View File

@ -16,7 +16,8 @@
#include "treereader.h"
#include "vp8/common/onyxc_int.h"
#include "vp8/common/threading.h"
#include "dequantize.h"
#if CONFIG_ERROR_CONCEALMENT
#include "ec_types.h"
#endif
@ -93,11 +94,6 @@ typedef struct VP8Decompressor
DATARATE dr[16];
#if CONFIG_RUNTIME_CPU_DETECT
vp8_dequant_rtcd_vtable_t dequant;
#endif
vp8_prob prob_intra;
vp8_prob prob_last;
vp8_prob prob_gf;

View File

@ -189,7 +189,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
{
if (xd->eobs[i] > 1)
{
DEQUANT_INVOKE(&pbi->dequant, idct_add)
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
(b->qcoeff, b->dequant,
*(b->base_dst) + b->dst, b->dst_stride);
}
@ -217,7 +217,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
DEQUANT_INVOKE(&pbi->dequant, block)(b);
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
xd->qcoeff);
@ -248,13 +248,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
DQC = local_dequant;
}
DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
(xd->qcoeff, DQC,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
}
DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);

View File

@ -13,47 +13,7 @@
#include "vpx_ports/x86.h"
#include "vp8/decoder/onyxd_int.h"
#if HAVE_MMX
void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
/*
 * Unpacks the quantized, dequantized and dequant-factor pointers from a
 * block descriptor and passes them, as short pointers, to the
 * vp8_dequantize_b_impl_mmx routine.
 */
void vp8_dequantize_b_mmx(BLOCKD *d)
{
    short *const quantized   = (short *) d->qcoeff;
    short *const dequantized = (short *) d->dqcoeff;
    short *const factors     = (short *) d->dequant;

    vp8_dequantize_b_impl_mmx(quantized, dequantized, factors);
}
#endif
/*
 * Installs x86-specific dequant function pointers into pbi->dequant.
 *
 * Has no effect unless CONFIG_RUNTIME_CPU_DETECT is enabled. The CPU
 * capability flags come from x86_simd_caps(); the MMX entries are assigned
 * first and the SSE2 block variants, when available, overwrite the
 * corresponding MMX ones afterwards.
 */
void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
{
#if CONFIG_RUNTIME_CPU_DETECT
int flags = x86_simd_caps();
/* Note:
*
* This platform can be built without runtime CPU detection as well. If
* you modify any of the function mappings present in this file, be sure
* to also update them in static mappings (<arch>/filename_<arch>.h)
*/
/* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
if (flags & HAS_MMX)
{
pbi->dequant.block = vp8_dequantize_b_mmx;
pbi->dequant.idct_add = vp8_dequant_idct_add_mmx;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx;
}
#endif
#if HAVE_SSE2
if (flags & HAS_SSE2)
{
/* only the y/uv block entry points have SSE2 versions here; block and
 * idct_add keep whatever was assigned above */
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
}
#endif
#endif
}

View File

@ -1091,8 +1091,10 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x )
#endif
}
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int mb_row, int mb_col)
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
int mb_row, int mb_col)
{
MACROBLOCKD *xd = &x->e_mbd;
int rate;
if (cpi->sf.RD && cpi->compressor_speed != 2)
@ -1112,14 +1114,17 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
sum_intra_stats(cpi, x);
vp8_tokenize_mb(cpi, &x->e_mbd, t);
if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED)
vp8_inverse_transform_mby(IF_RTCD(&cpi->rtcd.common->idct), &x->e_mbd);
vp8_inverse_transform_mbuv(IF_RTCD(&cpi->rtcd.common->idct), &x->e_mbd);
if (xd->mode_info_context->mbmi.mode != B_PRED)
vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));
DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
return rate;
}
#ifdef SPEEDSTATS
@ -1312,12 +1317,14 @@ int vp8cx_encode_inter_macroblock
if (!x->skip)
{
vp8_tokenize_mb(cpi, xd, t);
if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED)
{
vp8_inverse_transform_mby(IF_RTCD(&cpi->rtcd.common->idct),
&x->e_mbd);
}
vp8_inverse_transform_mbuv(IF_RTCD(&cpi->rtcd.common->idct), &x->e_mbd);
if (xd->mode_info_context->mbmi.mode != B_PRED)
vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));
DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
}
else
{

View File

@ -45,7 +45,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
vp8_encode_intra16x16mby(rtcd, x);
vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
vp8_inverse_transform_mby(&x->e_mbd, IF_RTCD(&cpi->common.rtcd));
}
else
{
@ -77,8 +77,17 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
x->quantize_b(be, b);
vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 16);
if (*b->eob > 1)
{
IDCT_INVOKE(IF_RTCD(&rtcd->common->idct), idct16)(b->dqcoeff,
b->predictor, 16, *(b->base_dst) + b->dst, b->dst_stride);
}
else
{
IDCT_INVOKE(IF_RTCD(&rtcd->common->idct), idct1_scalar_add)
(b->dqcoeff[0], b->predictor, 16, *(b->base_dst) + b->dst,
b->dst_stride);
}
}
void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb)
@ -96,11 +105,12 @@ void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb)
void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
BLOCK *b = &x->block[0];
MACROBLOCKD *xd = &x->e_mbd;
RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd);
RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby_s)(&x->e_mbd);
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
b->src_stride, x->e_mbd.predictor, 16);
ENCODEMB_INVOKE(&rtcd->encodemb, submby) (x->src_diff, *(b->base_src),
b->src_stride, xd->dst.y_buffer, xd->dst.y_stride);
vp8_transform_intra_mby(x);
@ -108,16 +118,17 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
if (x->optimize)
vp8_optimize_mby(x, rtcd);
}
void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv)(&x->e_mbd);
MACROBLOCKD *xd = &x->e_mbd;
RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv_s)(&x->e_mbd);
ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer,
x->src.v_buffer, x->src.uv_stride, &x->e_mbd.predictor[256],
&x->e_mbd.predictor[320], 8);
x->src.v_buffer, x->src.uv_stride, xd->dst.u_buffer,
xd->dst.v_buffer, xd->dst.uv_stride);
vp8_transform_mbuv(x);
@ -125,5 +136,4 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
if (x->optimize)
vp8_optimize_mbuv(x, rtcd);
}

View File

@ -105,10 +105,10 @@ static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
BLOCK *b = &x->block[0];
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
b->src_stride, x->e_mbd.predictor, 16);
b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer,
x->src.v_buffer, x->src.uv_stride, &x->e_mbd.predictor[256],
&x->e_mbd.predictor[320], 8);
x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer,
x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride);
}
static void build_dcblock(MACROBLOCK *x)
@ -625,7 +625,7 @@ void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
vp8_build_inter_predictors_mb_e(&x->e_mbd);
vp8_build_inter_predictors_mb(&x->e_mbd);
vp8_subtract_mb(rtcd, x);
@ -635,7 +635,6 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
if (x->optimize)
optimize_mb(x, rtcd);
}
/* this function is used by first pass only */
@ -643,15 +642,15 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
BLOCK *b = &x->block[0];
vp8_build_inter16x16_predictors_mby(&x->e_mbd);
vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer,
x->e_mbd.dst.y_stride);
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
b->src_stride, x->e_mbd.predictor, 16);
b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
transform_mby(x);
vp8_quantize_mby(x);
vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
vp8_inverse_transform_mby(&x->e_mbd, IF_RTCD(rtcd->common));
}

View File

@ -12,6 +12,7 @@
#ifndef __INC_ENCODEMB_H
#define __INC_ENCODEMB_H
#include "vpx_config.h"
#include "block.h"

View File

@ -2166,7 +2166,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
continue;
vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
vp8_build_inter16x16_predictors_mby(&x->e_mbd);
vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
x->skip = 1;

View File

@ -20,6 +20,8 @@ VP8_COMMON_SRCS-yes += common/blockd.c
VP8_COMMON_SRCS-yes += common/coefupdateprobs.h
VP8_COMMON_SRCS-yes += common/debugmodes.c
VP8_COMMON_SRCS-yes += common/default_coef_probs.h
VP8_COMMON_SRCS-yes += common/dequantize.c
VP8_COMMON_SRCS-yes += common/dequantize.h
VP8_COMMON_SRCS-yes += common/entropy.c
VP8_COMMON_SRCS-yes += common/entropymode.c
VP8_COMMON_SRCS-yes += common/entropymv.c
@ -28,6 +30,7 @@ VP8_COMMON_SRCS-yes += common/filter.c
VP8_COMMON_SRCS-yes += common/filter.h
VP8_COMMON_SRCS-yes += common/findnearmv.c
VP8_COMMON_SRCS-yes += common/generic/systemdependent.c
VP8_COMMON_SRCS-yes += common/idct_blk.c
VP8_COMMON_SRCS-yes += common/idctllm.c
VP8_COMMON_SRCS-yes += common/alloccommon.h
VP8_COMMON_SRCS-yes += common/blockd.h
@ -57,7 +60,6 @@ VP8_COMMON_SRCS-yes += common/swapyv12buffer.h
VP8_COMMON_SRCS-yes += common/systemdependent.h
VP8_COMMON_SRCS-yes += common/threading.h
VP8_COMMON_SRCS-yes += common/treecoder.h
VP8_COMMON_SRCS-yes += common/invtrans.c
VP8_COMMON_SRCS-yes += common/loopfilter.c
VP8_COMMON_SRCS-yes += common/loopfilter_filters.c
VP8_COMMON_SRCS-yes += common/mbpitch.c
@ -69,9 +71,13 @@ VP8_COMMON_SRCS-yes += common/reconintra.c
VP8_COMMON_SRCS-yes += common/reconintra4x4.c
VP8_COMMON_SRCS-yes += common/setupintrarecon.c
VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
VP8_COMMON_SRCS-yes += common/treecoder.c
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/dequantize_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h
@ -84,11 +90,14 @@ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idct_blk_mmx.c
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c
@ -115,6 +124,8 @@ VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.h
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/recon_arm.h
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/subpixel_arm.h
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/dequantize_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/dequantize_arm.h
# common (armv6)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM)
@ -129,6 +140,9 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/loopfilter_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/simpleloopfilter_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/sixtappredict8x4_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/intra4x4_predict_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/dequant_idct_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/dequantize_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/idct_blk_v6.c
# common (neon)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict4x4_neon$(ASM)
@ -151,3 +165,8 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict8x8_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict16x16_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/save_neon_reg$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/dequant_idct_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/dequantizeb_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/idct_blk_neon.c

View File

@ -52,7 +52,6 @@ VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c
VP8_DX_SRCS-yes += decoder/dboolhuff.c
VP8_DX_SRCS-yes += decoder/decodemv.c
VP8_DX_SRCS-yes += decoder/decodframe.c
VP8_DX_SRCS-yes += decoder/dequantize.c
VP8_DX_SRCS-yes += decoder/detokenize.c
VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/ec_types.h
VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.h
@ -61,20 +60,14 @@ VP8_DX_SRCS-yes += decoder/generic/dsystemdependent.c
VP8_DX_SRCS-yes += decoder/dboolhuff.h
VP8_DX_SRCS-yes += decoder/decodemv.h
VP8_DX_SRCS-yes += decoder/decoderthreading.h
VP8_DX_SRCS-yes += decoder/dequantize.h
VP8_DX_SRCS-yes += decoder/detokenize.h
VP8_DX_SRCS-yes += decoder/onyxd_int.h
VP8_DX_SRCS-yes += decoder/treereader.h
VP8_DX_SRCS-yes += decoder/onyxd_if.c
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c
VP8_DX_SRCS-yes += decoder/idct_blk.c
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c
VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/dequantize_x86.h
VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/x86_dsystemdependent.c
VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/dequantize_mmx.asm
VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/idct_blk_mmx.c
VP8_DX_SRCS-$(HAVE_SSE2) += decoder/x86/idct_blk_sse2.c

View File

@ -12,17 +12,3 @@
#VP8_DX_SRCS list is modified according to different platforms.
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/dequantize_arm.c
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/dequantize_arm.h
#File list for armv6
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_idct_v6$(ASM)
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequantize_v6$(ASM)
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/idct_blk_v6.c
#File list for neon
VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequant_idct_neon$(ASM)
VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_full_2x_neon$(ASM)
VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_0_2x_neon$(ASM)
VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequantizeb_neon$(ASM)
VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_blk_neon.c