vpx/vp8/decoder/idct_blk.c
Ronald S. Bultje 5d4cffb35f Superblock coding.
This commit adds a pick_sb_mode() function which selects the best 32x32
superblock coding mode. Then it selects the best per-MB modes, compares
the two and encodes that in the bitstream.

The bitstream coding is rather simplistic right now. At the SB level,
we code a bit to indicate whether this block uses SB-coding (32x32
prediction) or MB-coding (anything else), and then we follow with the
actual modes. This could and should be modified in the future, but is
omitted from this commit because it will likely involve reorganizing
much more code rather than just adding SB coding, so it's better to let
that be judged on its own merits.

Gains on derf: about even, YT/HD: +0.75%, STD/HD: +1.5%.

Change-Id: Iae313a7cbd8f75b3c66d04a68b991cb096eaaba6
2012-08-20 14:43:34 -07:00

272 lines
7.4 KiB
C

/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/config.h"
#include "vp8/common/idct.h"
#include "dequantize.h"
/*
 * Forward declarations of the per-4x4-block C primitives called by the
 * block-level wrappers below. Their definitions are not in the visible part
 * of this file.
 *
 * In every prototype `pred`/`pred_ptr` is paired with `pitch` and
 * `dest`/`dst_ptr` is paired with `stride`; the wrappers below pass a fixed
 * pitch (16 for luma, 8 for chroma) and the caller-supplied stride.
 */

/* Variant taking an extra, separately supplied DC value (`Dc`). */
void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
unsigned char *dest, int pitch, int stride,
int Dc);
/* Variant taking only the coefficient block and the dequantization table. */
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
unsigned char *dest, int pitch, int stride);
/* Variant taking a single DC value instead of a coefficient block. */
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
unsigned char *dst_ptr, int pitch, int stride);
#if CONFIG_LOSSLESS
/* Lossless-build counterparts, only declared when CONFIG_LOSSLESS is set. */
void vp8_dequant_idct_add_lossless_c(short *input, short *dq, unsigned char *pred,
unsigned char *dest, int pitch, int stride);
/*
 * NOTE(review): not called in the visible part of this file; the lossless
 * wrappers below call vp8_dc_only_inv_walsh_add_c instead. Confirm whether
 * this declaration is still needed.
 */
void vp8_dc_only_idct_add_lossless_c(short input_dc, unsigned char *pred_ptr,
unsigned char *dst_ptr, int pitch, int stride);
#endif
/*
 * Reconstruct the sixteen 4x4 luma blocks of one macroblock, visiting them in
 * raster order (4 rows of 4 blocks), with the DC value of each block supplied
 * separately through `dc`.
 *
 *   q      coefficients, 16 shorts per block, blocks stored back to back
 *   dq     dequantization table, forwarded unchanged to the 4x4 primitive
 *   pre    prediction buffer, addressed with a pitch of 16 bytes
 *   dst    destination buffer, addressed with `stride` bytes per row
 *   eobs   one entry per block, consumed in raster order; a value > 1
 *          selects the full dequant+IDCT path, otherwise the DC-only path
 *          (presumably the end-of-block position -- confirm with the caller)
 *   dc     one DC value per block, consumed in raster order
 */
void vp8_dequant_dc_idct_add_y_block_c
(short *q, short *dq, unsigned char *pre,
 unsigned char *dst, int stride, char *eobs, short *dc) {
  int row, col;
  int blk = 0;

  for (row = 0; row < 4; row++) {
    unsigned char *pred = pre;
    unsigned char *recon = dst;

    for (col = 0; col < 4; col++, blk++) {
      short *coeffs = q + 16 * blk;

      if (eobs[blk] > 1)
        vp8_dequant_dc_idct_add_c(coeffs, dq, pred, recon, 16, stride, dc[blk]);
      else
        vp8_dc_only_idct_add_c(dc[blk], pred, recon, 16, stride);

      /* Next 4x4 block to the right. */
      pred += 4;
      recon += 4;
    }

    /* Next row of blocks: 4 pixel rows down in each buffer. */
    pre += 4 * 16;
    dst += 4 * stride;
  }
}
/*
 * Reconstruct the sixteen 4x4 luma blocks of one macroblock, visiting them in
 * raster order (4 rows of 4 blocks).
 *
 *   q      coefficients, 16 shorts per block, blocks stored back to back;
 *          modified on the DC-only path (see below)
 *   dq     dequantization table; dq[0] scales the DC coefficient
 *   pre    prediction buffer, addressed with a pitch of 16 bytes
 *   dst    destination buffer, addressed with `stride` bytes per row
 *   eobs   one entry per block, consumed in raster order; a value > 1
 *          selects the full dequant+IDCT path, otherwise the DC-only path
 *          (presumably the end-of-block position -- confirm with the caller)
 */
void vp8_dequant_idct_add_y_block_c
(short *q, short *dq, unsigned char *pre,
 unsigned char *dst, int stride, char *eobs) {
  int i, j;

  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      if (*eobs++ > 1) {
        vp8_dequant_idct_add_c(q, dq, pre, dst, 16, stride);
      } else {
        vp8_dc_only_idct_add_c(q[0] * dq[0], pre, dst, 16, stride);
        /*
         * Clear the first two coefficients of this block. This used to be a
         * single store through an (int *) cast, which accesses short objects
         * through an incompatible lvalue type (undefined behaviour under the
         * strict-aliasing rule) and depends on sizeof(int). Two short stores
         * clear the same bytes wherever int is twice the size of short.
         */
        q[0] = 0;
        q[1] = 0;
      }

      q += 16;
      pre += 4;
      dst += 4;
    }

    /* Rewind the 16 columns just covered and move down 4 pixel rows. */
    pre += 64 - 16;
    dst += 4 * stride - 16;
  }
}
/*
 * Reconstruct the four 4x4 blocks (2 rows of 2) of a single 8x8 chroma plane.
 * The prediction buffer is addressed with a pitch of 8 bytes, the destination
 * with `stride`. Consumes 4 entries of `eobs` and 64 coefficients of `q`.
 */
static void dequant_idct_add_uv_plane(short *q, short *dq, unsigned char *pre,
                                      unsigned char *dst, int stride,
                                      const char *eobs) {
  int i, j;

  for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++) {
      if (*eobs++ > 1) {
        vp8_dequant_idct_add_c(q, dq, pre, dst, 8, stride);
      } else {
        vp8_dc_only_idct_add_c(q[0] * dq[0], pre, dst, 8, stride);
        /*
         * Clear the first two coefficients of this block. This used to be a
         * single store through an (int *) cast, which accesses short objects
         * through an incompatible lvalue type (undefined behaviour under the
         * strict-aliasing rule) and depends on sizeof(int). Two short stores
         * clear the same bytes wherever int is twice the size of short.
         */
        q[0] = 0;
        q[1] = 0;
      }

      q += 16;
      pre += 4;
      dst += 4;
    }

    /* Rewind the 8 columns just covered and move down 4 pixel rows. */
    pre += 32 - 8;
    dst += 4 * stride - 8;
  }
}

/*
 * Reconstruct both chroma planes of one macroblock from 4x4 blocks.
 *
 *   q      coefficients: 4 U blocks followed by 4 V blocks, 16 shorts each;
 *          modified on the DC-only path
 *   dq     dequantization table; dq[0] scales the DC coefficient
 *   pre    prediction buffer with a pitch of 8 bytes: 64 bytes of U
 *          prediction followed by 64 bytes of V prediction
 *   dstu   U destination, `stride` bytes per row
 *   dstv   V destination, `stride` bytes per row
 *   eobs   8 entries, 4 for U then 4 for V; a value > 1 selects the full
 *          dequant+IDCT path, otherwise the DC-only path
 */
void vp8_dequant_idct_add_uv_block_c
(short *q, short *dq, unsigned char *pre,
 unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) {
  dequant_idct_add_uv_plane(q, dq, pre, dstu, stride, eobs);
  /* V data starts right after the U data in q, pre and eobs. */
  dequant_idct_add_uv_plane(q + 64, dq, pre + 64, dstv, stride, eobs + 4);
}
/*
 * Reconstruct the luma of one macroblock as four 8x8 blocks (top-left,
 * top-right, bottom-left, bottom-right), each with a separately supplied DC.
 *
 *   q      coefficients, 64 shorts per 8x8 block, blocks stored back to back
 *   dq     dequantization table, forwarded unchanged
 *   pre    prediction buffer, addressed with a pitch of 16 bytes
 *   dst    destination buffer, addressed with `stride` bytes per row
 *   dc     DC values; entries 0, 1, 4 and 8 are used for the four blocks
 *   eobs, xd   not used by this implementation
 */
void vp8_dequant_dc_idct_add_y_block_8x8_c
(short *q, short *dq, unsigned char *pre,
 unsigned char *dst, int stride, char *eobs, short *dc, MACROBLOCKD *xd) {
  /* Start of the lower half (8 pixel rows down) in each buffer. */
  unsigned char *const pre_lower = pre + 8 * 16;
  unsigned char *const dst_lower = dst + 8 * stride;

  /* Upper pair of 8x8 blocks. */
  vp8_dequant_dc_idct_add_8x8_c(q, dq, pre, dst, 16, stride, dc[0]);
  vp8_dequant_dc_idct_add_8x8_c(q + 64, dq, pre + 8, dst + 8,
                                16, stride, dc[1]);

  /* Lower pair of 8x8 blocks. */
  vp8_dequant_dc_idct_add_8x8_c(q + 128, dq, pre_lower, dst_lower,
                                16, stride, dc[4]);
  vp8_dequant_dc_idct_add_8x8_c(q + 192, dq, pre_lower + 8, dst_lower + 8,
                                16, stride, dc[8]);
}
#if CONFIG_SUPERBLOCKS
/*
 * In-place variant of the 8x8 luma reconstruction with separate DC values:
 * the destination buffer is also passed as the prediction, with `stride`
 * used for both the pitch and the stride arguments.
 *
 *   q      coefficients, 64 shorts per 8x8 block, blocks stored back to back
 *   dq     dequantization table, forwarded unchanged
 *   dst    buffer read as prediction and written as output
 *   dc     DC values; entries 0, 1, 4 and 8 are used for the four blocks
 *   eobs, xd   not used by this implementation
 */
void vp8_dequant_dc_idct_add_y_block_8x8_inplace_c
(short *q, short *dq,
 unsigned char *dst, int stride, char *eobs, short *dc, MACROBLOCKD *xd) {
  unsigned char *const top_right = dst + 8;
  unsigned char *const bottom_left = dst + 8 * stride;
  unsigned char *const bottom_right = dst + 8 * stride + 8;

  vp8_dequant_dc_idct_add_8x8_c(q, dq, dst, dst, stride, stride, dc[0]);
  vp8_dequant_dc_idct_add_8x8_c(q + 64, dq, top_right, top_right,
                                stride, stride, dc[1]);
  vp8_dequant_dc_idct_add_8x8_c(q + 128, dq, bottom_left, bottom_left,
                                stride, stride, dc[4]);
  vp8_dequant_dc_idct_add_8x8_c(q + 192, dq, bottom_right, bottom_right,
                                stride, stride, dc[8]);
}
#endif
/*
 * Reconstruct the luma of one macroblock as four 8x8 blocks (top-left,
 * top-right, bottom-left, bottom-right).
 *
 *   q      coefficients, 64 shorts per 8x8 block, blocks stored back to back
 *   dq     dequantization table, forwarded unchanged
 *   pre    prediction buffer, addressed with a pitch of 16 bytes
 *   dst    destination buffer, addressed with `stride` bytes per row
 *   eobs, xd   not used by this implementation
 */
void vp8_dequant_idct_add_y_block_8x8_c
(short *q, short *dq, unsigned char *pre,
 unsigned char *dst, int stride, char *eobs, MACROBLOCKD *xd) {
  /* Start of the lower half (8 pixel rows down) in each buffer. */
  unsigned char *const pre_lower = pre + 8 * 16;
  unsigned char *const dst_lower = dst + 8 * stride;

  /* Upper pair of 8x8 blocks. */
  vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
  vp8_dequant_idct_add_8x8_c(q + 64, dq, pre + 8, dst + 8, 16, stride);

  /* Lower pair of 8x8 blocks. */
  vp8_dequant_idct_add_8x8_c(q + 128, dq, pre_lower, dst_lower, 16, stride);
  vp8_dequant_idct_add_8x8_c(q + 192, dq, pre_lower + 8, dst_lower + 8,
                             16, stride);
}
/*
 * Reconstruct both chroma planes of one macroblock, one 8x8 block per plane.
 *
 *   q      coefficients: 64 shorts for U followed by 64 shorts for V
 *   dq     dequantization table, forwarded unchanged
 *   pre    prediction buffer with a pitch of 8 bytes: 64 bytes for U
 *          followed by 64 bytes for V
 *   dstu   U destination, `stride` bytes per row
 *   dstv   V destination, `stride` bytes per row
 *   eobs, xd   not used by this implementation
 */
void vp8_dequant_idct_add_uv_block_8x8_c
(short *q, short *dq, unsigned char *pre,
 unsigned char *dstu, unsigned char *dstv, int stride, char *eobs, MACROBLOCKD *xd) {
  short *const v_coeffs = q + 64;
  unsigned char *const v_pred = pre + 64;

  vp8_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride);
  vp8_dequant_idct_add_8x8_c(v_coeffs, dq, v_pred, dstv, 8, stride);
}
#if CONFIG_SUPERBLOCKS
/*
 * In-place variant of the 8x8 chroma reconstruction: each destination plane
 * is also passed as its own prediction, with `stride` used for both the pitch
 * and the stride arguments.
 *
 *   q      coefficients: 64 shorts for U followed by 64 shorts for V
 *   dq     dequantization table, forwarded unchanged
 *   dstu   U buffer, read as prediction and written as output
 *   dstv   V buffer, read as prediction and written as output
 *   eobs, xd   not used by this implementation
 */
void vp8_dequant_idct_add_uv_block_8x8_inplace_c
(short *q, short *dq,
 unsigned char *dstu, unsigned char *dstv, int stride, char *eobs, MACROBLOCKD *xd) {
  short *const v_coeffs = q + 64;

  vp8_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride);
  vp8_dequant_idct_add_8x8_c(v_coeffs, dq, dstv, dstv, stride, stride);
}
#endif
#if CONFIG_LOSSLESS
/*
 * Lossless-build counterpart of the 4x4 luma reconstruction with separately
 * supplied DC values. Visits the sixteen 4x4 blocks in raster order.
 *
 *   q      coefficients, 16 shorts per block, blocks stored back to back
 *   dq     dequantization table, forwarded unchanged to the 4x4 primitive
 *   pre    prediction buffer, addressed with a pitch of 16 bytes
 *   dst    destination buffer, addressed with `stride` bytes per row
 *   eobs   one entry per block, consumed in raster order; a value > 1
 *          selects vp8_dequant_dc_idct_add_lossless_c, otherwise
 *          vp8_dc_only_inv_walsh_add_c is used
 *   dc     one DC value per block, consumed in raster order
 */
void vp8_dequant_dc_idct_add_y_block_lossless_c
(short *q, short *dq, unsigned char *pre,
 unsigned char *dst, int stride, char *eobs, short *dc) {
  int row, col;
  int blk = 0;

  for (row = 0; row < 4; row++) {
    unsigned char *pred = pre;
    unsigned char *recon = dst;

    for (col = 0; col < 4; col++, blk++) {
      short *coeffs = q + 16 * blk;

      if (eobs[blk] > 1)
        vp8_dequant_dc_idct_add_lossless_c(coeffs, dq, pred, recon, 16, stride,
                                           dc[blk]);
      else
        vp8_dc_only_inv_walsh_add_c(dc[blk], pred, recon, 16, stride);

      /* Next 4x4 block to the right. */
      pred += 4;
      recon += 4;
    }

    /* Next row of blocks: 4 pixel rows down in each buffer. */
    pre += 4 * 16;
    dst += 4 * stride;
  }
}
/*
 * Lossless-build counterpart of the 4x4 luma reconstruction. Visits the
 * sixteen 4x4 blocks of one macroblock in raster order.
 *
 *   q      coefficients, 16 shorts per block, blocks stored back to back;
 *          modified on the DC-only path (see below)
 *   dq     dequantization table; dq[0] scales the DC coefficient
 *   pre    prediction buffer, addressed with a pitch of 16 bytes
 *   dst    destination buffer, addressed with `stride` bytes per row
 *   eobs   one entry per block, consumed in raster order; a value > 1
 *          selects vp8_dequant_idct_add_lossless_c, otherwise
 *          vp8_dc_only_inv_walsh_add_c is used
 */
void vp8_dequant_idct_add_y_block_lossless_c
(short *q, short *dq, unsigned char *pre,
 unsigned char *dst, int stride, char *eobs) {
  int i, j;

  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      if (*eobs++ > 1) {
        vp8_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride);
      } else {
        vp8_dc_only_inv_walsh_add_c(q[0] * dq[0], pre, dst, 16, stride);
        /*
         * Clear the first two coefficients of this block. This used to be a
         * single store through an (int *) cast, which accesses short objects
         * through an incompatible lvalue type (undefined behaviour under the
         * strict-aliasing rule) and depends on sizeof(int). Two short stores
         * clear the same bytes wherever int is twice the size of short.
         */
        q[0] = 0;
        q[1] = 0;
      }

      q += 16;
      pre += 4;
      dst += 4;
    }

    /* Rewind the 16 columns just covered and move down 4 pixel rows. */
    pre += 64 - 16;
    dst += 4 * stride - 16;
  }
}
/*
 * Lossless reconstruction of the four 4x4 blocks (2 rows of 2) of a single
 * 8x8 chroma plane. The prediction buffer is addressed with a pitch of 8
 * bytes, the destination with `stride`. Consumes 4 entries of `eobs` and 64
 * coefficients of `q`.
 */
static void dequant_idct_add_uv_plane_lossless(short *q, short *dq,
                                               unsigned char *pre,
                                               unsigned char *dst, int stride,
                                               const char *eobs) {
  int i, j;

  for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++) {
      if (*eobs++ > 1) {
        vp8_dequant_idct_add_lossless_c(q, dq, pre, dst, 8, stride);
      } else {
        vp8_dc_only_inv_walsh_add_c(q[0] * dq[0], pre, dst, 8, stride);
        /*
         * Clear the first two coefficients of this block. This used to be a
         * single store through an (int *) cast, which accesses short objects
         * through an incompatible lvalue type (undefined behaviour under the
         * strict-aliasing rule) and depends on sizeof(int). Two short stores
         * clear the same bytes wherever int is twice the size of short.
         */
        q[0] = 0;
        q[1] = 0;
      }

      q += 16;
      pre += 4;
      dst += 4;
    }

    /* Rewind the 8 columns just covered and move down 4 pixel rows. */
    pre += 32 - 8;
    dst += 4 * stride - 8;
  }
}

/*
 * Lossless-build counterpart of the 4x4 chroma reconstruction: processes the
 * U plane, then the V plane.
 *
 *   q      coefficients: 4 U blocks followed by 4 V blocks, 16 shorts each;
 *          modified on the DC-only path
 *   dq     dequantization table; dq[0] scales the DC coefficient
 *   pre    prediction buffer with a pitch of 8 bytes: 64 bytes of U
 *          prediction followed by 64 bytes of V prediction
 *   dstu   U destination, `stride` bytes per row
 *   dstv   V destination, `stride` bytes per row
 *   eobs   8 entries, 4 for U then 4 for V; a value > 1 selects
 *          vp8_dequant_idct_add_lossless_c, otherwise
 *          vp8_dc_only_inv_walsh_add_c is used
 */
void vp8_dequant_idct_add_uv_block_lossless_c
(short *q, short *dq, unsigned char *pre,
 unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) {
  dequant_idct_add_uv_plane_lossless(q, dq, pre, dstu, stride, eobs);
  /* V data starts right after the U data in q, pre and eobs. */
  dequant_idct_add_uv_plane_lossless(q + 64, dq, pre + 64, dstv, stride,
                                     eobs + 4);
}
#endif