vpx/vp8/common/idctllm.c
Yaowu Xu 5b42ae09ae experiment extending the quantizer range
Prior to this change, VP8 min quantizer is 4, which caps the
highest quality around 51DB. This experimental change extends
the min quantizer to 1, removes the cap and allows the highest
quality to be around ~73DB, consistent with the fdct/idct round trip
error. To test this change, at configure time use options:

--enable-experimental --enable-extend_qrange

The following is a brief log of changes in each of the patch sets

patch set 1:
In this commit, the quantization/dequantization constants are kept
unchanged; instead, a scaling factor of 4 is rolled into the fdct/idct.
Fixed Q0 encoding tests on mobile:
  Before:    9560.567kbps Overall PSNR:50.255DB VPXSSIM:98.288
  Now:   18035.774kbps Overall PSNR:73.022DB VPXSSIM:99.991

patch set 2:
regenerated dc/ac quantizer lookup tables based on the scaling
factor rolled in the fdct/idct. Also slightly extended the range
towards the high quantizer end.

patch set 3:
slightly tweaked the quantizer tables and generated bits_per_mb
table based on Paul's suggestions.

patch set 4:
fix a typo in idct, re-calculated tables relating active max Q
to active min Q

patch set 5:
added rdmult lookup table based on Q

patch set 6:
fix rdmult scale: dct coefficient has scaled up by 4

patch set 7:
make the transform coefficients fit within 16 bits

patch set 8:
normalize 2nd order quantizers

patch set 9:
fix mis-spellings

patch set 10:
change the configure script and macros to allow experimental code
to be enabled at configure time with --enable-extend_qrange

patch set 11:
rebase for merge

Change-Id: Ib50641ddd44aba2a52ed890222c309faa31cc59c
2011-01-19 13:22:35 -08:00

225 lines
5.0 KiB
C

/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/****************************************************************************
* Notes:
*
* This implementation makes use of 16 bit fixed point versions of two multiply
* constants:
* 1. sqrt(2) * cos (pi/8)
* 2. sqrt(2) * sin (pi/8)
* Because the first constant is bigger than 1, to maintain the same 16 bit
* fixed point precision as the second one, we use a trick of
* x * a = x + x*(a-1)
* so
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
**************************************************************************/
#include "vpx_ports/config.h"
/* (sqrt(2) * cos(pi/8) - 1) in 16 bit fixed point: about 0.30656 * 65536.
 * Used as x + ((x * cospi8sqrt2minus1) >> 16) to multiply x by the full
 * constant, which is larger than 1. */
static const int cospi8sqrt2minus1 = 20091;
/* sqrt(2) * sin(pi/8) in 16 bit fixed point: about 0.54120 * 65536. */
static const int sinpi8sqrt2 = 35468;
/* Bias added to every fixed point product before it is shifted right by 16.
 * It is zero here, so the products are shifted down with no rounding offset. */
static const int rounding = 0;
/*
 * 4x4 inverse DCT on 16 bit coefficients, done as two 1-D passes.
 *
 * input  : 16 coefficients, read as a 4x4 array whose rows are 4 shorts apart.
 * output : receives the 4x4 result; consecutive rows are (pitch >> 1) shorts
 *          apart.
 * pitch  : output row pitch; it is halved before being used as a short index
 *          (presumably a pitch in bytes - confirm against callers).
 *
 * The first pass transforms each column of input and stores the unscaled
 * result in output. The second pass transforms each row of output in place
 * and applies the final bias and right shift, whose values depend on
 * CONFIG_EXTEND_QRANGE.
 */
void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
{
#if !CONFIG_EXTEND_QRANGE
    const int final_bias = 4, final_shift = 3;
#else
    const int final_bias = 16, final_shift = 5;
#endif
    const int row_step = pitch >> 1;
    const short *src = input;
    short *dst = output;
    short *row = output;
    int even_sum, even_diff, odd_c, odd_d;
    int n;

    /* Pass 1: one column per iteration, elements 4 shorts apart in input. */
    for (n = 0; n < 4; n++, src++, dst++)
    {
        even_sum = src[0] + src[8];
        even_diff = src[0] - src[8];

        /* x * sqrt(2) * cos(pi/8) is formed as x + x * (constant - 1). */
        odd_c = ((src[4] * sinpi8sqrt2 + rounding) >> 16)
                - (src[12] + ((src[12] * cospi8sqrt2minus1 + rounding) >> 16));
        odd_d = (src[4] + ((src[4] * cospi8sqrt2minus1 + rounding) >> 16))
                + ((src[12] * sinpi8sqrt2 + rounding) >> 16);

        /* All inputs of this column were read above, before any store. */
        dst[row_step * 0] = even_sum + odd_d;
        dst[row_step * 3] = even_sum - odd_d;
        dst[row_step * 1] = even_diff + odd_c;
        dst[row_step * 2] = even_diff - odd_c;
    }

    /* Pass 2: one row of output per iteration, transformed in place. */
    for (n = 0; n < 4; n++, row += row_step)
    {
        even_sum = row[0] + row[2];
        even_diff = row[0] - row[2];

        odd_c = ((row[1] * sinpi8sqrt2 + rounding) >> 16)
                - (row[3] + ((row[3] * cospi8sqrt2minus1 + rounding) >> 16));
        odd_d = (row[1] + ((row[1] * cospi8sqrt2minus1 + rounding) >> 16))
                + ((row[3] * sinpi8sqrt2 + rounding) >> 16);

        row[0] = (even_sum + odd_d + final_bias) >> final_shift;
        row[3] = (even_sum - odd_d + final_bias) >> final_shift;
        row[1] = (even_diff + odd_c + final_bias) >> final_shift;
        row[2] = (even_diff - odd_c + final_bias) >> final_shift;
    }
}
/*
 * Inverse transform for a block in which only input[0] is used: the rounded,
 * scaled value of input[0] is written to every element of a 4x4 output block.
 *
 * input  : only input[0] is read.
 * output : 4 rows of 4 shorts are written; rows are (pitch >> 1) shorts apart.
 * pitch  : output row pitch, halved before being used as a short index.
 *
 * The bias and shift applied to input[0] depend on CONFIG_EXTEND_QRANGE.
 */
void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
{
#if !CONFIG_EXTEND_QRANGE
    const int dc = ((input[0] + 4) >> 3);
#else
    const int dc = ((input[0] + 16) >> 5);
#endif
    const int row_step = pitch >> 1;
    short *row = output;
    int r, c;

    for (r = 0; r < 4; r++, row += row_step)
    {
        for (c = 0; c < 4; c++)
        {
            row[c] = dc;
        }
    }
}
/*
 * Adds a rounded, scaled DC value to a 4x4 block of predictor bytes and
 * stores the sums, clamped to 0..255, in the destination block.
 *
 * input_dc : DC value; it is biased and shifted right (amounts depend on
 *            CONFIG_EXTEND_QRANGE) before being added.
 * pred_ptr : first predictor byte; rows are pitch bytes apart.
 * dst_ptr  : first destination byte; rows are stride bytes apart.
 * pitch    : distance between predictor rows.
 * stride   : distance between destination rows.
 */
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
{
#if !CONFIG_EXTEND_QRANGE
    const int dc = ((input_dc + 4) >> 3);
#else
    const int dc = ((input_dc + 16) >> 5);
#endif
    const unsigned char *pred_row = pred_ptr;
    unsigned char *dst_row = dst_ptr;
    int row, col;

    for (row = 0; row < 4; row++, pred_row += pitch, dst_row += stride)
    {
        for (col = 0; col < 4; col++)
        {
            const int sum = dc + pred_row[col];

            /* Saturate to the range of an unsigned byte. */
            dst_row[col] = (unsigned char)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
        }
    }
}
/*
 * 4x4 inverse Walsh-Hadamard style transform on 16 bit values.
 *
 * input  : 16 values, read as a 4x4 array whose rows are 4 shorts apart.
 * output : 16 results in the same layout.
 *
 * The first pass combines the four elements of each column with sums and
 * differences and stores them in output. The second pass does the same along
 * each row of output, in place, then applies a bias and right shift whose
 * values depend on CONFIG_EXTEND_QRANGE.
 */
void vp8_short_inv_walsh4x4_c(short *input, short *output)
{
#if !CONFIG_EXTEND_QRANGE
    const int bias = 3, shift = 3;
#else
    const int bias = 1, shift = 2;
#endif
    int col, row;

    /* Pass 1: columns. Each column is fully read before it is written. */
    for (col = 0; col < 4; col++)
    {
        const int outer_sum = input[col] + input[col + 12];
        const int inner_sum = input[col + 4] + input[col + 8];
        const int inner_diff = input[col + 4] - input[col + 8];
        const int outer_diff = input[col] - input[col + 12];

        output[col] = outer_sum + inner_sum;
        output[col + 4] = inner_diff + outer_diff;
        output[col + 8] = outer_sum - inner_sum;
        output[col + 12] = outer_diff - inner_diff;
    }

    /* Pass 2: rows of output, transformed in place and scaled down. */
    for (row = 0; row < 4; row++)
    {
        short *line = output + 4 * row;
        const int outer_sum = line[0] + line[3];
        const int inner_sum = line[1] + line[2];
        const int inner_diff = line[1] - line[2];
        const int outer_diff = line[0] - line[3];

        line[0] = (outer_sum + inner_sum + bias) >> shift;
        line[1] = (inner_diff + outer_diff + bias) >> shift;
        line[2] = (outer_sum - inner_sum + bias) >> shift;
        line[3] = (outer_diff - inner_diff + bias) >> shift;
    }
}
/*
 * Inverse Walsh transform shortcut that uses only input[0]: the biased and
 * shifted value of input[0] is written to all 16 output elements.
 *
 * input  : only input[0] is read.
 * output : 16 consecutive shorts are written.
 *
 * The bias and shift depend on CONFIG_EXTEND_QRANGE.
 */
void vp8_short_inv_walsh4x4_1_c(short *input, short *output)
{
#if !CONFIG_EXTEND_QRANGE
    const int dc = (input[0] + 3) >> 3;
#else
    const int dc = (input[0] + 1) >> 2;
#endif
    int k;

    /* The 4x4 block is contiguous, so fill it as a flat run of 16 values. */
    for (k = 0; k < 16; k++)
    {
        output[k] = dc;
    }
}