2010-05-18 17:58:33 +02:00
|
|
|
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
|
|
|
|
|
|
|
|
|
2012-11-09 02:09:30 +01:00
|
|
|
#include "vp9_rtcd.h"
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/decoder/vp9_dequantize.h"
|
2010-05-18 17:58:33 +02:00
|
|
|
#include "vpx_mem/vpx_mem.h"
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/decoder/vp9_onyxd_int.h"
|
Consistently use get_prob(), clip_prob() and newly added clip_pixel().
Add a function clip_pixel() to clip a pixel value to the [0,255] range
of allowed values, and use this where-ever appropriate (e.g. prediction,
reconstruction). Likewise, consistently use the recently added function
clip_prob(), which calculates a binary probability in the [1,255] range.
If possible, try to use get_prob() or its sister get_binary_prob() to
calculate binary probabilities, for consistency.
Since in some places, this means that binary probability calculations
are changed (we use {255,256}*count0/(total) in a range of places,
and all of these are now changed to use 256*count0+(total>>1)/total),
this changes the encoding result, so this patch warrants some extensive
testing.
Change-Id: Ibeeff8d886496839b8e0c0ace9ccc552351f7628
2012-12-10 21:09:07 +01:00
|
|
|
#include "vp9/common/vp9_common.h"
|
2013-02-23 02:27:34 +01:00
|
|
|
|
2012-11-10 02:50:13 +01:00
|
|
|
// Adds a width x height block of residuals to a prediction block and stores
// the result, passed through clip_pixel(), in dest.
//
//   diff   - residuals, stored contiguously (each row is `width` entries).
//   pred   - prediction pixels; consecutive rows are `pitch` entries apart.
//   dest   - output pixels; consecutive rows are `stride` entries apart.
static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,
                         uint8_t *dest, int stride, int width, int height) {
  int row;

  for (row = 0; row < height; ++row) {
    const int16_t *const diff_row = diff + row * width;
    const uint8_t *const pred_row = pred + row * pitch;
    uint8_t *const dest_row = dest + row * stride;
    int col;

    for (col = 0; col < width; ++col) {
      dest_row[col] = clip_pixel(diff_row[col] + pred_row[col]);
    }
  }
}
|
|
|
|
|
2012-11-10 02:50:13 +01:00
|
|
|
// Adds the same residual value `diff` to every pixel of a width x height
// prediction block and stores the result, passed through clip_pixel(), in
// dest.
//
//   pred   - prediction pixels; consecutive rows are `pitch` entries apart.
//   dest   - output pixels; consecutive rows are `stride` entries apart.
static void add_constant_residual(const int16_t diff, const uint8_t *pred,
                                  int pitch, uint8_t *dest, int stride,
                                  int width, int height) {
  int row;

  for (row = 0; row < height; ++row) {
    const uint8_t *const pred_row = pred + row * pitch;
    uint8_t *const dest_row = dest + row * stride;
    int col;

    for (col = 0; col < width; ++col) {
      dest_row[col] = clip_pixel(diff + pred_row[col]);
    }
  }
}
|
|
|
|
|
2012-11-16 21:07:12 +01:00
|
|
|
// 4x4 block: scales each of the 16 coefficients in `input` by the matching
// entry of `dq`, runs vp9_short_iht4x4() with the given tx_type, clears
// `input`, and adds the transform output to `pred`, writing the result to
// `dest`.
//
// `eob` is accepted for signature compatibility but is not consulted here.
void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
                               const int16_t *dq,
                               uint8_t *pred, uint8_t *dest,
                               int pitch, int stride, int eob) {
  int16_t output[16];
  int k;

  for (k = 0; k < 16; ++k) {
    input[k] *= dq[k];
  }

  vp9_short_iht4x4(input, output, 4, tx_type);

  // sizeof(output) is the 32 bytes occupied by the 16 int16_t coefficients.
  vpx_memset(input, 0, sizeof(output));

  add_residual(output, pred, pitch, dest, stride, 4, 4);
}
|
|
|
|
|
2012-11-16 21:07:12 +01:00
|
|
|
// 8x8 block: dequantizes `input`, runs vp9_short_iht8x8() with the given
// tx_type, clears `input`, and adds the transform output to `pred`, writing
// the result to `dest`.
//
//   eob == 0 : no coefficients; the prediction is copied to dest unchanged.
//   eob  > 0 : input[0] is scaled by dq[0], input[1..63] by dq[1].
//   eob  < 0 : nothing is done.
void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
                                   const int16_t *dq,
                                   uint8_t *pred, uint8_t *dest,
                                   int pitch, int stride, int eob) {
  int16_t output[64];
  int k;

  if (eob == 0) {
    // All 0 DCT coefficients
    vp9_copy_mem8x8(pred, pitch, dest, stride);
    return;
  }

  if (eob < 0) {
    return;
  }

  input[0] *= dq[0];
  for (k = 1; k < 64; ++k) {
    input[k] *= dq[1];
  }

  vp9_short_iht8x8(input, output, 8, tx_type);

  // sizeof(output) is the 128 bytes occupied by the 64 int16_t coefficients.
  vpx_memset(input, 0, sizeof(output));

  add_residual(output, pred, pitch, dest, stride, 8, 8);
}
|
|
|
|
|
2012-11-16 21:07:12 +01:00
|
|
|
// 4x4 block: dequantizes `input`, inverse-transforms it and adds the result
// to the prediction.
//
//   input  - coefficients; modified in place and cleared before returning.
//   dq     - dequantization factors (dq[0..15] on the full path, dq[0] only
//            on the DC path).
//   pred   - prediction pixels, read with row spacing `pitch`.
//   dest   - output pixels, written with row spacing `stride`.
//   eob    - when greater than 1 the full 4x4 inverse transform runs;
//            otherwise only input[0] is used and handed to
//            vp9_dc_only_idct_add().
void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
                            uint8_t *dest, int pitch, int stride, int eob) {
  int16_t output[16];
  int i;

  if (eob > 1) {
    for (i = 0; i < 16; i++)
      input[i] *= dq[i];

    // the idct halves ( >> 1) the pitch
    vp9_short_idct4x4llm(input, output, 4 << 1);

    // sizeof(output) is the 32 bytes occupied by the 16 int16_t coefficients.
    vpx_memset(input, 0, sizeof(output));

    add_residual(output, pred, pitch, dest, stride, 4, 4);
  } else {
    vp9_dc_only_idct_add(input[0] * dq[0], pred, dest, pitch, stride);

    // Clear the first two coefficients with plain int16_t stores. Writing
    // them through an int pointer would break the strict-aliasing rule and
    // could be a misaligned access.
    input[0] = 0;
    input[1] = 0;
  }
}
|
|
|
|
|
2012-11-16 21:07:12 +01:00
|
|
|
// 4x4 block whose DC value is supplied by the caller: stores `dc` into
// input[0] as-is, scales input[1..15] by the matching dq entries, runs
// vp9_short_idct4x4llm(), clears `input`, and adds the transform output to
// `pred`, writing the result to `dest`.
void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
                               uint8_t *dest, int pitch, int stride, int dc) {
  int16_t output[16];
  int k;

  input[0] = dc;
  for (k = 1; k < 16; ++k) {
    input[k] *= dq[k];
  }

  // the idct halves ( >> 1) the pitch
  vp9_short_idct4x4llm(input, output, 4 << 1);

  // sizeof(output) is the 32 bytes occupied by the 16 int16_t coefficients.
  vpx_memset(input, 0, sizeof(output));

  add_residual(output, pred, pitch, dest, stride, 4, 4);
}
|
2011-02-14 23:18:18 +01:00
|
|
|
|
2012-11-16 21:07:12 +01:00
|
|
|
// Lossless-mode 4x4 block: dequantizes `input`, applies the inverse
// Walsh-Hadamard transform and adds the result to the prediction.
//
//   input  - coefficients; modified in place and cleared before returning.
//   dq     - dequantization factors (dq[0..15] on the full path, dq[0] only
//            on the DC path).
//   pred   - prediction pixels, read with row spacing `pitch`.
//   dest   - output pixels, written with row spacing `stride`.
//   eob    - when greater than 1 the full 4x4 inverse transform runs;
//            otherwise only input[0] is used and handed to
//            vp9_dc_only_inv_walsh_add().
void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
                                     uint8_t *pred, uint8_t *dest,
                                     int pitch, int stride, int eob) {
  int16_t output[16];
  int i;

  if (eob > 1) {
    for (i = 0; i < 16; i++)
      input[i] *= dq[i];

    vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1);

    // sizeof(output) is the 32 bytes occupied by the 16 int16_t coefficients.
    vpx_memset(input, 0, sizeof(output));

    add_residual(output, pred, pitch, dest, stride, 4, 4);
  } else {
    vp9_dc_only_inv_walsh_add(input[0] * dq[0], pred, dest, pitch, stride);

    // Clear the first two coefficients with plain int16_t stores. Writing
    // them through an int pointer would break the strict-aliasing rule and
    // could be a misaligned access.
    input[0] = 0;
    input[1] = 0;
  }
}
|
|
|
|
|
2012-11-16 21:07:12 +01:00
|
|
|
// Lossless-mode 4x4 block whose DC value is supplied by the caller: stores
// `dc` into input[0] as-is, scales input[1..15] by the matching dq entries,
// runs vp9_short_inv_walsh4x4_x8_c(), clears `input`, and adds the transform
// output to `pred`, writing the result to `dest`.
void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,
                                        uint8_t *pred,
                                        uint8_t *dest,
                                        int pitch, int stride, int dc) {
  int16_t output[16];
  int k;

  input[0] = dc;
  for (k = 1; k < 16; ++k) {
    input[k] *= dq[k];
  }

  vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1);

  // sizeof(output) is the 32 bytes occupied by the 16 int16_t coefficients.
  vpx_memset(input, 0, sizeof(output));

  add_residual(output, pred, pitch, dest, stride, 4, 4);
}
|
|
|
|
|
2012-11-16 21:07:12 +01:00
|
|
|
// 8x8 block: dequantizes `input`, inverse-transforms it and adds the result
// to `pred`, writing the output to `dest`. The amount of work is chosen
// from `eob`:
//
//   eob == 0  : prediction is copied to dest unchanged.
//   eob == 1  : DC only; vp9_short_idct1_8x8_c() yields one value that is
//               added to every pixel.
//   eob <= 10 : only ten coefficient positions are dequantized and cleared,
//               and vp9_short_idct10_8x8_c() is used.
//   otherwise : all 64 coefficients go through vp9_short_idct8x8_c().
//
// input[0] is scaled by dq[0]; every other coefficient is scaled by dq[1].
//
// TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
// Combine that with code here.
void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
                                uint8_t *pred, uint8_t *dest, int pitch,
                                int stride, int eob) {
  // Non-DC positions handled by the reduced (eob <= 10) path.
  static const int reduced_pos[9] = { 1, 2, 3, 8, 9, 10, 16, 17, 24 };
  int16_t output[64];
  int k;

  // The DC coefficient is scaled up front, before eob is examined.
  input[0] *= dq[0];

  if (eob == 0) {
    // All 0 DCT coefficients
    vp9_copy_mem8x8(pred, pitch, dest, stride);
    return;
  }

  if (eob == 1) {
    // DC only DCT coefficient
    int16_t dc_in = input[0];
    int16_t dc_out;

    // Note: the idct1 will need to be modified accordingly whenever
    // vp9_short_idct8x8_c() is modified.
    vp9_short_idct1_8x8_c(&dc_in, &dc_out);
    input[0] = 0;

    add_constant_residual(dc_out, pred, pitch, dest, stride, 8, 8);
    return;
  }

  if (eob <= 10) {
    for (k = 0; k < 9; ++k) {
      input[reduced_pos[k]] *= dq[1];
    }

    vp9_short_idct10_8x8_c(input, output, 16);

    // Only the positions that were dequantized above are cleared.
    input[0] = 0;
    for (k = 0; k < 9; ++k) {
      input[reduced_pos[k]] = 0;
    }
  } else {
    for (k = 1; k < 64; ++k) {
      input[k] *= dq[1];
    }

    // the idct halves ( >> 1) the pitch
    vp9_short_idct8x8_c(input, output, 8 << 1);

    // sizeof(output) is the 128 bytes occupied by the 64 int16_t
    // coefficients.
    vpx_memset(input, 0, sizeof(output));
  }

  add_residual(output, pred, pitch, dest, stride, 8, 8);
}
|
|
|
|
|
2012-11-10 02:50:13 +01:00
|
|
|
// 16x16 block: dequantizes `input`, runs the inverse hybrid transform
// vp9_short_iht16x16() with the given tx_type, clears `input`, and adds the
// transform output to `pred`, writing the result to `dest`.
//
//   eob == 0 : no coefficients; the prediction is copied to dest unchanged.
//   eob  > 0 : input[0] is scaled by dq[0], input[1..255] by dq[1].
//   eob  < 0 : nothing is done.
void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
                                     const int16_t *dq, uint8_t *pred,
                                     uint8_t *dest, int pitch, int stride,
                                     int eob) {
  int16_t output[256];
  int k;

  if (eob == 0) {
    // All 0 DCT coefficients
    vp9_copy_mem16x16(pred, pitch, dest, stride);
    return;
  }

  if (eob < 0) {
    return;
  }

  input[0] *= dq[0];
  for (k = 1; k < 256; ++k) {
    input[k] *= dq[1];
  }

  // inverse hybrid transform
  vp9_short_iht16x16(input, output, 16, tx_type);

  // sizeof(output) is the 512 bytes occupied by the 256 int16_t coefficients.
  vpx_memset(input, 0, sizeof(output));

  add_residual(output, pred, pitch, dest, stride, 16, 16);
}
|
|
|
|
|
2012-11-16 21:07:12 +01:00
|
|
|
/* 16x16 block: dequantizes `input`, inverse-transforms it and adds the
 * result to `pred`, writing the output to `dest`. The amount of work is
 * chosen from `eob`:
 *
 *   eob == 0  : prediction is copied to dest unchanged.
 *   eob == 1  : DC only; vp9_short_idct1_16x16_c() yields one value that is
 *               added to every pixel.
 *   eob <= 10 : only ten coefficient positions are dequantized and cleared,
 *               and vp9_short_idct10_16x16_c() is used.
 *   otherwise : all 256 coefficients go through vp9_short_idct16x16_c().
 *
 * input[0] is scaled by dq[0]; every other coefficient is scaled by dq[1]. */
void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
                                  uint8_t *pred, uint8_t *dest, int pitch,
                                  int stride, int eob) {
  /* Non-DC positions handled by the reduced (eob <= 10) path. */
  static const int reduced_pos[9] = { 1, 2, 3, 16, 17, 18, 32, 33, 48 };
  int16_t output[256];
  int k;

  if (eob == 0) {
    /* All 0 DCT coefficient */
    vp9_copy_mem16x16(pred, pitch, dest, stride);
    return;
  }

  if (eob == 1) {
    /* DC only DCT coefficient. */
    int16_t dc_in = input[0] * dq[0];
    int16_t dc_out;

    /* Note: the idct1 will need to be modified accordingly whenever
     * vp9_short_idct16x16_c() is modified. */
    vp9_short_idct1_16x16_c(&dc_in, &dc_out);
    input[0] = 0;

    add_constant_residual(dc_out, pred, pitch, dest, stride, 16, 16);
    return;
  }

  input[0] *= dq[0];

  if (eob <= 10) {
    for (k = 0; k < 9; ++k) {
      input[reduced_pos[k]] *= dq[1];
    }

    // the idct halves ( >> 1) the pitch
    vp9_short_idct10_16x16_c(input, output, 32);

    /* Only the positions that were dequantized above are cleared. */
    input[0] = 0;
    for (k = 0; k < 9; ++k) {
      input[reduced_pos[k]] = 0;
    }
  } else {
    for (k = 1; k < 256; ++k) {
      input[k] *= dq[1];
    }

    // the idct halves ( >> 1) the pitch
    vp9_short_idct16x16_c(input, output, 16 << 1);

    /* sizeof(output) is the 512 bytes occupied by the 256 int16_t
     * coefficients. */
    vpx_memset(input, 0, sizeof(output));
  }

  add_residual(output, pred, pitch, dest, stride, 16, 16);
}
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
|
2013-01-08 19:29:22 +01:00
|
|
|
/*
 * Dequantize a 32x32 coefficient block, run the inverse transform and add the
 * resulting residual onto the prediction, writing the result to dest.
 *
 *   input  - 1024 quantized coefficients; every coefficient this function
 *            dequantizes is reset to zero before returning.
 *   dq     - dequantization factors: dq[0] scales coefficient 0, dq[1] scales
 *            all other coefficients.
 *   pred   - prediction pixels, stepped by `pitch`.
 *   dest   - output pixels, stepped by `stride`.
 *   eob    - end-of-block value; 0 means nothing to do. It selects which
 *            inverse transform variant is used (1, <= 10, or full).
 *
 * Each dequantized value is additionally divided by 2.
 */
void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
                                  uint8_t *pred, uint8_t *dest, int pitch,
                                  int stride, int eob) {
  // Coefficient positions (other than 0) handled on the eob <= 10 path.
  static const int sparse_pos[9] = { 1, 2, 3, 32, 33, 34, 64, 65, 96 };
  int16_t output[1024];
  int k;

  if (!eob)
    return;

  // Coefficient 0 is the only one scaled by dq[0].
  input[0] = input[0] * dq[0] / 2;

  if (eob == 1) {
    // Single coefficient: the residual is one constant for the whole block.
    vp9_short_idct1_32x32(input, output);
    add_constant_residual(output[0], pred, pitch, dest, stride, 32, 32);
    input[0] = 0;
    return;
  }

  if (eob <= 10) {
    for (k = 0; k < 9; ++k) {
      const int pos = sparse_pos[k];
      input[pos] = input[pos] * dq[1] / 2;
    }

    // the idct halves ( >> 1) the pitch
    vp9_short_idct10_32x32(input, output, 64);

    // Clear exactly the coefficients that were touched above.
    input[0] = 0;
    for (k = 0; k < 9; ++k)
      input[sparse_pos[k]] = 0;
  } else {
    for (k = 1; k < 1024; ++k)
      input[k] = input[k] * dq[1] / 2;

    // the idct halves ( >> 1) the pitch
    vp9_short_idct32x32(input, output, 64);

    // 2048 bytes == 1024 int16_t coefficients.
    vpx_memset(input, 0, 2048);
  }

  add_residual(output, pred, pitch, dest, stride, 32, 32);
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq,
|
|
|
|
uint8_t *dstu,
|
|
|
|
uint8_t *dstv,
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
int stride,
|
2013-02-21 19:04:40 +01:00
|
|
|
MACROBLOCKD *xd) {
|
|
|
|
vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride,
|
2013-02-27 19:00:24 +01:00
|
|
|
xd->eobs[16]);
|
2013-02-21 19:04:40 +01:00
|
|
|
vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride,
|
2013-02-27 19:00:24 +01:00
|
|
|
xd->eobs[20]);
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
}
|