2010-05-18 17:58:33 +02:00
|
|
|
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
|
|
|
|
|
|
|
|
|
|
|
|
#include <assert.h>
#include <math.h>

#include "vpx_ports/config.h"
#include "vp8/common/idct.h"
#include "vp8/common/systemdependent.h"
#include "vp8/common/blockd.h"
|
|
|
|
|
2012-08-02 18:07:33 +02:00
|
|
|
// TODO: convert these floating-point transform tables to integer form to
// reduce complexity.
|
2012-06-25 21:26:09 +02:00
|
|
|
// 4x4 DCT-II basis, stored row-major: entries [4 * j .. 4 * j + 3] form the
// j-th basis vector. vp8_fht_c walks the table one row per output
// coefficient.
float dct_4[16] = {
  0.500000000000000, 0.500000000000000, 0.500000000000000, 0.500000000000000,
  0.653281482438188, 0.270598050073099, -0.270598050073099, -0.653281482438188,
  0.500000000000000, -0.500000000000000, -0.500000000000000, 0.500000000000000,
  0.270598050073099, -0.653281482438188, 0.653281482438188, -0.270598050073099
};
|
|
|
|
|
|
|
|
// 4x4 sine-based (ADST) basis, stored row-major: entries
// [4 * j .. 4 * j + 3] form the j-th basis vector. Selected by vp8_fht_c
// for the ADST direction(s) of a hybrid transform.
float adst_4[16] = {
  0.228013428883779, 0.428525073124360, 0.577350269189626, 0.656538502008139,
  0.577350269189626, 0.577350269189626, 0.000000000000000, -0.577350269189626,
  0.656538502008139, -0.228013428883779, -0.577350269189626, 0.428525073124359,
  0.428525073124360, -0.656538502008139, 0.577350269189626, -0.228013428883779
};
|
|
|
|
|
2012-08-02 18:07:33 +02:00
|
|
|
// 8x8 DCT-II basis, stored row-major: entries [8 * j .. 8 * j + 7] form the
// j-th basis vector. vp8_fht_c walks the table one row per output
// coefficient.
float dct_8[64] = {
  0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
  0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
  0.490392640201615, 0.415734806151273, 0.277785116509801, 0.097545161008064,
  -0.097545161008064, -0.277785116509801, -0.415734806151273, -0.490392640201615,
  0.461939766255643, 0.191341716182545, -0.191341716182545, -0.461939766255643,
  -0.461939766255643, -0.191341716182545, 0.191341716182545, 0.461939766255643,
  0.415734806151273, -0.097545161008064, -0.490392640201615, -0.277785116509801,
  0.277785116509801, 0.490392640201615, 0.097545161008064, -0.415734806151273,
  0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
  0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
  0.277785116509801, -0.490392640201615, 0.097545161008064, 0.415734806151273,
  -0.415734806151273, -0.097545161008064, 0.490392640201615, -0.277785116509801,
  0.191341716182545, -0.461939766255643, 0.461939766255643, -0.191341716182545,
  -0.191341716182545, 0.461939766255643, -0.461939766255643, 0.191341716182545,
  0.097545161008064, -0.277785116509801, 0.415734806151273, -0.490392640201615,
  0.490392640201615, -0.415734806151273, 0.277785116509801, -0.097545161008064
};
|
|
|
|
|
|
|
|
// 8x8 sine-based (ADST) basis, stored row-major: entries
// [8 * j .. 8 * j + 7] form the j-th basis vector. Selected by vp8_fht_c
// for the ADST direction(s) of a hybrid transform.
float adst_8[64] = {
  0.089131608307533, 0.175227946595735, 0.255357107325376, 0.326790388032145,
  0.387095214016349, 0.434217976756762, 0.466553967085785, 0.483002021635509,
  0.255357107325376, 0.434217976756762, 0.483002021635509, 0.387095214016349,
  0.175227946595735, -0.089131608307533, -0.326790388032145, -0.466553967085785,
  0.387095214016349, 0.466553967085785, 0.175227946595735, -0.255357107325376,
  -0.483002021635509, -0.326790388032145, 0.089131608307533, 0.434217976756762,
  0.466553967085785, 0.255357107325376, -0.326790388032145, -0.434217976756762,
  0.089131608307533, 0.483002021635509, 0.175227946595735, -0.387095214016348,
  0.483002021635509, -0.089131608307533, -0.466553967085785, 0.175227946595735,
  0.434217976756762, -0.255357107325376, -0.387095214016348, 0.326790388032145,
  0.434217976756762, -0.387095214016348, -0.089131608307533, 0.466553967085786,
  -0.326790388032145, -0.175227946595735, 0.483002021635509, -0.255357107325375,
  0.326790388032145, -0.483002021635509, 0.387095214016349, -0.089131608307534,
  -0.255357107325377, 0.466553967085785, -0.434217976756762, 0.175227946595736,
  0.175227946595735, -0.326790388032145, 0.434217976756762, -0.483002021635509,
  0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532
};
|
2012-06-25 21:26:09 +02:00
|
|
|
|
2012-08-29 20:25:38 +02:00
|
|
|
// 16x16 DCT-II basis (six-decimal precision), stored row-major: entries
// [16 * j .. 16 * j + 15] form the j-th basis vector. vp8_fht_c walks the
// table one row per output coefficient.
float dct_16[256] = {
  0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
  0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
  0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654,
  -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
  0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
  -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760,
  0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
  0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330,
  0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
  0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
  0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664,
  -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806,
  0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969,
  -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969,
  0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292,
  0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300,
  0.250000, -0.250000, -0.250000, 0.250000, 0.250000, -0.250000, -0.250000, 0.250000,
  0.250000, -0.250000, -0.250000, 0.250000, 0.250000, -0.250000, -0.250000, 0.250000,
  0.224292, -0.311806, -0.102631, 0.351851, -0.034654, -0.338330, 0.166664, 0.273300,
  -0.273300, -0.166664, 0.338330, 0.034654, -0.351851, 0.102631, 0.311806, -0.224292,
  0.196424, -0.346760, 0.068975, 0.293969, -0.293969, -0.068975, 0.346760, -0.196424,
  -0.196424, 0.346760, -0.068975, -0.293969, 0.293969, 0.068975, -0.346760, 0.196424,
  0.166664, -0.351851, 0.224292, 0.102631, -0.338330, 0.273300, 0.034654, -0.311806,
  0.311806, -0.034654, -0.273300, 0.338330, -0.102631, -0.224292, 0.351851, -0.166664,
  0.135299, -0.326641, 0.326641, -0.135299, -0.135299, 0.326641, -0.326641, 0.135299,
  0.135299, -0.326641, 0.326641, -0.135299, -0.135299, 0.326641, -0.326641, 0.135299,
  0.102631, -0.273300, 0.351851, -0.311806, 0.166664, 0.034654, -0.224292, 0.338330,
  -0.338330, 0.224292, -0.034654, -0.166664, 0.311806, -0.351851, 0.273300, -0.102631,
  0.068975, -0.196424, 0.293969, -0.346760, 0.346760, -0.293969, 0.196424, -0.068975,
  -0.068975, 0.196424, -0.293969, 0.346760, -0.346760, 0.293969, -0.196424, 0.068975,
  0.034654, -0.102631, 0.166664, -0.224292, 0.273300, -0.311806, 0.338330, -0.351851,
  0.351851, -0.338330, 0.311806, -0.273300, 0.224292, -0.166664, 0.102631, -0.034654
};
|
|
|
|
|
|
|
|
// 16x16 sine-based (ADST) basis (six-decimal precision), stored row-major:
// entries [16 * j .. 16 * j + 15] form the j-th basis vector. Selected by
// vp8_fht_c for the ADST direction(s) of a hybrid transform.
float adst_16[256] = {
  0.033094, 0.065889, 0.098087, 0.129396, 0.159534, 0.188227, 0.215215, 0.240255,
  0.263118, 0.283599, 0.301511, 0.316693, 0.329007, 0.338341, 0.344612, 0.347761,
  0.098087, 0.188227, 0.263118, 0.316693, 0.344612, 0.344612, 0.316693, 0.263118,
  0.188227, 0.098087, 0.000000, -0.098087, -0.188227, -0.263118, -0.316693, -0.344612,
  0.159534, 0.283599, 0.344612, 0.329007, 0.240255, 0.098087, -0.065889, -0.215215,
  -0.316693, -0.347761, -0.301511, -0.188227, -0.033094, 0.129396, 0.263118, 0.338341,
  0.215215, 0.338341, 0.316693, 0.159534, -0.065889, -0.263118, -0.347761, -0.283599,
  -0.098087, 0.129396, 0.301511, 0.344612, 0.240255, 0.033094, -0.188227, -0.329007,
  0.263118, 0.344612, 0.188227, -0.098087, -0.316693, -0.316693, -0.098087, 0.188227,
  0.344612, 0.263118, 0.000000, -0.263118, -0.344612, -0.188227, 0.098087, 0.316693,
  0.301511, 0.301511, 0.000000, -0.301511, -0.301511, -0.000000, 0.301511, 0.301511,
  0.000000, -0.301511, -0.301511, -0.000000, 0.301511, 0.301511, 0.000000, -0.301511,
  0.329007, 0.215215, -0.188227, -0.338341, -0.033094, 0.316693, 0.240255, -0.159534,
  -0.344612, -0.065889, 0.301511, 0.263118, -0.129396, -0.347761, -0.098087, 0.283599,
  0.344612, 0.098087, -0.316693, -0.188227, 0.263118, 0.263118, -0.188227, -0.316693,
  0.098087, 0.344612, 0.000000, -0.344612, -0.098087, 0.316693, 0.188227, -0.263118,
  0.347761, -0.033094, -0.344612, 0.065889, 0.338341, -0.098087, -0.329007, 0.129396,
  0.316693, -0.159534, -0.301511, 0.188227, 0.283599, -0.215215, -0.263118, 0.240255,
  0.338341, -0.159534, -0.263118, 0.283599, 0.129396, -0.344612, 0.033094, 0.329007,
  -0.188227, -0.240255, 0.301511, 0.098087, -0.347761, 0.065889, 0.316693, -0.215215,
  0.316693, -0.263118, -0.098087, 0.344612, -0.188227, -0.188227, 0.344612, -0.098087,
  -0.263118, 0.316693, 0.000000, -0.316693, 0.263118, 0.098087, -0.344612, 0.188227,
  0.283599, -0.329007, 0.098087, 0.215215, -0.347761, 0.188227, 0.129396, -0.338341,
  0.263118, 0.033094, -0.301511, 0.316693, -0.065889, -0.240255, 0.344612, -0.159534,
  0.240255, -0.347761, 0.263118, -0.033094, -0.215215, 0.344612, -0.283599, 0.065889,
  0.188227, -0.338341, 0.301511, -0.098087, -0.159534, 0.329007, -0.316693, 0.129396,
  0.188227, -0.316693, 0.344612, -0.263118, 0.098087, 0.098087, -0.263118, 0.344612,
  -0.316693, 0.188227, 0.000000, -0.188227, 0.316693, -0.344612, 0.263118, -0.098087,
  0.129396, -0.240255, 0.316693, -0.347761, 0.329007, -0.263118, 0.159534, -0.033094,
  -0.098087, 0.215215, -0.301511, 0.344612, -0.338341, 0.283599, -0.188227, 0.065889,
  0.065889, -0.129396, 0.188227, -0.240255, 0.283599, -0.316693, 0.338341, -0.347761,
  0.344612, -0.329007, 0.301511, -0.263118, 0.215215, -0.159534, 0.098087, -0.033094
};
|
|
|
|
|
2012-05-09 18:31:14 +02:00
|
|
|
// Fixed-point cosines for the 8-point DCT, scaled by 2^SHIFT_BITS:
// xC<k>S<8-k> == round(cos(k * pi / 16) * 16384).
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
static const int xC3S5 = 13623;
static const int xC4S4 = 11585;
static const int xC5S3 = 9102;
static const int xC6S2 = 6270;
static const int xC7S1 = 3196;

// Number of fractional bits carried by the xC* constants.
#define SHIFT_BITS 14
// Adds the rounding offset for a following ">>= SHIFT_BITS".
// NOTE: expands to a full statement including its own ';'. Left unchanged
// because code outside vp8_short_fdct8x8_c may still use it.
#define DOROUND(X) X += (1<<(SHIFT_BITS-1));

// The final output is scaled down by 2^FINAL_SHIFT with rounding; the input
// is scaled up by 2^IN_SHIFT first so precision is kept between passes.
#define FINAL_SHIFT 3
#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1))
#define IN_SHIFT (FINAL_SHIFT+1)

// Rounds a product with one xC* constant back to the input scale.
static int fdct8x8_round_shift(int product) {
  return (product + (1 << (SHIFT_BITS - 1))) >> SHIFT_BITS;
}

// One 8-point forward DCT butterfly: reads in[0..7], writes out[0..7].
static void fdct8x8_1d(const int in[8], int out[8]) {
  // Common sums and differences of mirrored sample pairs.
  const int is07 = in[0] + in[7];
  const int is12 = in[1] + in[2];
  const int is34 = in[3] + in[4];
  const int is56 = in[5] + in[6];
  const int id07 = in[0] - in[7];
  const int id12 = in[1] - in[2];
  const int id34 = in[3] - in[4];
  const int id56 = in[5] - in[6];

  const int is0734 = is07 + is34;
  const int is1256 = is12 + is56;

  // Products that are re-used by the odd outputs.
  const int icommon_product1 = fdct8x8_round_shift(xC4S4 * (is12 - is56));
  const int icommon_product2 = fdct8x8_round_shift(xC4S4 * (id12 + id56));

  int irot_input_x, irot_input_y;

  out[0] = fdct8x8_round_shift(xC4S4 * (is0734 + is1256));
  out[4] = fdct8x8_round_shift(xC4S4 * (is0734 - is1256));

  // Rotation for outputs 2 and 6.
  irot_input_x = id12 - id56;
  irot_input_y = is07 - is34;
  out[2] = fdct8x8_round_shift(xC6S2 * irot_input_x) +
           fdct8x8_round_shift(xC2S6 * irot_input_y);
  out[6] = fdct8x8_round_shift(xC6S2 * irot_input_y) -
           fdct8x8_round_shift(xC2S6 * irot_input_x);

  // Rotation for outputs 1 and 7.
  irot_input_x = icommon_product1 + id07;
  irot_input_y = -(id34 + icommon_product2);
  out[1] = fdct8x8_round_shift(xC1S7 * irot_input_x) -
           fdct8x8_round_shift(xC7S1 * irot_input_y);
  out[7] = fdct8x8_round_shift(xC7S1 * irot_input_x) +
           fdct8x8_round_shift(xC1S7 * irot_input_y);

  // Rotation for outputs 3 and 5.
  irot_input_x = id07 - icommon_product1;
  irot_input_y = id34 - icommon_product2;
  out[3] = fdct8x8_round_shift(xC3S5 * irot_input_x) -
           fdct8x8_round_shift(xC5S3 * irot_input_y);
  out[5] = fdct8x8_round_shift(xC5S3 * irot_input_x) +
           fdct8x8_round_shift(xC3S5 * irot_input_y);
}

// Forward 8x8 DCT.
//
// InputData:  8 rows of 8 samples; consecutive rows are pitch bytes
//             (pitch >> 1 shorts) apart.
// OutputData: receives the 64 coefficients, row-major with a stride of 8.
// pitch:      input row stride in bytes.
//
// Rows are transformed first into a 32-bit intermediate, then columns; the
// column results are rounded and scaled down by 2^FINAL_SHIFT.
void vp8_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) {
  const int short_pitch = pitch >> 1;
  int InterData[64];
  int in[8], out[8];
  int loop, k;

  // Row pass. Scale by multiplication rather than '<<' so that negative
  // residuals do not trigger undefined behaviour.
  for (loop = 0; loop < 8; loop++) {
    for (k = 0; k < 8; k++)
      in[k] = InputData[loop * short_pitch + k] * (1 << IN_SHIFT);
    fdct8x8_1d(in, &InterData[loop * 8]);
  }

  // Column pass with the final rounding and down-scale.
  for (loop = 0; loop < 8; loop++) {
    for (k = 0; k < 8; k++)
      in[k] = InterData[k * 8 + loop];
    fdct8x8_1d(in, out);
    for (k = 0; k < 8; k++)
      OutputData[k * 8 + loop] =
          (short)((out[k] + FINAL_ROUNDING) >> FINAL_SHIFT);
  }
}
|
2011-02-14 23:18:18 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Forward 2x2 Haar ([1 1; 1 -1]) transform over the four samples stored at
// positions 0, 1, 4 and 8 of a 16-entry block.
//
// input:  16-entry block; only entries 0, 1, 4 and 8 are read.
// output: 16-entry block; every entry is cleared, then entries 0, 1, 4 and 8
//         receive the halved results (only entry 0 is rounded).
// pitch:  unused (the original signature comment notes "pitch = 8").
void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) {
  int i;
  int s0, s1, s4, s8;

  (void)pitch;

  // Clear first, then read: preserves the original ordering if the two
  // buffers were ever to alias.
  for (i = 0; i < 16; i++)
    output[i] = 0;

  s0 = input[0];
  s1 = input[1];
  s4 = input[4];
  s8 = input[8];

  output[0] = (s0 + s1 + s4 + s8 + 1) >> 1;
  output[1] = (s0 - s1 + s4 - s8) >> 1;
  output[4] = (s0 + s1 - s4 - s8) >> 1;
  output[8] = (s0 - s1 - s4 + s8) >> 1;
}
|
2012-02-29 02:11:12 +01:00
|
|
|
|
2012-08-06 23:48:11 +02:00
|
|
|
void vp8_fht_c(short *input, short *output, int pitch,
|
|
|
|
TX_TYPE tx_type, int tx_dim) {
|
2012-08-02 18:07:33 +02:00
|
|
|
|
2012-10-05 12:16:46 +02:00
|
|
|
vp8_clear_system_state(); // Make it simd safe : __asm emms;
|
|
|
|
{
|
|
|
|
int i, j, k;
|
|
|
|
float bufa[256], bufb[256]; // buffers are for floating-point test purpose
|
|
|
|
// the implementation could be simplified in
|
|
|
|
// conjunction with integer transform
|
|
|
|
short *ip = input;
|
|
|
|
short *op = output;
|
|
|
|
|
|
|
|
float *pfa = &bufa[0];
|
|
|
|
float *pfb = &bufb[0];
|
|
|
|
|
|
|
|
// pointers to vertical and horizontal transforms
|
|
|
|
float *ptv, *pth;
|
|
|
|
|
2012-10-16 01:41:41 +02:00
|
|
|
assert(tx_type != DCT_DCT);
|
2012-10-05 12:16:46 +02:00
|
|
|
// load and convert residual array into floating-point
|
|
|
|
for(j = 0; j < tx_dim; j++) {
|
|
|
|
for(i = 0; i < tx_dim; i++) {
|
|
|
|
pfa[i] = (float)ip[i];
|
2012-08-02 18:07:33 +02:00
|
|
|
}
|
2012-10-05 12:16:46 +02:00
|
|
|
pfa += tx_dim;
|
|
|
|
ip += pitch / 2;
|
2012-08-02 18:07:33 +02:00
|
|
|
}
|
2012-10-05 12:16:46 +02:00
|
|
|
|
|
|
|
// vertical transformation
|
2012-08-02 18:07:33 +02:00
|
|
|
pfa = &bufa[0];
|
2012-10-05 12:16:46 +02:00
|
|
|
pfb = &bufb[0];
|
2012-08-02 18:07:33 +02:00
|
|
|
|
2012-10-05 12:16:46 +02:00
|
|
|
switch(tx_type) {
|
|
|
|
case ADST_ADST :
|
|
|
|
case ADST_DCT :
|
|
|
|
ptv = (tx_dim == 4) ? &adst_4[0] :
|
|
|
|
((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
|
|
|
|
break;
|
2012-08-02 18:07:33 +02:00
|
|
|
|
2012-10-05 12:16:46 +02:00
|
|
|
default :
|
|
|
|
ptv = (tx_dim == 4) ? &dct_4[0] :
|
|
|
|
((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
for(j = 0; j < tx_dim; j++) {
|
|
|
|
for(i = 0; i < tx_dim; i++) {
|
|
|
|
pfb[i] = 0;
|
|
|
|
for(k = 0; k < tx_dim; k++) {
|
|
|
|
pfb[i] += ptv[k] * pfa[(k * tx_dim)];
|
|
|
|
}
|
|
|
|
pfa += 1;
|
2012-08-02 18:07:33 +02:00
|
|
|
}
|
2012-10-05 12:16:46 +02:00
|
|
|
pfb += tx_dim;
|
|
|
|
ptv += tx_dim;
|
|
|
|
pfa = &bufa[0];
|
2012-09-10 07:42:35 +02:00
|
|
|
}
|
2012-08-02 18:07:33 +02:00
|
|
|
|
2012-10-05 12:16:46 +02:00
|
|
|
// horizontal transformation
|
|
|
|
pfa = &bufa[0];
|
|
|
|
pfb = &bufb[0];
|
2012-08-02 18:07:33 +02:00
|
|
|
|
|
|
|
switch(tx_type) {
|
|
|
|
case ADST_ADST :
|
|
|
|
case DCT_ADST :
|
2012-08-29 20:25:38 +02:00
|
|
|
pth = (tx_dim == 4) ? &adst_4[0] :
|
|
|
|
((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
|
2012-08-02 18:07:33 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
default :
|
2012-08-29 20:25:38 +02:00
|
|
|
pth = (tx_dim == 4) ? &dct_4[0] :
|
|
|
|
((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
|
2012-08-02 18:07:33 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2012-10-05 12:16:46 +02:00
|
|
|
for(j = 0; j < tx_dim; j++) {
|
|
|
|
for(i = 0; i < tx_dim; i++) {
|
|
|
|
pfa[i] = 0;
|
|
|
|
for(k = 0; k < tx_dim; k++) {
|
|
|
|
pfa[i] += pfb[k] * pth[k];
|
|
|
|
}
|
|
|
|
pth += tx_dim;
|
|
|
|
}
|
2012-08-02 18:07:33 +02:00
|
|
|
|
2012-10-05 12:16:46 +02:00
|
|
|
pfa += tx_dim;
|
|
|
|
pfb += tx_dim;
|
|
|
|
// pth -= tx_dim * tx_dim;
|
|
|
|
|
|
|
|
switch(tx_type) {
|
|
|
|
case ADST_ADST :
|
|
|
|
case DCT_ADST :
|
|
|
|
pth = (tx_dim == 4) ? &adst_4[0] :
|
|
|
|
((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default :
|
|
|
|
pth = (tx_dim == 4) ? &dct_4[0] :
|
|
|
|
((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// convert to short integer format and load BLOCKD buffer
|
|
|
|
op = output ;
|
|
|
|
pfa = &bufa[0] ;
|
|
|
|
|
|
|
|
for(j = 0; j < tx_dim; j++) {
|
|
|
|
for(i = 0; i < tx_dim; i++) {
|
|
|
|
op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) :
|
|
|
|
-(short)(- 8 * pfa[i] + 0.49);
|
|
|
|
}
|
|
|
|
op += tx_dim;
|
|
|
|
pfa += tx_dim;
|
2012-08-02 18:07:33 +02:00
|
|
|
}
|
|
|
|
}
|
2012-10-05 12:16:46 +02:00
|
|
|
vp8_clear_system_state(); // Make it simd safe : __asm emms;
|
2012-08-02 18:07:33 +02:00
|
|
|
}
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Forward 4x4 DCT (integer approximation).
//
// input:  4 rows of 4 residuals; rows are pitch bytes (pitch / 2 shorts)
//         apart.
// output: receives the 16 coefficients, row-major with a stride of 4. It is
//         also used as scratch storage between the two passes.
// pitch:  input row stride in bytes.
void vp8_short_fdct4x4_c(short *input, short *output, int pitch) {
  int i;
  int a1, b1, c1, d1;
  const int stride = pitch / 2;
  const short *ip = input;
  short *op = output;

  // Horizontal pass. The inputs are scaled by 32 with a multiplication:
  // '<< 5' on a negative sum or difference is undefined behaviour.
  for (i = 0; i < 4; i++) {
    a1 = (ip[0] + ip[3]) * 32;
    b1 = (ip[1] + ip[2]) * 32;
    c1 = (ip[1] - ip[2]) * 32;
    d1 = (ip[0] - ip[3]) * 32;

    op[0] = a1 + b1;
    op[2] = a1 - b1;

    op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12;
    op[3] = (d1 * 2217 - c1 * 5352 + 7500) >> 12;

    ip += stride;
    op += 4;
  }

  // Vertical pass, in place on the output buffer; each column is fully read
  // before it is written.
  ip = output;
  op = output;
  for (i = 0; i < 4; i++) {
    a1 = ip[0] + ip[12];
    b1 = ip[4] + ip[8];
    c1 = ip[4] - ip[8];
    d1 = ip[0] - ip[12];

    op[0] = (a1 + b1 + 7) >> 4;
    op[8] = (a1 - b1 + 7) >> 4;

    op[4] = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0);
    op[12] = (d1 * 2217 - c1 * 5352 + 51000) >> 16;

    ip++;
    op++;
  }
}
|
|
|
|
|
2012-06-25 21:26:09 +02:00
|
|
|
// Forward DCT of an 8-wide by 4-high region, processed as two adjacent 4x4
// blocks: the left block's coefficients go to output[0..15], the right
// block's (input columns 4..7) to output[16..31].
void vp8_short_fdct8x4_c(short *input, short *output, int pitch) {
  vp8_short_fdct4x4_c(input, output, pitch);
  vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Forward 4x4 Walsh-Hadamard transform.
//
// input:  4 rows of 4 values; rows are pitch bytes (pitch >> 1 shorts)
//         apart.
// output: receives the 16 coefficients, row-major with a stride of 4. It is
//         also used as scratch storage between the two passes.
// pitch:  input row stride in bytes.
//
// Each pass halves its results; only the first output of each 4-point
// butterfly is rounded (+1 before the shift).
void vp8_short_walsh4x4_c(short *input, short *output, int pitch) {
  int i;
  int a1, b1, c1, d1;
  const int pitch_short = pitch >> 1;
  short *row;

  // Vertical pass: column i of the input becomes column i of the output.
  for (i = 0; i < 4; i++) {
    a1 = input[i + 0 * pitch_short] + input[i + 3 * pitch_short];
    b1 = input[i + 1 * pitch_short] + input[i + 2 * pitch_short];
    c1 = input[i + 1 * pitch_short] - input[i + 2 * pitch_short];
    d1 = input[i + 0 * pitch_short] - input[i + 3 * pitch_short];

    output[i + 0] = (a1 + b1 + 1) >> 1;
    output[i + 4] = (c1 + d1) >> 1;
    output[i + 8] = (a1 - b1) >> 1;
    output[i + 12] = (d1 - c1) >> 1;
  }

  // Horizontal pass, in place; each row is fully read before it is written.
  for (i = 0; i < 4; i++) {
    row = output + 4 * i;

    a1 = row[0] + row[3];
    b1 = row[1] + row[2];
    c1 = row[1] - row[2];
    d1 = row[0] - row[3];

    row[0] = (a1 + b1 + 1) >> 1;
    row[1] = (c1 + d1) >> 1;
    row[2] = (a1 - b1) >> 1;
    row[3] = (d1 - c1) >> 1;
  }
}
|
|
|
|
|
|
|
|
#if CONFIG_LOSSLESS
|
2012-07-14 00:21:29 +02:00
|
|
|
void vp8_short_walsh4x4_lossless_c(short *input, short *output, int pitch) {
|
|
|
|
int i;
|
|
|
|
int a1, b1, c1, d1;
|
|
|
|
short *ip = input;
|
|
|
|
short *op = output;
|
|
|
|
int pitch_short = pitch >> 1;
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
a1 = (ip[0 * pitch_short] + ip[3 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
|
|
|
|
b1 = (ip[1 * pitch_short] + ip[2 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
|
|
|
|
c1 = (ip[1 * pitch_short] - ip[2 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
|
|
|
|
d1 = (ip[0 * pitch_short] - ip[3 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
|
|
|
|
|
|
|
|
op[0] = (a1 + b1 + 1) >> 1;
|
|
|
|
op[4] = (c1 + d1) >> 1;
|
|
|
|
op[8] = (a1 - b1) >> 1;
|
|
|
|
op[12] = (d1 - c1) >> 1;
|
|
|
|
|
|
|
|
ip++;
|
|
|
|
op++;
|
|
|
|
}
|
|
|
|
ip = output;
|
|
|
|
op = output;
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
a1 = ip[0] + ip[3];
|
|
|
|
b1 = ip[1] + ip[2];
|
|
|
|
c1 = ip[1] - ip[2];
|
|
|
|
d1 = ip[0] - ip[3];
|
|
|
|
|
|
|
|
op[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
|
|
|
|
op[1] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
|
|
|
|
op[2] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
|
|
|
|
op[3] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
|
|
|
|
|
|
|
|
ip += 4;
|
|
|
|
op += 4;
|
|
|
|
}
|
Add lossless compression mode.
This commit adds lossless compression capability to the experimental
branch. The lossless experiment can be enabled using --enable-lossless
in configure. When the experiment is enabled, the encoder will use
lossless compression mode by command line option --lossless, and the
decoder automatically recognizes a losslessly encoded clip and decodes
accordingly.
To achieve the lossless coding, this commit has changed the following:
1. To encode at lossless mode, encoder forces the use of unit
quantizer, i.e, Q 0, where effective quantization is 1. Encoder also
disables the usage of 8x8 transform and allows only 4x4 transform;
2. At Q 0, the first order 4x4 DCT/IDCT have been switched over
to a pair of forward and inverse Walsh-Hadamard Transform
(http://goo.gl/EIsfy), with proper scaling applied to match the range
of the original 4x4 DCT/IDCT pair;
3. At Q 0, the second order remains to use the previous
walsh-hadamard transform pair. However, to maintain the reversibility
in second order transform at Q 0, scaling down is applied to first
order DC coefficients prior to forward transform, and scaling up is
applied to the second order output prior to quantization. Symmetric
upscaling and downscaling are added around inverse second order
transform;
4. At lossless mode, encoder also disables a number of minor
features to ensure no loss is introduced, these features includes:
a. Trellis quantization optimization
b. Loop filtering
c. Aggressive zero-binning, rounding and zero-bin boosting
d. Mode based zero-bin boosting
Lossless coding test was performed on all clips within the derf set,
to verify that the commit has achieved lossless compression for all
clips. The average compression ratio is around 2.57 to 1.
(http://goo.gl/dEShs)
Change-Id: Ia3aba7dd09df40dd590f93b9aba134defbc64e34
2012-06-14 04:03:31 +02:00
|
|
|
}
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
void vp8_short_walsh4x4_x8_c(short *input, short *output, int pitch) {
|
|
|
|
int i;
|
|
|
|
int a1, b1, c1, d1;
|
|
|
|
short *ip = input;
|
|
|
|
short *op = output;
|
|
|
|
int pitch_short = pitch >> 1;
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
a1 = ip[0 * pitch_short] + ip[3 * pitch_short];
|
|
|
|
b1 = ip[1 * pitch_short] + ip[2 * pitch_short];
|
|
|
|
c1 = ip[1 * pitch_short] - ip[2 * pitch_short];
|
|
|
|
d1 = ip[0 * pitch_short] - ip[3 * pitch_short];
|
|
|
|
|
|
|
|
op[0] = (a1 + b1 + 1) >> 1;
|
|
|
|
op[4] = (c1 + d1) >> 1;
|
|
|
|
op[8] = (a1 - b1) >> 1;
|
|
|
|
op[12] = (d1 - c1) >> 1;
|
|
|
|
|
|
|
|
ip++;
|
|
|
|
op++;
|
|
|
|
}
|
|
|
|
ip = output;
|
|
|
|
op = output;
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
a1 = ip[0] + ip[3];
|
|
|
|
b1 = ip[1] + ip[2];
|
|
|
|
c1 = ip[1] - ip[2];
|
|
|
|
d1 = ip[0] - ip[3];
|
|
|
|
|
|
|
|
op[0] = ((a1 + b1 + 1) >> 1) << WHT_UPSCALE_FACTOR;
|
|
|
|
op[1] = ((c1 + d1) >> 1) << WHT_UPSCALE_FACTOR;
|
|
|
|
op[2] = ((a1 - b1) >> 1) << WHT_UPSCALE_FACTOR;
|
|
|
|
op[3] = ((d1 - c1) >> 1) << WHT_UPSCALE_FACTOR;
|
|
|
|
|
|
|
|
ip += 4;
|
|
|
|
op += 4;
|
|
|
|
}
|
Add lossless compression mode.
This commit adds lossless compression capability to the experimental
branch. The lossless experiment can be enabled using --enable-lossless
in configure. When the experiment is enabled, the encoder will use
lossless compression mode by command line option --lossless, and the
decoder automatically recognizes a losslessly encoded clip and decodes
accordingly.
To achieve the lossless coding, this commit has changed the following:
1. To encode at lossless mode, encoder forces the use of unit
quantizer, i.e, Q 0, where effective quantization is 1. Encoder also
disables the usage of 8x8 transform and allows only 4x4 transform;
2. At Q 0, the first order 4x4 DCT/IDCT have been switched over
to a pair of forward and inverse Walsh-Hadamard Transform
(http://goo.gl/EIsfy), with proper scaling applied to match the range
of the original 4x4 DCT/IDCT pair;
3. At Q 0, the second order remains to use the previous
walsh-hadamard transform pair. However, to maintain the reversibility
in second order transform at Q 0, scaling down is applied to first
order DC coefficients prior to forward transform, and scaling up is
applied to the second order output prior to quantization. Symmetric
upscaling and downscaling are added around inverse second order
transform;
4. At lossless mode, encoder also disables a number of minor
features to ensure no loss is introduced, these features includes:
a. Trellis quantization optimization
b. Loop filtering
c. Aggressive zero-binning, rounding and zero-bin boosting
d. Mode based zero-bin boosting
Lossless coding test was performed on all clips within the derf set,
to verify that the commit has achieved lossless compression for all
clips. The average compression ratio is around 2.57 to 1.
(http://goo.gl/dEShs)
Change-Id: Ia3aba7dd09df40dd590f93b9aba134defbc64e34
2012-06-14 04:03:31 +02:00
|
|
|
}
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/*
 * Forward Walsh-Hadamard transform of an 8x4 area, handled as two 4x4
 * blocks placed side by side: the second block starts 4 samples further
 * into the input and its 16 coefficients follow those of the first block
 * in the output.
 */
void vp8_short_walsh8x4_x8_c(short *input, short *output, int pitch) {
  int blk;

  for (blk = 0; blk < 2; blk++) {
    vp8_short_walsh4x4_x8_c(input + 4 * blk, output + 16 * blk, pitch);
  }
}
|
|
|
|
#endif
|
2012-08-03 02:03:14 +02:00
|
|
|
|
2012-08-07 22:55:49 +02:00
|
|
|
/*
 * Cosine constants used by the 16-point DCT: Ck = cos(k * pi / 32) for
 * k = 1..15 (so C8 = cos(pi / 4) = sqrt(2) / 2).
 */
static const double C1  = 0.995184726672197;
static const double C2  = 0.98078528040323;
static const double C3  = 0.956940335732209;
static const double C4  = 0.923879532511287;
static const double C5  = 0.881921264348355;
static const double C6  = 0.831469612302545;
static const double C7  = 0.773010453362737;
static const double C8  = 0.707106781186548;
static const double C9  = 0.634393284163646;
static const double C10 = 0.555570233019602;
static const double C11 = 0.471396736825998;
static const double C12 = 0.38268343236509;
static const double C13 = 0.290284677254462;
static const double C14 = 0.195090322016128;
static const double C15 = 0.098017140329561;
|
|
|
|
|
2012-08-03 02:03:14 +02:00
|
|
|
static void dct16x16_1d(double input[16], double output[16]) {
|
2012-10-05 12:16:46 +02:00
|
|
|
vp8_clear_system_state(); // Make it simd safe : __asm emms;
|
|
|
|
{
|
|
|
|
double step[16];
|
|
|
|
double intermediate[16];
|
|
|
|
double temp1, temp2;
|
|
|
|
|
|
|
|
// step 1
|
|
|
|
step[ 0] = input[0] + input[15];
|
|
|
|
step[ 1] = input[1] + input[14];
|
|
|
|
step[ 2] = input[2] + input[13];
|
|
|
|
step[ 3] = input[3] + input[12];
|
|
|
|
step[ 4] = input[4] + input[11];
|
|
|
|
step[ 5] = input[5] + input[10];
|
|
|
|
step[ 6] = input[6] + input[ 9];
|
|
|
|
step[ 7] = input[7] + input[ 8];
|
|
|
|
step[ 8] = input[7] - input[ 8];
|
|
|
|
step[ 9] = input[6] - input[ 9];
|
|
|
|
step[10] = input[5] - input[10];
|
|
|
|
step[11] = input[4] - input[11];
|
|
|
|
step[12] = input[3] - input[12];
|
|
|
|
step[13] = input[2] - input[13];
|
|
|
|
step[14] = input[1] - input[14];
|
|
|
|
step[15] = input[0] - input[15];
|
|
|
|
|
|
|
|
// step 2
|
|
|
|
output[0] = step[0] + step[7];
|
|
|
|
output[1] = step[1] + step[6];
|
|
|
|
output[2] = step[2] + step[5];
|
|
|
|
output[3] = step[3] + step[4];
|
|
|
|
output[4] = step[3] - step[4];
|
|
|
|
output[5] = step[2] - step[5];
|
|
|
|
output[6] = step[1] - step[6];
|
|
|
|
output[7] = step[0] - step[7];
|
|
|
|
|
|
|
|
temp1 = step[ 8]*C7;
|
|
|
|
temp2 = step[15]*C9;
|
|
|
|
output[ 8] = temp1 + temp2;
|
|
|
|
|
|
|
|
temp1 = step[ 9]*C11;
|
|
|
|
temp2 = step[14]*C5;
|
|
|
|
output[ 9] = temp1 - temp2;
|
|
|
|
|
|
|
|
temp1 = step[10]*C3;
|
|
|
|
temp2 = step[13]*C13;
|
|
|
|
output[10] = temp1 + temp2;
|
|
|
|
|
|
|
|
temp1 = step[11]*C15;
|
|
|
|
temp2 = step[12]*C1;
|
|
|
|
output[11] = temp1 - temp2;
|
|
|
|
|
|
|
|
temp1 = step[11]*C1;
|
|
|
|
temp2 = step[12]*C15;
|
|
|
|
output[12] = temp2 + temp1;
|
|
|
|
|
|
|
|
temp1 = step[10]*C13;
|
|
|
|
temp2 = step[13]*C3;
|
|
|
|
output[13] = temp2 - temp1;
|
|
|
|
|
|
|
|
temp1 = step[ 9]*C5;
|
|
|
|
temp2 = step[14]*C11;
|
|
|
|
output[14] = temp2 + temp1;
|
|
|
|
|
|
|
|
temp1 = step[ 8]*C9;
|
|
|
|
temp2 = step[15]*C7;
|
|
|
|
output[15] = temp2 - temp1;
|
|
|
|
|
|
|
|
// step 3
|
|
|
|
step[ 0] = output[0] + output[3];
|
|
|
|
step[ 1] = output[1] + output[2];
|
|
|
|
step[ 2] = output[1] - output[2];
|
|
|
|
step[ 3] = output[0] - output[3];
|
|
|
|
|
|
|
|
temp1 = output[4]*C14;
|
|
|
|
temp2 = output[7]*C2;
|
|
|
|
step[ 4] = temp1 + temp2;
|
|
|
|
|
|
|
|
temp1 = output[5]*C10;
|
|
|
|
temp2 = output[6]*C6;
|
|
|
|
step[ 5] = temp1 + temp2;
|
|
|
|
|
|
|
|
temp1 = output[5]*C6;
|
|
|
|
temp2 = output[6]*C10;
|
|
|
|
step[ 6] = temp2 - temp1;
|
|
|
|
|
|
|
|
temp1 = output[4]*C2;
|
|
|
|
temp2 = output[7]*C14;
|
|
|
|
step[ 7] = temp2 - temp1;
|
|
|
|
|
|
|
|
step[ 8] = output[ 8] + output[11];
|
|
|
|
step[ 9] = output[ 9] + output[10];
|
|
|
|
step[10] = output[ 9] - output[10];
|
|
|
|
step[11] = output[ 8] - output[11];
|
|
|
|
|
|
|
|
step[12] = output[12] + output[15];
|
|
|
|
step[13] = output[13] + output[14];
|
|
|
|
step[14] = output[13] - output[14];
|
|
|
|
step[15] = output[12] - output[15];
|
|
|
|
|
|
|
|
// step 4
|
|
|
|
output[ 0] = (step[ 0] + step[ 1]);
|
|
|
|
output[ 8] = (step[ 0] - step[ 1]);
|
|
|
|
|
|
|
|
temp1 = step[2]*C12;
|
|
|
|
temp2 = step[3]*C4;
|
|
|
|
temp1 = temp1 + temp2;
|
|
|
|
output[ 4] = 2*(temp1*C8);
|
|
|
|
|
|
|
|
temp1 = step[2]*C4;
|
|
|
|
temp2 = step[3]*C12;
|
|
|
|
temp1 = temp2 - temp1;
|
|
|
|
output[12] = 2*(temp1*C8);
|
|
|
|
|
|
|
|
output[ 2] = 2*((step[4] + step[ 5])*C8);
|
|
|
|
output[14] = 2*((step[7] - step[ 6])*C8);
|
|
|
|
|
|
|
|
temp1 = step[4] - step[5];
|
|
|
|
temp2 = step[6] + step[7];
|
|
|
|
output[ 6] = (temp1 + temp2);
|
|
|
|
output[10] = (temp1 - temp2);
|
|
|
|
|
|
|
|
intermediate[8] = step[8] + step[14];
|
|
|
|
intermediate[9] = step[9] + step[15];
|
|
|
|
|
|
|
|
temp1 = intermediate[8]*C12;
|
|
|
|
temp2 = intermediate[9]*C4;
|
|
|
|
temp1 = temp1 - temp2;
|
|
|
|
output[3] = 2*(temp1*C8);
|
|
|
|
|
|
|
|
temp1 = intermediate[8]*C4;
|
|
|
|
temp2 = intermediate[9]*C12;
|
|
|
|
temp1 = temp2 + temp1;
|
|
|
|
output[13] = 2*(temp1*C8);
|
|
|
|
|
|
|
|
output[ 9] = 2*((step[10] + step[11])*C8);
|
|
|
|
|
|
|
|
intermediate[11] = step[10] - step[11];
|
|
|
|
intermediate[12] = step[12] + step[13];
|
|
|
|
intermediate[13] = step[12] - step[13];
|
|
|
|
intermediate[14] = step[ 8] - step[14];
|
|
|
|
intermediate[15] = step[ 9] - step[15];
|
|
|
|
|
|
|
|
output[15] = (intermediate[11] + intermediate[12]);
|
|
|
|
output[ 1] = -(intermediate[11] - intermediate[12]);
|
|
|
|
|
|
|
|
output[ 7] = 2*(intermediate[13]*C8);
|
|
|
|
|
|
|
|
temp1 = intermediate[14]*C12;
|
|
|
|
temp2 = intermediate[15]*C4;
|
|
|
|
temp1 = temp1 - temp2;
|
|
|
|
output[11] = -2*(temp1*C8);
|
|
|
|
|
|
|
|
temp1 = intermediate[14]*C4;
|
|
|
|
temp2 = intermediate[15]*C12;
|
|
|
|
temp1 = temp2 + temp1;
|
|
|
|
output[ 5] = 2*(temp1*C8);
|
|
|
|
}
|
|
|
|
vp8_clear_system_state(); // Make it simd safe : __asm emms;
|
2012-08-03 02:03:14 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Forward 16x16 DCT built from two passes of the 1-D double-precision
 * transform: first down every column, then along every row.
 *
 * input - top-left sample of the 16x16 source block.
 * out   - receives the 256 coefficients in row-major order.
 * pitch - distance between source rows; it is halved before being used as
 *         an index into the short array (presumably it is given in bytes;
 *         confirm against callers).
 */
void vp8_short_fdct16x16_c(short *input, short *out, int pitch) {
  vp8_clear_system_state();  // Make it simd safe : __asm emms;
  {
    const int stride = pitch >> 1;
    double coeffs[256];
    double line_in[16], line_out[16];
    int r, c, n;

    // Column pass: gather one source column, transform it and store the
    // result in the same column of the working buffer.
    for (c = 0; c < 16; c++) {
      for (r = 0; r < 16; r++) {
        line_in[r] = input[r * stride + c];
      }
      dct16x16_1d(line_in, line_out);
      for (r = 0; r < 16; r++) {
        coeffs[r * 16 + c] = line_out[r];
      }
    }

    // Row pass: transform every row of the working buffer in place.
    for (r = 0; r < 16; r++) {
      double *row = coeffs + r * 16;

      for (c = 0; c < 16; c++) {
        line_in[c] = row[c];
      }
      dct16x16_1d(line_in, line_out);
      for (c = 0; c < 16; c++) {
        row[c] = line_out[c];
      }
    }

    // Final scaling: halve each coefficient and round to the nearest
    // integer before narrowing to short.
    for (n = 0; n < 256; n++) {
      out[n] = (short)round(coeffs[n] / 2);
    }
  }
  vp8_clear_system_state();  // Make it simd safe : __asm emms;
}
|