Adds wavelet transforms + hybrid dct/dwt variants
The wavelets implemented are 2/6, 5/3 and 9/7, each with a lifting-based scheme for even block sizes. The 9/7 one is currently implemented in double-precision floating point. This is to start experiments with: 1. Replacing large transforms (32x32 and 64x64) with wavelets or wavelet-dct hybrids that can hopefully localize errors better spatially. (This will also need an alternate entropy coder.) 2. Super-resolution modes where the higher sub-bands may be selectively skipped from being conveyed, while a smart reconstruction recovers the lost frequencies. The current patch includes two types of 32x32 and 64x64 transforms: one where only wavelets are used, and another where a single-level wavelet decomposition is followed by a lower-resolution dct on the low-low band. Change-Id: I2d6755c4e6c8ec9386a04633dacbe0de3b0043ec
This commit is contained in:
parent
5a69abc66b
commit
b433dd4443
1
configure
vendored
1
configure
vendored
@ -301,6 +301,7 @@ EXPERIMENT_LIST="
|
|||||||
bitstream_fixes
|
bitstream_fixes
|
||||||
newmvref
|
newmvref
|
||||||
misc_entropy
|
misc_entropy
|
||||||
|
wavelets
|
||||||
"
|
"
|
||||||
CONFIG_LIST="
|
CONFIG_LIST="
|
||||||
external_build
|
external_build
|
||||||
|
@ -709,6 +709,33 @@ void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if CONFIG_WAVELETS
|
||||||
|
// 2-D 16x16 inverse DCT that writes the result straight into an int16_t
// buffer instead of adding it onto 8-bit pixels.
//   input  : 16x16 coefficients, row-major, 16 entries per row.
//   dest   : output samples; element (row, col) is dest[row * stride + col].
//   stride : distance, in elements, between consecutive output rows.
// The final rounding shift is 3 bits (ROUND_POWER_OF_TWO is defined elsewhere;
// presumably a round-to-nearest right shift -- confirm against its definition).
void vp9_idct16x16_noscale_c(const tran_low_t *input, int16_t *dest,
                             int stride) {
  tran_low_t row_pass[16 * 16];
  tran_low_t col_in[16], col_out[16];
  int r, c;

  // Pass 1: 1-D inverse transform of every coefficient row.
  for (r = 0; r < 16; ++r)
    idct16(input + r * 16, row_pass + r * 16);

  // Pass 2: 1-D inverse transform of every column of the pass-1 result,
  // followed by the final rounding shift into the destination.
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r)
      col_in[r] = row_pass[r * 16 + c];
    idct16(col_in, col_out);
    for (r = 0; r < 16; ++r)
      dest[r * stride + c] = ROUND_POWER_OF_TWO(col_out[r], 3);
  }
}
|
||||||
|
#endif // CONFIG_WAVELETS
|
||||||
|
|
||||||
static void iadst16(const tran_low_t *input, tran_low_t *output) {
|
static void iadst16(const tran_low_t *input, tran_low_t *output) {
|
||||||
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
|
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
|
||||||
tran_high_t s9, s10, s11, s12, s13, s14, s15;
|
tran_high_t s9, s10, s11, s12, s13, s14, s15;
|
||||||
@ -1361,6 +1388,46 @@ void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if CONFIG_WAVELETS
|
||||||
|
void vp9_idct32x32_noscale_c(const tran_low_t *input, int16_t *dest,
|
||||||
|
int stride) {
|
||||||
|
tran_low_t out[32 * 32];
|
||||||
|
tran_low_t *outptr = out;
|
||||||
|
int i, j;
|
||||||
|
tran_low_t temp_in[32], temp_out[32];
|
||||||
|
|
||||||
|
// Rows
|
||||||
|
for (i = 0; i < 32; ++i) {
|
||||||
|
int16_t zero_coeff[16];
|
||||||
|
for (j = 0; j < 16; ++j)
|
||||||
|
zero_coeff[j] = input[2 * j] | input[2 * j + 1];
|
||||||
|
for (j = 0; j < 8; ++j)
|
||||||
|
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
||||||
|
for (j = 0; j < 4; ++j)
|
||||||
|
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
||||||
|
for (j = 0; j < 2; ++j)
|
||||||
|
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
||||||
|
|
||||||
|
if (zero_coeff[0] | zero_coeff[1])
|
||||||
|
idct32(input, outptr);
|
||||||
|
else
|
||||||
|
vpx_memset(outptr, 0, sizeof(tran_low_t) * 32);
|
||||||
|
input += 32;
|
||||||
|
outptr += 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Columns
|
||||||
|
for (i = 0; i < 32; ++i) {
|
||||||
|
for (j = 0; j < 32; ++j)
|
||||||
|
temp_in[j] = out[j * 32 + i];
|
||||||
|
idct32(temp_in, temp_out);
|
||||||
|
for (j = 0; j < 32; ++j) {
|
||||||
|
dest[j * stride + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // CONFIG_WAVELETS
|
||||||
|
|
||||||
void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
|
void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
|
||||||
int stride) {
|
int stride) {
|
||||||
tran_low_t out[32 * 32] = {0};
|
tran_low_t out[32 * 32] = {0};
|
||||||
|
@ -156,7 +156,7 @@ void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
|
|||||||
#if CONFIG_TX64X64
|
#if CONFIG_TX64X64
|
||||||
void vp9_idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
|
void vp9_idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||||
int eob);
|
int eob);
|
||||||
#endif
|
#endif // CONFIG_TX64X64
|
||||||
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||||
int stride, int eob);
|
int stride, int eob);
|
||||||
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||||
|
352
vp9/common/vp9_idwt.c
Normal file
352
vp9/common/vp9_idwt.c
Normal file
@ -0,0 +1,352 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "./vp9_rtcd.h"
|
||||||
|
#include "vp9/common/vp9_systemdependent.h"
|
||||||
|
#include "vp9/common/vp9_blockd.h"
|
||||||
|
#include "vp9/common/vp9_idwt.h"
|
||||||
|
|
||||||
|
|
||||||
|
// Note: block length must be even for this implementation
|
||||||
|
// One 1-D inverse 5/3 lifting pass, row variant.
//   length   : number of output samples; must be even and at least 2.
//   lowpass  : length/2 low-band coefficients; MODIFIED in place.
//   highpass : length/2 high-band coefficients; read only.
//   x        : receives `length` interleaved samples (even index from the
//              low band, odd index from the high band). Must not overlap
//              lowpass/highpass.
// Even outputs are halved with rounding and odd outputs use a quarter of the
// neighbouring low-band sum.
static void synthesis_53_row(int length,
                             tran_low_t *lowpass, tran_low_t *highpass,
                             tran_low_t *x) {
  const int half = length >> 1;
  tran_low_t prev_high = highpass[0];  // left edge: reuse the first sample
  int k;

  // Step 1: subtract the rounded average of the two adjacent high-band
  // coefficients from each low-band coefficient.
  for (k = 0; k < half; ++k) {
    lowpass[k] -= (prev_high + highpass[k] + 1) >> 1;
    prev_high = highpass[k];
  }

  // Step 2: interleave. Every pair except the last can look one low-band
  // sample ahead.
  for (k = 0; k < half - 1; ++k) {
    const tran_low_t cur = lowpass[k];
    x[2 * k] = (cur + 1) >> 1;
    x[2 * k + 1] = highpass[k] + ((cur + lowpass[k + 1] + 2) >> 2);
  }

  // Right edge: no next low-band sample, so the current one stands in for it.
  {
    const tran_low_t last = lowpass[half - 1];
    x[2 * half - 2] = (last + 1) >> 1;
    x[2 * half - 1] = highpass[half - 1] + ((last + 1) >> 1);
  }
}
|
||||||
|
|
||||||
|
// One 1-D inverse 5/3 lifting pass, column variant.
//   length   : number of output samples; must be even and at least 2.
//   lowpass  : length/2 low-band coefficients; MODIFIED in place.
//   highpass : length/2 high-band coefficients; read only.
//   x        : receives `length` interleaved samples (even index from the
//              low band, odd index from the high band). Must not overlap
//              lowpass/highpass.
// Even outputs are the updated low-band values as-is and odd outputs use
// twice the high-band value plus half the neighbouring low-band sum.
static void synthesis_53_col(int length,
                             tran_low_t *lowpass, tran_low_t *highpass,
                             tran_low_t *x) {
  const int half = length >> 1;
  tran_low_t prev_high = highpass[0];  // left edge: reuse the first sample
  int k;

  // Step 1: subtract the rounded average of the two adjacent high-band
  // coefficients from each low-band coefficient.
  for (k = 0; k < half; ++k) {
    lowpass[k] -= (prev_high + highpass[k] + 1) >> 1;
    prev_high = highpass[k];
  }

  // Step 2: interleave. The high-band term is doubled with a multiplication
  // rather than `<< 1`: high-band coefficients can be negative, and
  // left-shifting a negative value is undefined behaviour in C.
  for (k = 0; k < half - 1; ++k) {
    const tran_low_t cur = lowpass[k];
    x[2 * k] = cur;
    x[2 * k + 1] = highpass[k] * 2 + ((cur + lowpass[k + 1] + 1) >> 1);
  }

  // Right edge: no next low-band sample, so the current one stands in for it.
  x[2 * half - 2] = lowpass[half - 1];
  x[2 * half - 1] = highpass[half - 1] * 2 + lowpass[half - 1];
}
|
||||||
|
|
||||||
|
static void dyadic_synthesize_53(int levels, int width, int height,
|
||||||
|
tran_low_t *c, int pitch_c,
|
||||||
|
int16_t *x, int pitch_x,
|
||||||
|
int dwt_scale_bits) {
|
||||||
|
int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
|
||||||
|
tran_low_t buffer[2 * DWT_MAX_LENGTH];
|
||||||
|
const int dwt_scale_rnd = 1 << (dwt_scale_bits - 1);
|
||||||
|
|
||||||
|
th[0] = hh;
|
||||||
|
tw[0] = hw;
|
||||||
|
for (i = 1; i <= levels; i++) {
|
||||||
|
th[i] = (th[i - 1] + 1) >> 1;
|
||||||
|
tw[i] = (tw[i - 1] + 1) >> 1;
|
||||||
|
}
|
||||||
|
for (lv = levels - 1; lv >= 0; lv--) {
|
||||||
|
nh = th[lv];
|
||||||
|
nw = tw[lv];
|
||||||
|
hh = th[lv + 1];
|
||||||
|
hw = tw[lv + 1];
|
||||||
|
if ((nh < 2) || (nw < 2)) continue;
|
||||||
|
for (j = 0; j < nw; j++) {
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
buffer[i] = c[i * pitch_c + j];
|
||||||
|
synthesis_53_col(nh, buffer, buffer + hh, buffer + nh);
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
c[i * pitch_c + j] = buffer[i + nh];
|
||||||
|
}
|
||||||
|
for (i = 0; i < nh; i++) {
|
||||||
|
memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));
|
||||||
|
synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i = 0; i < height; i++) {
|
||||||
|
for (j = 0; j < width; j++) {
|
||||||
|
x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?
|
||||||
|
((c[i * pitch_c + j] + dwt_scale_rnd) >> dwt_scale_bits) :
|
||||||
|
-((-c[i * pitch_c + j] + dwt_scale_rnd) >> dwt_scale_bits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: block length must be even for this implementation
|
||||||
|
// One 1-D inverse 2/6 lifting pass, row variant.
//   length   : number of output samples; must be even.
//   lowpass  : length/2 low-band coefficients; read only.
//   highpass : length/2 high-band coefficients; MODIFIED in place when
//              length/2 >= 4.
//   x        : receives `length` interleaved samples; must not overlap
//              lowpass/highpass.
// Each output pair is the rounded half-sum and half-difference of one
// low/high coefficient pair.
static void synthesis_26_row(int length,
                             tran_low_t *lowpass, tran_low_t *highpass,
                             tran_low_t *x) {
  const int half = length >> 1;
  int k;

  // High-band correction from the neighbouring low-band samples. It is only
  // applied when there are at least four coefficient pairs.
  if (half >= 4) {
    tran_low_t left = lowpass[0];  // left edge: own sample acts as neighbour
    for (k = 0; k < half - 1; ++k) {
      highpass[k] += (left - lowpass[k + 1] + 4) >> 3;
      left = lowpass[k];
    }
    // Right edge: own sample acts as the right neighbour.
    highpass[half - 1] += (left - lowpass[half - 1] + 4) >> 3;
  }

  // Butterfly each (low, high) pair into two neighbouring output samples.
  for (k = 0; k < half; ++k) {
    const tran_low_t hi = highpass[k];
    const tran_low_t lo = lowpass[k];
    x[2 * k] = (lo + hi + 1) >> 1;
    x[2 * k + 1] = (lo - hi + 1) >> 1;
  }
}
|
||||||
|
|
||||||
|
// One 1-D inverse 2/6 lifting pass, column variant.
//   length   : number of output samples; must be even.
//   lowpass  : length/2 low-band coefficients; read only.
//   highpass : length/2 high-band coefficients; MODIFIED in place when
//              length/2 >= 4.
//   x        : receives `length` interleaved samples; must not overlap
//              lowpass/highpass.
// Each output pair is the plain sum and difference of one low/high
// coefficient pair, with no halving.
static void synthesis_26_col(int length,
                             tran_low_t *lowpass, tran_low_t *highpass,
                             tran_low_t *x) {
  const int half = length >> 1;
  int k;

  // High-band correction from the neighbouring low-band samples. It is only
  // applied when there are at least four coefficient pairs.
  if (half >= 4) {
    tran_low_t left = lowpass[0];  // left edge: own sample acts as neighbour
    for (k = 0; k < half - 1; ++k) {
      highpass[k] += (left - lowpass[k + 1] + 4) >> 3;
      left = lowpass[k];
    }
    // Right edge: own sample acts as the right neighbour.
    highpass[half - 1] += (left - lowpass[half - 1] + 4) >> 3;
  }

  // Butterfly each (low, high) pair into two neighbouring output samples.
  for (k = 0; k < half; ++k) {
    const tran_low_t hi = highpass[k];
    const tran_low_t lo = lowpass[k];
    x[2 * k] = lo + hi;
    x[2 * k + 1] = lo - hi;
  }
}
|
||||||
|
|
||||||
|
static void dyadic_synthesize_26(int levels, int width, int height,
|
||||||
|
tran_low_t *c, int pitch_c,
|
||||||
|
int16_t *x, int pitch_x,
|
||||||
|
int dwt_scale_bits) {
|
||||||
|
int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
|
||||||
|
tran_low_t buffer[2 * DWT_MAX_LENGTH];
|
||||||
|
const int dwt_scale_rnd = 1 << (dwt_scale_bits - 1);
|
||||||
|
|
||||||
|
th[0] = hh;
|
||||||
|
tw[0] = hw;
|
||||||
|
for (i = 1; i <= levels; i++) {
|
||||||
|
th[i] = (th[i - 1] + 1) >> 1;
|
||||||
|
tw[i] = (tw[i - 1] + 1) >> 1;
|
||||||
|
}
|
||||||
|
for (lv = levels - 1; lv >= 0; lv--) {
|
||||||
|
nh = th[lv];
|
||||||
|
nw = tw[lv];
|
||||||
|
hh = th[lv + 1];
|
||||||
|
hw = tw[lv + 1];
|
||||||
|
if ((nh < 2) || (nw < 2)) continue;
|
||||||
|
for (j = 0; j < nw; j++) {
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
buffer[i] = c[i * pitch_c + j];
|
||||||
|
synthesis_26_col(nh, buffer, buffer + hh, buffer + nh);
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
c[i * pitch_c + j] = buffer[i + nh];
|
||||||
|
}
|
||||||
|
for (i = 0; i < nh; i++) {
|
||||||
|
memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));
|
||||||
|
synthesis_26_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i = 0; i < height; i++) {
|
||||||
|
for (j = 0; j < width; j++) {
|
||||||
|
x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?
|
||||||
|
((c[i * pitch_c + j] + dwt_scale_rnd) >> dwt_scale_bits) :
|
||||||
|
-((-c[i * pitch_c + j] + dwt_scale_rnd) >> dwt_scale_bits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void synthesis_97(int length, double *lowpass, double *highpass,
|
||||||
|
double *x) {
|
||||||
|
const double a_predict1 = -1.586134342;
|
||||||
|
const double a_update1 = -0.05298011854;
|
||||||
|
const double a_predict2 = 0.8829110762;
|
||||||
|
const double a_update2 = 0.4435068522;
|
||||||
|
const double s_low = 1.149604398;
|
||||||
|
const double s_high = 1/1.149604398;
|
||||||
|
const double inv_s_low = 1 / s_low;
|
||||||
|
const double inv_s_high = 1 / s_high;
|
||||||
|
int i;
|
||||||
|
double y[DWT_MAX_LENGTH];
|
||||||
|
// Undo pack and scale
|
||||||
|
for (i = 0; i < length / 2; i++) {
|
||||||
|
y[i * 2] = lowpass[i] * inv_s_low;
|
||||||
|
y[i * 2 + 1] = highpass[i] * inv_s_high;
|
||||||
|
}
|
||||||
|
memcpy(x, y, sizeof(*y) * length);
|
||||||
|
// Undo update 2
|
||||||
|
for (i = 2; i < length; i += 2) {
|
||||||
|
x[i] -= a_update2 * (x[i - 1] + x[i + 1]);
|
||||||
|
}
|
||||||
|
x[0] -= 2 * a_update2 * x[1];
|
||||||
|
// Undo predict 2
|
||||||
|
for (i = 1; i < length - 2; i += 2) {
|
||||||
|
x[i] -= a_predict2 * (x[i - 1] + x[i + 1]);
|
||||||
|
}
|
||||||
|
x[length - 1] -= 2 * a_predict2 * x[length - 2];
|
||||||
|
// Undo update 1
|
||||||
|
for (i = 2; i < length; i += 2) {
|
||||||
|
x[i] -= a_update1 * (x[i - 1] + x[i + 1]);
|
||||||
|
}
|
||||||
|
x[0] -= 2 * a_update1 * x[1];
|
||||||
|
// Undo predict 1
|
||||||
|
for (i = 1; i < length - 2; i += 2) {
|
||||||
|
x[i] -= a_predict1 * (x[i - 1] + x[i + 1]);
|
||||||
|
}
|
||||||
|
x[length - 1] -= 2 * a_predict1 * x[length - 2];
|
||||||
|
}
|
||||||
|
|
||||||
|
static void dyadic_synthesize_97(int levels, int width, int height,
|
||||||
|
tran_low_t *c, int pitch_c,
|
||||||
|
int16_t *x, int pitch_x,
|
||||||
|
int dwt_scale_bits) {
|
||||||
|
int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
|
||||||
|
double buffer[2 * DWT_MAX_LENGTH];
|
||||||
|
double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
|
||||||
|
|
||||||
|
for (i = 0; i < height; i++)
|
||||||
|
for (j = 0; j < width; j++)
|
||||||
|
y[i * DWT_MAX_LENGTH + j] = c[i * pitch_c + j];
|
||||||
|
th[0] = hh;
|
||||||
|
tw[0] = hw;
|
||||||
|
for (i = 1; i <= levels; i++) {
|
||||||
|
th[i] = (th[i - 1] + 1) >> 1;
|
||||||
|
tw[i] = (tw[i - 1] + 1) >> 1;
|
||||||
|
}
|
||||||
|
for (lv = levels - 1; lv >= 0; lv--) {
|
||||||
|
nh = th[lv];
|
||||||
|
nw = tw[lv];
|
||||||
|
hh = th[lv + 1];
|
||||||
|
hw = tw[lv + 1];
|
||||||
|
if ((nh < 2) || (nw < 2)) continue;
|
||||||
|
for (j = 0; j < nw; j++) {
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
buffer[i] = y[i * DWT_MAX_LENGTH + j];
|
||||||
|
synthesis_97(nh, buffer, buffer + hh, buffer + nh);
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
y[i * DWT_MAX_LENGTH + j] = buffer[i + nh];
|
||||||
|
}
|
||||||
|
for (i = 0; i < nh; i++) {
|
||||||
|
memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
|
||||||
|
synthesis_97(nw, buffer, buffer + hw, &y[i * DWT_MAX_LENGTH]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i = 0; i < height; i++)
|
||||||
|
for (j = 0; j < width; j++)
|
||||||
|
x[i * pitch_x + j] = round(y[i * DWT_MAX_LENGTH + j] /
|
||||||
|
(1 << dwt_scale_bits));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inverse 32x32 wavelet-only transform: undoes a 4-level decomposition with
// the filter selected at compile time by DWT_TYPE (26, 97 or 53) and removes
// 2 scaling bits.
//   input  : 32x32 coefficients with a pitch of 32.
//   output : destination plane.
//   stride : row pitch of `output` in elements.
// If DWT_TYPE is none of the three values the function does nothing.
// NOTE(review): `output` is tran_low_t* while the synthesis routines take
// int16_t* -- these agree only when tran_low_t is int16_t; confirm for
// high-bitdepth builds.
void vp9_idwt32x32_c(tran_low_t *input, tran_low_t *output, int stride) {
#if DWT_TYPE == 26
  dyadic_synthesize_26(4, 32, 32,
                       input, 32,
                       output, stride,
                       2);
#elif DWT_TYPE == 97
  dyadic_synthesize_97(4, 32, 32,
                       input, 32,
                       output, stride,
                       2);
#elif DWT_TYPE == 53
  dyadic_synthesize_53(4, 32, 32,
                       input, 32,
                       output, stride,
                       2);
#endif
}
|
||||||
|
|
||||||
|
// Inverse 32x32 hybrid wavelet/DCT transform.
//   input  : 32x32 coefficients with a pitch of 32; not modified.
//   output : destination plane.
//   stride : row pitch of `output` in elements.
// Steps: (1) copy all coefficients to a scratch plane, (2) run the 16x16
// inverse DCT on the top-left 16x16 quadrant and store the result back into
// that quadrant of the scratch plane, (3) undo one wavelet level on the
// scratch plane with the filter selected by DWT_TYPE, removing 2 scaling
// bits.
// NOTE(review): the scratch plane is tran_low_t while the rtcd prototype of
// vp9_idct16x16_noscale and the synthesis routines use int16_t* -- these
// agree only when tran_low_t is int16_t; confirm for high-bitdepth builds.
void vp9_idwtdct32x32_c(tran_low_t *input, tran_low_t *output, int stride) {
  const int dwt_levels = 1;
  tran_low_t low_band[16 * 16];
  tran_low_t coeffs[32 * 32];
  int row;

  // The input pitch equals the block width, so the whole plane is one
  // contiguous run of 32 * 32 elements.
  memcpy(coeffs, input, sizeof(coeffs));

  // Pack the top-left quadrant into a dense 16x16 block for the DCT.
  for (row = 0; row < 16; ++row)
    memcpy(low_band + row * 16, input + row * 32, sizeof(low_band[0]) * 16);

  vp9_idct16x16_noscale(low_band, coeffs, 32);

#if DWT_TYPE == 26
  dyadic_synthesize_26(dwt_levels, 32, 32, coeffs, 32, output, stride, 2);
#elif DWT_TYPE == 97
  dyadic_synthesize_97(dwt_levels, 32, 32, coeffs, 32, output, stride, 2);
#elif DWT_TYPE == 53
  dyadic_synthesize_53(dwt_levels, 32, 32, coeffs, 32, output, stride, 2);
#endif
}
|
||||||
|
|
||||||
|
#if CONFIG_TX64X64
|
||||||
|
// Inverse 64x64 wavelet-only transform: undoes a 4-level decomposition with
// the filter selected at compile time by DWT_TYPE (26, 97 or 53) and removes
// 1 scaling bit.
//   input  : 64x64 coefficients with a pitch of 64.
//   output : destination plane.
//   stride : row pitch of `output` in elements.
// If DWT_TYPE is none of the three values the function does nothing.
// NOTE(review): `output` is tran_low_t* while the synthesis routines take
// int16_t* -- these agree only when tran_low_t is int16_t; confirm for
// high-bitdepth builds.
void vp9_idwt64x64_c(tran_low_t *input, tran_low_t *output, int stride) {
#if DWT_TYPE == 26
  dyadic_synthesize_26(4, 64, 64,
                       input, 64,
                       output, stride,
                       1);
#elif DWT_TYPE == 97
  dyadic_synthesize_97(4, 64, 64,
                       input, 64,
                       output, stride,
                       1);
#elif DWT_TYPE == 53
  dyadic_synthesize_53(4, 64, 64,
                       input, 64,
                       output, stride,
                       1);
#endif
}
|
||||||
|
|
||||||
|
// Inverse 64x64 hybrid wavelet/DCT transform.
//   input  : 64x64 coefficients with a pitch of 64; not modified.
//   output : destination plane.
//   stride : row pitch of `output` in elements.
// Steps: (1) copy all coefficients to a scratch plane, (2) run the 32x32
// inverse DCT on the top-left 32x32 quadrant and store the result back into
// that quadrant of the scratch plane, (3) undo one wavelet level on the
// scratch plane with the filter selected by DWT_TYPE, removing 1 scaling
// bit.
// NOTE(review): the scratch plane is tran_low_t while the rtcd prototype of
// vp9_idct32x32_noscale and the synthesis routines use int16_t* -- these
// agree only when tran_low_t is int16_t; confirm for high-bitdepth builds.
void vp9_idwtdct64x64_c(tran_low_t *input, tran_low_t *output, int stride) {
  const int dwt_levels = 1;
  tran_low_t low_band[32 * 32];
  tran_low_t coeffs[64 * 64];
  int row;

  // The input pitch equals the block width, so the whole plane is one
  // contiguous run of 64 * 64 elements.
  memcpy(coeffs, input, sizeof(coeffs));

  // Pack the top-left quadrant into a dense 32x32 block for the DCT.
  for (row = 0; row < 32; ++row)
    memcpy(low_band + row * 32, input + row * 64, sizeof(low_band[0]) * 32);

  vp9_idct32x32_noscale(low_band, coeffs, 64);

#if DWT_TYPE == 26
  dyadic_synthesize_26(dwt_levels, 64, 64, coeffs, 64, output, stride, 1);
#elif DWT_TYPE == 97
  dyadic_synthesize_97(dwt_levels, 64, 64, coeffs, 64, output, stride, 1);
#elif DWT_TYPE == 53
  dyadic_synthesize_53(dwt_levels, 64, 64, coeffs, 64, output, stride, 1);
#endif
}
|
||||||
|
#endif // CONFIG_TX64X64
|
39
vp9/common/vp9_idwt.h
Normal file
39
vp9/common/vp9_idwt.h
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef VP9_COMMON_VP9_IDWT_H_
|
||||||
|
#define VP9_COMMON_VP9_IDWT_H_
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include "./vpx_config.h"
|
||||||
|
#include "vp9/common/vp9_common.h"
|
||||||
|
#include "vp9/common/vp9_enums.h"
|
||||||
|
#include "vp9/common/vp9_idct.h"
|
||||||
|
|
||||||
|
#define DWT_MAX_LENGTH 64
|
||||||
|
#define DWT_TYPE 26 // 26/53/97
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CONFIG_TX64X64
|
||||||
|
void vp9_idwt64x64(tran_low_t *input, tran_low_t *output, int stride);
|
||||||
|
void vp9_idwtdct64x64(tran_low_t *input, tran_low_t *output, int stride);
|
||||||
|
#endif // CONFIG_TX64X64
|
||||||
|
void vp9_idwt32x32(tran_low_t *input, tran_low_t *output, int stride);
|
||||||
|
void vp9_idwtdct32x32(tran_low_t *input, tran_low_t *output, int stride);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // VP9_COMMON_VP9_IDWT_H_
|
@ -396,6 +396,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct16x16_256_add/;
|
specialize qw/vp9_idct16x16_256_add/;
|
||||||
|
|
||||||
|
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||||
|
add_proto qw/void vp9_idct16x16_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||||
|
specialize qw/vp9_idct16x16_noscale/;
|
||||||
|
}
|
||||||
|
|
||||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct16x16_10_add/;
|
specialize qw/vp9_idct16x16_10_add/;
|
||||||
|
|
||||||
@ -411,6 +416,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||||
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct64x64_4096_add/;
|
specialize qw/vp9_idct64x64_4096_add/;
|
||||||
|
|
||||||
|
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||||
|
add_proto qw/void vp9_idct32x32_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||||
|
specialize qw/vp9_idct32x32_noscale/;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||||
@ -454,6 +464,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct16x16_256_add/;
|
specialize qw/vp9_idct16x16_256_add/;
|
||||||
|
|
||||||
|
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||||
|
add_proto qw/void vp9_idct16x16_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||||
|
specialize qw/vp9_idct16x16_noscale/;
|
||||||
|
}
|
||||||
|
|
||||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct16x16_10_add/;
|
specialize qw/vp9_idct16x16_10_add/;
|
||||||
|
|
||||||
@ -469,6 +484,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||||
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct64x64_4096_add/;
|
specialize qw/vp9_idct64x64_4096_add/;
|
||||||
|
|
||||||
|
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||||
|
add_proto qw/void vp9_idct32x32_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||||
|
specialize qw/vp9_idct32x32_noscale/;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||||
@ -516,6 +536,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
|
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
|
||||||
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
|
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
|
||||||
|
|
||||||
|
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||||
|
add_proto qw/void vp9_idct16x16_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||||
|
specialize qw/vp9_idct16x16_noscale/;
|
||||||
|
}
|
||||||
|
|
||||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
|
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
|
||||||
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
|
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
|
||||||
@ -535,6 +560,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||||
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||||
specialize qw/vp9_idct64x64_4096_add/;
|
specialize qw/vp9_idct64x64_4096_add/;
|
||||||
|
|
||||||
|
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||||
|
add_proto qw/void vp9_idct32x32_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||||
|
specialize qw/vp9_idct32x32_noscale/;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||||
@ -1498,12 +1528,22 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
|
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
|
||||||
specialize qw/vp9_fdct32x32_rd/;
|
specialize qw/vp9_fdct32x32_rd/;
|
||||||
|
|
||||||
|
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||||
|
add_proto qw/void vp9_fdct16x16_noscale/, "const int16_t *input, tran_low_t *output, int stride";
|
||||||
|
specialize qw/vp9_fdct16x16_noscale/;
|
||||||
|
}
|
||||||
|
|
||||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||||
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
|
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
|
||||||
specialize qw/vp9_fdct64x64_1/;
|
specialize qw/vp9_fdct64x64_1/;
|
||||||
|
|
||||||
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
|
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
|
||||||
specialize qw/vp9_fdct64x64/;
|
specialize qw/vp9_fdct64x64/;
|
||||||
|
|
||||||
|
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||||
|
add_proto qw/void vp9_fdct32x32_noscale/, "const int16_t *input, tran_low_t *output, int stride";
|
||||||
|
specialize qw/vp9_fdct32x32_noscale/;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
specialize qw/vp9_fdct32x32_rd sse2/;
|
specialize qw/vp9_fdct32x32_rd sse2/;
|
||||||
} else {
|
} else {
|
||||||
@ -1546,12 +1586,22 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
|
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
|
||||||
specialize qw/vp9_fdct32x32_rd sse2 avx2/;
|
specialize qw/vp9_fdct32x32_rd sse2 avx2/;
|
||||||
|
|
||||||
|
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||||
|
add_proto qw/void vp9_fdct16x16_noscale/, "const int16_t *input, tran_low_t *output, int stride";
|
||||||
|
specialize qw/vp9_fdct16x16_noscale/;
|
||||||
|
}
|
||||||
|
|
||||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||||
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
|
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
|
||||||
specialize qw/vp9_fdct64x64_1/;
|
specialize qw/vp9_fdct64x64_1/;
|
||||||
|
|
||||||
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
|
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
|
||||||
specialize qw/vp9_fdct64x64/;
|
specialize qw/vp9_fdct64x64/;
|
||||||
|
|
||||||
|
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||||
|
add_proto qw/void vp9_fdct32x32_noscale/, "const int16_t *input, tran_low_t *output, int stride";
|
||||||
|
specialize qw/vp9_fdct32x32_noscale/;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -522,6 +522,193 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if CONFIG_WAVELETS
|
||||||
|
// The difference between this one and the function above is scaling
|
||||||
|
// of the input. This function does not scale so that the actual 2D
|
||||||
|
// transform is unitary. The function above scales the transform to be
|
||||||
|
// 8 times unitary.
|
||||||
|
void vp9_fdct16x16_noscale_c(const int16_t *input, tran_low_t *output,
|
||||||
|
int stride) {
|
||||||
|
// The 2D transform is done with two passes which are actually pretty
|
||||||
|
// similar. In the first one, we transform the columns and transpose
|
||||||
|
// the results. In the second one, we transform the rows. To achieve that,
|
||||||
|
// as the first pass results are transposed, we transpose the columns (that
|
||||||
|
// is the transposed rows) and transpose the results (so that it goes back
|
||||||
|
// in normal/row positions).
|
||||||
|
int pass;
|
||||||
|
// We need an intermediate buffer between passes.
|
||||||
|
tran_low_t intermediate[256];
|
||||||
|
const int16_t *in_pass0 = input;
|
||||||
|
const tran_low_t *in = NULL;
|
||||||
|
tran_low_t *out = intermediate;
|
||||||
|
// Do the two transform/transpose passes
|
||||||
|
for (pass = 0; pass < 2; ++pass) {
|
||||||
|
tran_high_t step1[8]; // canbe16
|
||||||
|
tran_high_t step2[8]; // canbe16
|
||||||
|
tran_high_t step3[8]; // canbe16
|
||||||
|
tran_high_t input[8]; // canbe16
|
||||||
|
tran_high_t temp1, temp2; // needs32
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < 16; i++) {
|
||||||
|
if (0 == pass) {
|
||||||
|
// Calculate input for the first 8 results.
|
||||||
|
input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) >> 1;
|
||||||
|
input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) >> 1;
|
||||||
|
input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) >> 1;
|
||||||
|
input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) >> 1;
|
||||||
|
input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) >> 1;
|
||||||
|
input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) >> 1;
|
||||||
|
input[6] = (in_pass0[6 * stride] + in_pass0[ 9 * stride]) >> 1;
|
||||||
|
input[7] = (in_pass0[7 * stride] + in_pass0[ 8 * stride]) >> 1;
|
||||||
|
// Calculate input for the next 8 results.
|
||||||
|
step1[0] = (in_pass0[7 * stride] - in_pass0[ 8 * stride]) >> 1;
|
||||||
|
step1[1] = (in_pass0[6 * stride] - in_pass0[ 9 * stride]) >> 1;
|
||||||
|
step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) >> 1;
|
||||||
|
step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) >> 1;
|
||||||
|
step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) >> 1;
|
||||||
|
step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) >> 1;
|
||||||
|
step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) >> 1;
|
||||||
|
step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) >> 1;
|
||||||
|
} else {
|
||||||
|
// Calculate input for the first 8 results.
|
||||||
|
input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
|
||||||
|
input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
|
||||||
|
input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
|
||||||
|
input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
|
||||||
|
input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
|
||||||
|
input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
|
||||||
|
input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2);
|
||||||
|
input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2);
|
||||||
|
// Calculate input for the next 8 results.
|
||||||
|
step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2);
|
||||||
|
step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2);
|
||||||
|
step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
|
||||||
|
step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
|
||||||
|
step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
|
||||||
|
step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
|
||||||
|
step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
|
||||||
|
step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
|
||||||
|
}
|
||||||
|
// Work on the first eight values; fdct8(input, even_results);
|
||||||
|
{
|
||||||
|
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
|
||||||
|
tran_high_t t0, t1, t2, t3; // needs32
|
||||||
|
tran_high_t x0, x1, x2, x3; // canbe16
|
||||||
|
|
||||||
|
// stage 1
|
||||||
|
s0 = input[0] + input[7];
|
||||||
|
s1 = input[1] + input[6];
|
||||||
|
s2 = input[2] + input[5];
|
||||||
|
s3 = input[3] + input[4];
|
||||||
|
s4 = input[3] - input[4];
|
||||||
|
s5 = input[2] - input[5];
|
||||||
|
s6 = input[1] - input[6];
|
||||||
|
s7 = input[0] - input[7];
|
||||||
|
|
||||||
|
// fdct4(step, step);
|
||||||
|
x0 = s0 + s3;
|
||||||
|
x1 = s1 + s2;
|
||||||
|
x2 = s1 - s2;
|
||||||
|
x3 = s0 - s3;
|
||||||
|
t0 = (x0 + x1) * cospi_16_64;
|
||||||
|
t1 = (x0 - x1) * cospi_16_64;
|
||||||
|
t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
|
||||||
|
t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
|
||||||
|
out[0] = fdct_round_shift(t0);
|
||||||
|
out[4] = fdct_round_shift(t2);
|
||||||
|
out[8] = fdct_round_shift(t1);
|
||||||
|
out[12] = fdct_round_shift(t3);
|
||||||
|
|
||||||
|
// Stage 2
|
||||||
|
t0 = (s6 - s5) * cospi_16_64;
|
||||||
|
t1 = (s6 + s5) * cospi_16_64;
|
||||||
|
t2 = fdct_round_shift(t0);
|
||||||
|
t3 = fdct_round_shift(t1);
|
||||||
|
|
||||||
|
// Stage 3
|
||||||
|
x0 = s4 + t2;
|
||||||
|
x1 = s4 - t2;
|
||||||
|
x2 = s7 - t3;
|
||||||
|
x3 = s7 + t3;
|
||||||
|
|
||||||
|
// Stage 4
|
||||||
|
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
|
||||||
|
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
|
||||||
|
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
|
||||||
|
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
|
||||||
|
out[2] = fdct_round_shift(t0);
|
||||||
|
out[6] = fdct_round_shift(t2);
|
||||||
|
out[10] = fdct_round_shift(t1);
|
||||||
|
out[14] = fdct_round_shift(t3);
|
||||||
|
}
|
||||||
|
// Work on the next eight values; step1 -> odd_results
|
||||||
|
{
|
||||||
|
// step 2
|
||||||
|
temp1 = (step1[5] - step1[2]) * cospi_16_64;
|
||||||
|
temp2 = (step1[4] - step1[3]) * cospi_16_64;
|
||||||
|
step2[2] = fdct_round_shift(temp1);
|
||||||
|
step2[3] = fdct_round_shift(temp2);
|
||||||
|
temp1 = (step1[4] + step1[3]) * cospi_16_64;
|
||||||
|
temp2 = (step1[5] + step1[2]) * cospi_16_64;
|
||||||
|
step2[4] = fdct_round_shift(temp1);
|
||||||
|
step2[5] = fdct_round_shift(temp2);
|
||||||
|
// step 3
|
||||||
|
step3[0] = step1[0] + step2[3];
|
||||||
|
step3[1] = step1[1] + step2[2];
|
||||||
|
step3[2] = step1[1] - step2[2];
|
||||||
|
step3[3] = step1[0] - step2[3];
|
||||||
|
step3[4] = step1[7] - step2[4];
|
||||||
|
step3[5] = step1[6] - step2[5];
|
||||||
|
step3[6] = step1[6] + step2[5];
|
||||||
|
step3[7] = step1[7] + step2[4];
|
||||||
|
// step 4
|
||||||
|
temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
|
||||||
|
temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
|
||||||
|
step2[1] = fdct_round_shift(temp1);
|
||||||
|
step2[2] = fdct_round_shift(temp2);
|
||||||
|
temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
|
||||||
|
temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
|
||||||
|
step2[5] = fdct_round_shift(temp1);
|
||||||
|
step2[6] = fdct_round_shift(temp2);
|
||||||
|
// step 5
|
||||||
|
step1[0] = step3[0] + step2[1];
|
||||||
|
step1[1] = step3[0] - step2[1];
|
||||||
|
step1[2] = step3[3] + step2[2];
|
||||||
|
step1[3] = step3[3] - step2[2];
|
||||||
|
step1[4] = step3[4] - step2[5];
|
||||||
|
step1[5] = step3[4] + step2[5];
|
||||||
|
step1[6] = step3[7] - step2[6];
|
||||||
|
step1[7] = step3[7] + step2[6];
|
||||||
|
// step 6
|
||||||
|
temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
|
||||||
|
temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
|
||||||
|
out[1] = fdct_round_shift(temp1);
|
||||||
|
out[9] = fdct_round_shift(temp2);
|
||||||
|
temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
|
||||||
|
temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
|
||||||
|
out[5] = fdct_round_shift(temp1);
|
||||||
|
out[13] = fdct_round_shift(temp2);
|
||||||
|
temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
|
||||||
|
temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
|
||||||
|
out[3] = fdct_round_shift(temp1);
|
||||||
|
out[11] = fdct_round_shift(temp2);
|
||||||
|
temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
|
||||||
|
temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
|
||||||
|
out[7] = fdct_round_shift(temp1);
|
||||||
|
out[15] = fdct_round_shift(temp2);
|
||||||
|
}
|
||||||
|
// Do next column (which is a transposed row in second/horizontal pass)
|
||||||
|
in++;
|
||||||
|
in_pass0++;
|
||||||
|
out += 16;
|
||||||
|
}
|
||||||
|
// Setup in/out for next pass.
|
||||||
|
in = intermediate;
|
||||||
|
out = output;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // CONFIG_WAVELETS
|
||||||
|
|
||||||
void vp9_fadst8(const tran_low_t *input, tran_low_t *output) {
|
void vp9_fadst8(const tran_low_t *input, tran_low_t *output) {
|
||||||
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
|
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
|
||||||
|
|
||||||
@ -1389,6 +1576,35 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if CONFIG_WAVELETS
|
||||||
|
// 2-D 32x32 forward DCT built from two 1-D vp9_fdct32 passes.
//   input  : 32x32 block of samples, rows `stride` elements apart.
//   out    : 32x32 coefficients, stored densely (row pitch 32).
// The samples are fed to the first pass as-is (no pre-scaling). After each
// pass the result is rounded and shifted right by 2 bits; the first pass adds
// an extra 1 for strictly positive values, the second for strictly negative
// values.
void vp9_fdct32x32_noscale_c(const int16_t *input, tran_low_t *out,
                             int stride) {
  tran_high_t col_pass[32 * 32];
  tran_high_t line_in[32], line_out[32];
  int row, col, k;

  // Vertical pass: transform each column of the input.
  for (col = 0; col < 32; ++col) {
    for (k = 0; k < 32; ++k) line_in[k] = input[k * stride + col];
    vp9_fdct32(line_in, line_out, 0);
    for (k = 0; k < 32; ++k) {
      const tran_high_t v = line_out[k];
      col_pass[k * 32 + col] = (v + 1 + (v > 0)) >> 2;
    }
  }

  // Horizontal pass: transform each row of the intermediate result.
  for (row = 0; row < 32; ++row) {
    const tran_high_t *src = &col_pass[row * 32];
    tran_low_t *dst = &out[row * 32];
    for (k = 0; k < 32; ++k) line_in[k] = src[k];
    vp9_fdct32(line_in, line_out, 0);
    for (k = 0; k < 32; ++k) {
      const tran_high_t v = line_out[k];
      dst[k] = (tran_low_t)((v + 1 + (v < 0)) >> 2);
    }
  }
}
|
||||||
|
#endif // CONFIG_WAVELETS
|
||||||
|
|
||||||
// Note that although we use dct_32_round in dct32 computation flow,
|
// Note that although we use dct_32_round in dct32 computation flow,
|
||||||
// this 2d fdct32x32 for rate-distortion optimization loop is operating
|
// this 2d fdct32x32 for rate-distortion optimization loop is operating
|
||||||
// within 16 bits precision.
|
// within 16 bits precision.
|
||||||
|
323
vp9/encoder/vp9_dwt.c
Normal file
323
vp9/encoder/vp9_dwt.c
Normal file
@ -0,0 +1,323 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "./vpx_config.h"
|
||||||
|
#include "./vp9_rtcd.h"
|
||||||
|
|
||||||
|
#include "vp9/encoder/vp9_dct.h"
|
||||||
|
#include "vp9/encoder/vp9_dwt.h"
|
||||||
|
|
||||||
|
// Note: block length must be even for this implementation
|
||||||
|
// One level of 5/3 lifting analysis along a row.
//   length   : number of samples in x; must be even (fewer than 2 samples is
//              a no-op).
//   x        : input samples (read only).
//   lowpass  : receives length/2 low-band values.
//   highpass : receives length/2 high-band values.
// x must not overlap lowpass or highpass.
//
// Predict: each odd sample minus the rounded average of its two even
// neighbours; the last odd sample has no right neighbour and uses the left
// one only. The low band starts as twice the even sample and is then updated
// with the rounded average of the two adjacent high-band values (the first
// entry uses its own high-band value twice).
static void analysis_53_row(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  const int last = half - 1;
  tran_low_t prev;
  int i;

  if (half < 1) return;

  // Predict step. Doubling uses a multiply: left-shifting a negative value
  // is undefined behaviour in C.
  for (i = 0; i < last; ++i) {
    const tran_low_t even = x[2 * i];
    lowpass[i] = even * 2;
    highpass[i] = x[2 * i + 1] - ((even + x[2 * i + 2] + 1) >> 1);
  }
  lowpass[last] = x[2 * last] * 2;
  highpass[last] = x[2 * last + 1] - x[2 * last];

  // Update step.
  prev = highpass[0];
  for (i = 0; i < half; ++i) {
    lowpass[i] += (prev + highpass[i] + 1) >> 1;
    prev = highpass[i];
  }
}
|
||||||
|
|
||||||
|
// One level of 5/3 lifting analysis along a column.
//   length   : number of samples in x; must be even (fewer than 2 samples is
//              a no-op).
//   x        : input samples (read only).
//   lowpass  : receives length/2 low-band values.
//   highpass : receives length/2 high-band values.
// x must not overlap lowpass or highpass.
//
// Compared with the row variant, the low band starts from the even sample
// itself (not doubled) and the predicted difference is computed at twice the
// resolution and then rounded down by the matching shift (>> 2 in the
// interior, >> 1 for the last sample).
static void analysis_53_col(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  const int last = half - 1;
  tran_low_t prev;
  int i;

  if (half < 1) return;

  // Predict step. Doubling uses a multiply: left-shifting a negative value
  // is undefined behaviour in C.
  for (i = 0; i < last; ++i) {
    const tran_low_t even = x[2 * i];
    lowpass[i] = even;
    highpass[i] = ((x[2 * i + 1] * 2) - (even + x[2 * i + 2]) + 2) >> 2;
  }
  lowpass[last] = x[2 * last];
  highpass[last] = (x[2 * last + 1] - x[2 * last] + 1) >> 1;

  // Update step: add the rounded average of adjacent high-band values.
  prev = highpass[0];
  for (i = 0; i < half; ++i) {
    lowpass[i] += (prev + highpass[i] + 1) >> 1;
    prev = highpass[i];
  }
}
|
||||||
|
|
||||||
|
static void dyadic_analyze_53(int levels, int width, int height,
|
||||||
|
int16_t *x, int pitch_x,
|
||||||
|
tran_low_t *c, int pitch_c,
|
||||||
|
int dwt_scale_bits) {
|
||||||
|
int lv, i, j, nh, nw, hh = height, hw = width;
|
||||||
|
tran_low_t buffer[2 * DWT_MAX_LENGTH];
|
||||||
|
for (i = 0; i < height; i++) {
|
||||||
|
for (j = 0; j < width; j++) {
|
||||||
|
c[i * pitch_c + j] = x[i * pitch_x + j] << dwt_scale_bits;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (lv = 0; lv < levels; lv++) {
|
||||||
|
nh = hh;
|
||||||
|
hh = (hh + 1) >> 1;
|
||||||
|
nw = hw;
|
||||||
|
hw = (hw + 1) >> 1;
|
||||||
|
if ((nh < 2) || (nw < 2)) return;
|
||||||
|
for (i = 0; i < nh; i++) {
|
||||||
|
memcpy(buffer, &c[i * pitch_c], nw * sizeof(tran_low_t));
|
||||||
|
analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
|
||||||
|
}
|
||||||
|
for (j = 0; j < nw; j++) {
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
buffer[i + nh] = c[i * pitch_c + j];
|
||||||
|
analysis_53_col(nh, buffer + nh, buffer, buffer + hh);
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
c[i * pitch_c + j] = buffer[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// One level of 2/6 wavelet analysis along a row.
//   length   : number of samples in x (expected to be even).
//   x        : input samples (read only).
//   lowpass  : receives length/2 pair sums.
//   highpass : receives length/2 pair differences, refined below.
// x, lowpass and highpass must not overlap.
static void analysis_26_row(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  int k;

  // Haar-style split of each sample pair into sum and difference.
  for (k = 0; k < half; ++k) {
    const tran_low_t first = x[2 * k];
    const tran_low_t second = x[2 * k + 1];
    lowpass[k] = first + second;
    highpass[k] = first - second;
  }

  // With at least four pairs, correct each difference by the rounded eighth
  // of (previous low - next low); at either end the missing neighbour is
  // replaced by the low value at that end.
  if (half >= 4) {
    for (k = 0; k < half; ++k) {
      const tran_low_t before = lowpass[k > 0 ? k - 1 : 0];
      const tran_low_t after = lowpass[k < half - 1 ? k + 1 : half - 1];
      highpass[k] -= (before - after + 4) >> 3;
    }
  }
}
|
||||||
|
|
||||||
|
// One level of 2/6 wavelet analysis along a column.
//   length   : number of samples in x (expected to be even).
//   x        : input samples (read only).
//   lowpass  : receives length/2 rounded pair averages.
//   highpass : receives length/2 rounded half-differences, refined below.
// x, lowpass and highpass must not overlap.
static void analysis_26_col(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  int k;

  // Split each sample pair into a rounded half-sum and half-difference.
  for (k = 0; k < half; ++k) {
    const tran_low_t first = x[2 * k];
    const tran_low_t second = x[2 * k + 1];
    lowpass[k] = (first + second + 1) >> 1;
    highpass[k] = (first - second + 1) >> 1;
  }

  // With at least four pairs, correct each difference by the rounded eighth
  // of (previous low - next low); at either end the missing neighbour is
  // replaced by the low value at that end.
  if (half >= 4) {
    for (k = 0; k < half; ++k) {
      const tran_low_t before = lowpass[k > 0 ? k - 1 : 0];
      const tran_low_t after = lowpass[k < half - 1 ? k + 1 : half - 1];
      highpass[k] -= (before - after + 4) >> 3;
    }
  }
}
|
||||||
|
|
||||||
|
static void dyadic_analyze_26(int levels, int width, int height,
|
||||||
|
int16_t *x, int pitch_x,
|
||||||
|
tran_low_t *c, int pitch_c,
|
||||||
|
int dwt_scale_bits) {
|
||||||
|
int lv, i, j, nh, nw, hh = height, hw = width;
|
||||||
|
tran_low_t buffer[2 * DWT_MAX_LENGTH];
|
||||||
|
for (i = 0; i < height; i++) {
|
||||||
|
for (j = 0; j < width; j++) {
|
||||||
|
c[i * pitch_c + j] = x[i * pitch_x + j] << dwt_scale_bits;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (lv = 0; lv < levels; lv++) {
|
||||||
|
nh = hh;
|
||||||
|
hh = (hh + 1) >> 1;
|
||||||
|
nw = hw;
|
||||||
|
hw = (hw + 1) >> 1;
|
||||||
|
if ((nh < 2) || (nw < 2)) return;
|
||||||
|
for (i = 0; i < nh; i++) {
|
||||||
|
memcpy(buffer, &c[i * pitch_c], nw * sizeof(tran_low_t));
|
||||||
|
analysis_26_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
|
||||||
|
}
|
||||||
|
for (j = 0; j < nw; j++) {
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
buffer[i + nh] = c[i * pitch_c + j];
|
||||||
|
analysis_26_col(nh, buffer + nh, buffer, buffer + hh);
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
c[i * pitch_c + j] = buffer[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lifting step on the odd-indexed samples: x[i] += coef * (x[i-1] + x[i+1]).
// The last odd sample has no right neighbour, so its left one counts twice.
static void lift_odd_97(int length, double *x, double coef) {
  int i;
  for (i = 1; i < length - 2; i += 2) {
    x[i] += coef * (x[i - 1] + x[i + 1]);
  }
  x[length - 1] += 2 * coef * x[length - 2];
}

// Lifting step on the even-indexed samples: x[i] += coef * (x[i-1] + x[i+1]).
// Sample 0 has no left neighbour, so its right one counts twice.
static void lift_even_97(int length, double *x, double coef) {
  int i;
  for (i = 2; i < length; i += 2) {
    x[i] += coef * (x[i - 1] + x[i + 1]);
  }
  x[0] += 2 * coef * x[1];
}

// One level of 9/7 lifting analysis in double precision.
//   length   : number of samples in x; must be even (fewer than 2 samples is
//              a no-op).
//   x        : input samples; MODIFIED in place by the lifting steps.
//   lowpass  : receives length/2 scaled even-indexed results.
//   highpass : receives length/2 scaled odd-indexed results.
// x must not overlap lowpass or highpass. The lifted samples are packed
// directly from x, so no scratch copy (and no limit on length) is needed.
static void analysis_97(int length, double *x,
                        double *lowpass, double *highpass) {
  static const double a_predict1 = -1.586134342;
  static const double a_update1 = -0.05298011854;
  static const double a_predict2 = 0.8829110762;
  static const double a_update2 = 0.4435068522;
  static const double s_low = 1.149604398;
  static const double s_high = 1/1.149604398;
  const int half = length / 2;
  int i;

  if (length < 2) return;

  lift_odd_97(length, x, a_predict1);   // Predict 1
  lift_even_97(length, x, a_update1);   // Update 1
  lift_odd_97(length, x, a_predict2);   // Predict 2
  lift_even_97(length, x, a_update2);   // Update 2

  // Scale and de-interleave into the two bands.
  for (i = 0; i < half; i++) {
    lowpass[i] = x[2 * i] * s_low;
    highpass[i] = x[2 * i + 1] * s_high;
  }
}
|
||||||
|
|
||||||
|
static void dyadic_analyze_97(int levels, int width, int height,
|
||||||
|
int16_t *x, int pitch_x,
|
||||||
|
tran_low_t *c, int pitch_c,
|
||||||
|
int dwt_scale_bits) {
|
||||||
|
int lv, i, j, nh, nw, hh = height, hw = width;
|
||||||
|
double buffer[2 * DWT_MAX_LENGTH];
|
||||||
|
double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
|
||||||
|
for (i = 0; i < height; i++) {
|
||||||
|
for (j = 0; j < width; j++) {
|
||||||
|
y[i * DWT_MAX_LENGTH + j] = x[i * pitch_x + j] << dwt_scale_bits;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (lv = 0; lv < levels; lv++) {
|
||||||
|
nh = hh;
|
||||||
|
hh = (hh + 1) >> 1;
|
||||||
|
nw = hw;
|
||||||
|
hw = (hw + 1) >> 1;
|
||||||
|
if ((nh < 2) || (nw < 2)) return;
|
||||||
|
for (i = 0; i < nh; i++) {
|
||||||
|
memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
|
||||||
|
analysis_97(nw, buffer, &y[i * DWT_MAX_LENGTH],
|
||||||
|
&y[i * DWT_MAX_LENGTH] + hw);
|
||||||
|
}
|
||||||
|
for (j = 0; j < nw; j++) {
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
buffer[i + nh] = y[i * DWT_MAX_LENGTH + j];
|
||||||
|
analysis_97(nh, buffer + nh, buffer, buffer + hh);
|
||||||
|
for (i = 0; i < nh; i++)
|
||||||
|
y[i * DWT_MAX_LENGTH + j] = buffer[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i = 0; i < height; i++) {
|
||||||
|
for (j = 0; j < width; j++) {
|
||||||
|
c[i * pitch_c + j] = round(y[i * DWT_MAX_LENGTH + j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forward 32x32 wavelet transform: up to four dyadic decomposition levels
// with the filter family selected at compile time by DWT_TYPE (26, 97 or 53).
// Input rows are `stride` apart; output is written with row pitch 32 after
// the samples are scaled up by 2 bits.
// NOTE(review): input is tran_low_t * here while dyadic_analyze_* take
// int16_t *; confirm the two types coincide in every build configuration.
void vp9_fdwt32x32_c(tran_low_t *input, tran_low_t *output, int stride) {
  enum { kBlockSize = 32, kLevels = 4, kScaleBits = 2 };
#if DWT_TYPE == 26
  dyadic_analyze_26(kLevels, kBlockSize, kBlockSize, input, stride,
                    output, kBlockSize, kScaleBits);
#elif DWT_TYPE == 97
  dyadic_analyze_97(kLevels, kBlockSize, kBlockSize, input, stride,
                    output, kBlockSize, kScaleBits);
#elif DWT_TYPE == 53
  dyadic_analyze_53(kLevels, kBlockSize, kBlockSize, input, stride,
                    output, kBlockSize, kScaleBits);
#endif
}
|
||||||
|
|
||||||
|
// Hybrid forward 32x32 transform: a single wavelet decomposition level
// (filter family chosen by DWT_TYPE) followed by a 16x16 DCT applied to the
// low-low band in the top-left quadrant of `output`.
// Input rows are `stride` apart; output is written with row pitch 32.
// NOTE(review): input is tran_low_t * here while dyadic_analyze_* take
// int16_t *; confirm the two types coincide in every build configuration.
void vp9_fdwtdct32x32_c(tran_low_t *input, tran_low_t *output,
                        int stride) {
  const int dwt_levels = 1;
  tran_low_t buffer[16 * 16];
  int i;

  // Scales up by 2-bit from unitary
#if DWT_TYPE == 26
  dyadic_analyze_26(dwt_levels, 32, 32, input, stride, output, 32, 2);
#elif DWT_TYPE == 97
  dyadic_analyze_97(dwt_levels, 32, 32, input, stride, output, 32, 2);
#elif DWT_TYPE == 53
  dyadic_analyze_53(dwt_levels, 32, 32, input, stride, output, 32, 2);
#endif

  // 16x16 dct in LL band that is unitary
  vp9_fdct16x16_noscale(output, buffer, 32);

  // Copy the DCT result back over the LL quadrant, one row at a time.
  // Note that the transform overall is 2-bit scaled up from unitary
  for (i = 0; i < 16; ++i) {
    memcpy(&output[i * 32], &buffer[i * 16], sizeof(buffer[0]) * 16);
  }
}
|
||||||
|
|
||||||
|
#if CONFIG_TX64X64
|
||||||
|
// Forward 64x64 wavelet transform: up to four dyadic decomposition levels
// with the filter family selected at compile time by DWT_TYPE (26, 97 or 53).
// Input rows are `stride` apart; output is written with row pitch 64 after
// the samples are scaled up by 1 bit.
// NOTE(review): input is tran_low_t * here while dyadic_analyze_* take
// int16_t *; confirm the two types coincide in every build configuration.
void vp9_fdwt64x64_c(tran_low_t *input, tran_low_t *output, int stride) {
  enum { kBlockSize = 64, kLevels = 4, kScaleBits = 1 };
#if DWT_TYPE == 26
  dyadic_analyze_26(kLevels, kBlockSize, kBlockSize, input, stride,
                    output, kBlockSize, kScaleBits);
#elif DWT_TYPE == 97
  dyadic_analyze_97(kLevels, kBlockSize, kBlockSize, input, stride,
                    output, kBlockSize, kScaleBits);
#elif DWT_TYPE == 53
  dyadic_analyze_53(kLevels, kBlockSize, kBlockSize, input, stride,
                    output, kBlockSize, kScaleBits);
#endif
}
|
||||||
|
|
||||||
|
// Hybrid forward 64x64 transform: a single wavelet decomposition level
// (filter family chosen by DWT_TYPE) followed by a 32x32 DCT applied to the
// low-low band in the top-left quadrant of `output`.
// Input rows are `stride` apart; output is written with row pitch 64.
// NOTE(review): input is tran_low_t * here while dyadic_analyze_* take
// int16_t *; confirm the two types coincide in every build configuration.
void vp9_fdwtdct64x64_c(tran_low_t *input, tran_low_t *output,
                        int stride) {
  enum { kBlockSize = 64, kBandSize = 32, kLevels = 1, kScaleBits = 1 };
  tran_low_t ll_dct[kBandSize * kBandSize];
  int row;

  // The wavelet stage leaves the data 1 bit above unitary scaling.
#if DWT_TYPE == 26
  dyadic_analyze_26(kLevels, kBlockSize, kBlockSize, input, stride,
                    output, kBlockSize, kScaleBits);
#elif DWT_TYPE == 97
  dyadic_analyze_97(kLevels, kBlockSize, kBlockSize, input, stride,
                    output, kBlockSize, kScaleBits);
#elif DWT_TYPE == 53
  dyadic_analyze_53(kLevels, kBlockSize, kBlockSize, input, stride,
                    output, kBlockSize, kScaleBits);
#endif

  // Unitary 32x32 DCT of the LL band, read in place from `output`.
  vp9_fdct32x32_noscale(output, ll_dct, kBlockSize);

  // Write the DCT rows back over the LL quadrant; the overall transform
  // therefore stays 1 bit above unitary scaling.
  for (row = 0; row < kBandSize; ++row) {
    memcpy(&output[row * kBlockSize], &ll_dct[row * kBandSize],
           sizeof(ll_dct[0]) * kBandSize);
  }
}
|
||||||
|
#endif // CONFIG_TX64X64
|
32
vp9/encoder/vp9_dwt.h
Normal file
32
vp9/encoder/vp9_dwt.h
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef VP9_ENCODER_VP9_DWT_H_
|
||||||
|
#define VP9_ENCODER_VP9_DWT_H_
|
||||||
|
|
||||||
|
#include "./vpx_config.h"
|
||||||
|
#include "vp9/common/vp9_idwt.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CONFIG_TX64X64
|
||||||
|
void vp9_fdwt64x64(tran_low_t *input, tran_low_t *output, int stride);
|
||||||
|
void vp9_fdwtdct64x64(tran_low_t *input, tran_low_t *output, int stride);
|
||||||
|
#endif // CONFIG_TX64X64
|
||||||
|
void vp9_fdwt32x32(tran_low_t *input, tran_low_t *output, int stride);
|
||||||
|
void vp9_fdwtdct32x32(tran_low_t *input, tran_low_t *output, int stride);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // VP9_ENCODER_VP9_DWT_H_
|
@ -70,6 +70,8 @@ VP9_COMMON_SRCS-yes += common/vp9_scan.c
|
|||||||
VP9_COMMON_SRCS-yes += common/vp9_scan.h
|
VP9_COMMON_SRCS-yes += common/vp9_scan.h
|
||||||
VP9_COMMON_SRCS-$(CONFIG_GLOBAL_MOTION) += common/vp9_motion_model.c
|
VP9_COMMON_SRCS-$(CONFIG_GLOBAL_MOTION) += common/vp9_motion_model.c
|
||||||
VP9_COMMON_SRCS-$(CONFIG_GLOBAL_MOTION) += common/vp9_motion_model.h
|
VP9_COMMON_SRCS-$(CONFIG_GLOBAL_MOTION) += common/vp9_motion_model.h
|
||||||
|
VP9_COMMON_SRCS-$(CONFIG_WAVELETS) += common/vp9_idwt.c
|
||||||
|
VP9_COMMON_SRCS-$(CONFIG_WAVELETS) += common/vp9_idwt.h
|
||||||
|
|
||||||
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
|
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
|
||||||
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c
|
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c
|
||||||
|
@ -85,6 +85,8 @@ VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.c
|
|||||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.h
|
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.h
|
||||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.c
|
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.c
|
||||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.h
|
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.h
|
||||||
|
VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.c
|
||||||
|
VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.h
|
||||||
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
|
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
|
||||||
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h
|
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h
|
||||||
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
|
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
|
||||||
|
Loading…
x
Reference in New Issue
Block a user