Adds wavelet transforms + hybrid dct/dwt variants
The wavelets implemented are 2/6, 5/3 and 9/7, each with a lifting-based scheme for even block sizes. The 9/7 wavelet is currently implemented in double-precision floating point. This is to start experiments with: (1) replacing large transforms (32x32 and 64x64) with wavelets or wavelet-DCT hybrids that can hopefully localize errors better spatially (this will also need an alternate entropy coder); and (2) super-resolution modes where the higher sub-bands may be selectively skipped rather than conveyed, while a smart reconstruction recovers the lost frequencies. The current patch includes two types of 32x32 and 64x64 transforms: one where only wavelets are used, and another where a single-level wavelet decomposition is followed by a lower-resolution DCT on the low-low band. Change-Id: I2d6755c4e6c8ec9386a04633dacbe0de3b0043ec
This commit is contained in:
parent
5a69abc66b
commit
b433dd4443
1
configure
vendored
1
configure
vendored
@ -301,6 +301,7 @@ EXPERIMENT_LIST="
|
||||
bitstream_fixes
|
||||
newmvref
|
||||
misc_entropy
|
||||
wavelets
|
||||
"
|
||||
CONFIG_LIST="
|
||||
external_build
|
||||
|
@ -709,6 +709,33 @@ void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_WAVELETS
|
||||
// 2-D inverse 16x16 DCT whose result is stored rather than added to a
// prediction. Rows are transformed with idct16() first, then columns; every
// output sample is rounded with ROUND_POWER_OF_TWO(., 3) and written to
// dest[row * stride + col].
//
//   input  - 16x16 coefficients, row-major, 16 values per row.
//   dest   - output samples.
//   stride - distance, in elements, between consecutive rows of dest.
void vp9_idct16x16_noscale_c(const tran_low_t *input, int16_t *dest,
                             int stride) {
  tran_low_t row_pass[16 * 16];
  tran_low_t col_in[16], col_out[16];
  int r, c;

  // Horizontal pass: one 1-D transform per row of coefficients.
  for (r = 0; r < 16; ++r)
    idct16(input + 16 * r, row_pass + 16 * r);

  // Vertical pass: gather a column, transform it, then round and store.
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r)
      col_in[r] = row_pass[r * 16 + c];
    idct16(col_in, col_out);
    for (r = 0; r < 16; ++r)
      dest[r * stride + c] = ROUND_POWER_OF_TWO(col_out[r], 3);
  }
}
|
||||
#endif // CONFIG_WAVELETS
|
||||
|
||||
static void iadst16(const tran_low_t *input, tran_low_t *output) {
|
||||
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
|
||||
tran_high_t s9, s10, s11, s12, s13, s14, s15;
|
||||
@ -1361,6 +1388,46 @@ void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_WAVELETS
|
||||
void vp9_idct32x32_noscale_c(const tran_low_t *input, int16_t *dest,
|
||||
int stride) {
|
||||
tran_low_t out[32 * 32];
|
||||
tran_low_t *outptr = out;
|
||||
int i, j;
|
||||
tran_low_t temp_in[32], temp_out[32];
|
||||
|
||||
// Rows
|
||||
for (i = 0; i < 32; ++i) {
|
||||
int16_t zero_coeff[16];
|
||||
for (j = 0; j < 16; ++j)
|
||||
zero_coeff[j] = input[2 * j] | input[2 * j + 1];
|
||||
for (j = 0; j < 8; ++j)
|
||||
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
||||
for (j = 0; j < 4; ++j)
|
||||
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
||||
for (j = 0; j < 2; ++j)
|
||||
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
||||
|
||||
if (zero_coeff[0] | zero_coeff[1])
|
||||
idct32(input, outptr);
|
||||
else
|
||||
vpx_memset(outptr, 0, sizeof(tran_low_t) * 32);
|
||||
input += 32;
|
||||
outptr += 32;
|
||||
}
|
||||
|
||||
// Columns
|
||||
for (i = 0; i < 32; ++i) {
|
||||
for (j = 0; j < 32; ++j)
|
||||
temp_in[j] = out[j * 32 + i];
|
||||
idct32(temp_in, temp_out);
|
||||
for (j = 0; j < 32; ++j) {
|
||||
dest[j * stride + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_WAVELETS
|
||||
|
||||
void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
|
||||
int stride) {
|
||||
tran_low_t out[32 * 32] = {0};
|
||||
|
@ -156,7 +156,7 @@ void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int eob);
|
||||
#endif
|
||||
#endif // CONFIG_TX64X64
|
||||
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||
int stride, int eob);
|
||||
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||
|
352
vp9/common/vp9_idwt.c
Normal file
352
vp9/common/vp9_idwt.c
Normal file
@ -0,0 +1,352 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_systemdependent.h"
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
#include "vp9/common/vp9_idwt.h"
|
||||
|
||||
|
||||
// Note: block length must be even for this implementation
|
||||
static void synthesis_53_row(int length,
|
||||
tran_low_t *lowpass, tran_low_t *highpass,
|
||||
tran_low_t *x) {
|
||||
tran_low_t r, *a, *b;
|
||||
int n;
|
||||
|
||||
n = length >> 1;
|
||||
b = highpass;
|
||||
a = lowpass;
|
||||
r = *highpass;
|
||||
while (n--) {
|
||||
*a++ -= (r + (*b) + 1) >> 1;
|
||||
r = *b++;
|
||||
}
|
||||
|
||||
n = length >> 1;
|
||||
b = highpass;
|
||||
a = lowpass;
|
||||
while (--n) {
|
||||
*x++ = ((r = *a++) + 1) >> 1;
|
||||
*x++ = *b++ + ((r + (*a) + 2) >> 2);
|
||||
}
|
||||
*x++ = ((r = *a) + 1) >> 1;
|
||||
*x++ = *b + ((r + 1) >> 1);
|
||||
}
|
||||
|
||||
// Inverse 5/3 lifting step along a column.
//
// Same lifting structure as the row variant, but the outputs are not halved:
// even samples are the updated lowpass values and odd samples use twice the
// highpass coefficient plus the rounded average of the neighbouring lowpass
// values.
//
// Block length must be even (and at least 2).
//
//   length   - number of output samples.
//   lowpass  - length / 2 lowpass coefficients; modified in place.
//   highpass - length / 2 highpass coefficients; read only.
//   x        - receives `length` interleaved samples.
static void synthesis_53_col(int length,
                             tran_low_t *lowpass, tran_low_t *highpass,
                             tran_low_t *x) {
  const int half = length >> 1;
  tran_low_t prev_high = highpass[0];
  tran_low_t cur;
  int k;

  // Undo the update step. The first highpass sample stands in for its own
  // left neighbour at the boundary.
  for (k = 0; k < half; ++k) {
    lowpass[k] -= (prev_high + highpass[k] + 1) >> 1;
    prev_high = highpass[k];
  }

  // Undo the predict step and interleave. The highpass term is doubled with
  // a multiplication rather than `<< 1`: highpass coefficients are routinely
  // negative and left-shifting a negative value is undefined behaviour in C.
  for (k = 0; k + 1 < half; ++k) {
    cur = lowpass[k];
    x[2 * k] = cur;
    x[2 * k + 1] = highpass[k] * 2 + ((cur + lowpass[k + 1] + 1) >> 1);
  }

  // Final pair: no right-hand lowpass neighbour exists, so the current
  // lowpass sample is used on its own.
  cur = lowpass[half - 1];
  x[2 * half - 2] = cur;
  x[2 * half - 1] = highpass[half - 1] * 2 + cur;
}
|
||||
|
||||
static void dyadic_synthesize_53(int levels, int width, int height,
|
||||
tran_low_t *c, int pitch_c,
|
||||
int16_t *x, int pitch_x,
|
||||
int dwt_scale_bits) {
|
||||
int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
|
||||
tran_low_t buffer[2 * DWT_MAX_LENGTH];
|
||||
const int dwt_scale_rnd = 1 << (dwt_scale_bits - 1);
|
||||
|
||||
th[0] = hh;
|
||||
tw[0] = hw;
|
||||
for (i = 1; i <= levels; i++) {
|
||||
th[i] = (th[i - 1] + 1) >> 1;
|
||||
tw[i] = (tw[i - 1] + 1) >> 1;
|
||||
}
|
||||
for (lv = levels - 1; lv >= 0; lv--) {
|
||||
nh = th[lv];
|
||||
nw = tw[lv];
|
||||
hh = th[lv + 1];
|
||||
hw = tw[lv + 1];
|
||||
if ((nh < 2) || (nw < 2)) continue;
|
||||
for (j = 0; j < nw; j++) {
|
||||
for (i = 0; i < nh; i++)
|
||||
buffer[i] = c[i * pitch_c + j];
|
||||
synthesis_53_col(nh, buffer, buffer + hh, buffer + nh);
|
||||
for (i = 0; i < nh; i++)
|
||||
c[i * pitch_c + j] = buffer[i + nh];
|
||||
}
|
||||
for (i = 0; i < nh; i++) {
|
||||
memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));
|
||||
synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < height; i++) {
|
||||
for (j = 0; j < width; j++) {
|
||||
x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?
|
||||
((c[i * pitch_c + j] + dwt_scale_rnd) >> dwt_scale_bits) :
|
||||
-((-c[i * pitch_c + j] + dwt_scale_rnd) >> dwt_scale_bits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Note: block length must be even for this implementation
|
||||
static void synthesis_26_row(int length,
|
||||
tran_low_t *lowpass, tran_low_t *highpass,
|
||||
tran_low_t *x) {
|
||||
tran_low_t r, s, *a, *b;
|
||||
int i, n = length >> 1;
|
||||
|
||||
if (n >= 4) {
|
||||
a = lowpass;
|
||||
b = highpass;
|
||||
r = *lowpass;
|
||||
while (--n) {
|
||||
*b++ += (r - a[1] + 4) >> 3;
|
||||
r = *a++;
|
||||
}
|
||||
*b += (r - *a + 4) >> 3;
|
||||
}
|
||||
a = lowpass;
|
||||
b = highpass;
|
||||
for (i = length >> 1; i; i--) {
|
||||
s = *b++;
|
||||
r = *a++;
|
||||
*x++ = (r + s + 1) >> 1;
|
||||
*x++ = (r - s + 1) >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Inverse 2/6 lifting step along a column.
//
// Same highpass correction as the row variant, but the butterfly outputs are
// the plain sum and difference (no halving). Block length must be even.
//
//   length   - number of output samples.
//   lowpass  - length / 2 lowpass coefficients; read only.
//   highpass - length / 2 highpass coefficients; modified in place when the
//              half-length is at least 4.
//   x        - receives `length` samples: sum then difference per pair.
static void synthesis_26_col(int length,
                             tran_low_t *lowpass, tran_low_t *highpass,
                             tran_low_t *x) {
  const int half = length >> 1;
  int k;

  // Correct each highpass coefficient with the difference between its
  // neighbouring lowpass samples. Skipped for short blocks.
  if (half >= 4) {
    tran_low_t left = lowpass[0];  // Left edge: sample 0 is its own neighbour.
    for (k = 0; k + 1 < half; ++k) {
      highpass[k] += (left - lowpass[k + 1] + 4) >> 3;
      left = lowpass[k];
    }
    // Right edge: the last lowpass sample serves as its own right neighbour.
    highpass[half - 1] += (left - lowpass[half - 1] + 4) >> 3;
  }

  // Butterfly each pair into two output samples.
  for (k = 0; k < half; ++k) {
    const tran_low_t hi = highpass[k];
    const tran_low_t lo = lowpass[k];
    x[2 * k] = lo + hi;
    x[2 * k + 1] = lo - hi;
  }
}
|
||||
|
||||
static void dyadic_synthesize_26(int levels, int width, int height,
|
||||
tran_low_t *c, int pitch_c,
|
||||
int16_t *x, int pitch_x,
|
||||
int dwt_scale_bits) {
|
||||
int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
|
||||
tran_low_t buffer[2 * DWT_MAX_LENGTH];
|
||||
const int dwt_scale_rnd = 1 << (dwt_scale_bits - 1);
|
||||
|
||||
th[0] = hh;
|
||||
tw[0] = hw;
|
||||
for (i = 1; i <= levels; i++) {
|
||||
th[i] = (th[i - 1] + 1) >> 1;
|
||||
tw[i] = (tw[i - 1] + 1) >> 1;
|
||||
}
|
||||
for (lv = levels - 1; lv >= 0; lv--) {
|
||||
nh = th[lv];
|
||||
nw = tw[lv];
|
||||
hh = th[lv + 1];
|
||||
hw = tw[lv + 1];
|
||||
if ((nh < 2) || (nw < 2)) continue;
|
||||
for (j = 0; j < nw; j++) {
|
||||
for (i = 0; i < nh; i++)
|
||||
buffer[i] = c[i * pitch_c + j];
|
||||
synthesis_26_col(nh, buffer, buffer + hh, buffer + nh);
|
||||
for (i = 0; i < nh; i++)
|
||||
c[i * pitch_c + j] = buffer[i + nh];
|
||||
}
|
||||
for (i = 0; i < nh; i++) {
|
||||
memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));
|
||||
synthesis_26_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < height; i++) {
|
||||
for (j = 0; j < width; j++) {
|
||||
x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?
|
||||
((c[i * pitch_c + j] + dwt_scale_rnd) >> dwt_scale_bits) :
|
||||
-((-c[i * pitch_c + j] + dwt_scale_rnd) >> dwt_scale_bits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void synthesis_97(int length, double *lowpass, double *highpass,
|
||||
double *x) {
|
||||
const double a_predict1 = -1.586134342;
|
||||
const double a_update1 = -0.05298011854;
|
||||
const double a_predict2 = 0.8829110762;
|
||||
const double a_update2 = 0.4435068522;
|
||||
const double s_low = 1.149604398;
|
||||
const double s_high = 1/1.149604398;
|
||||
const double inv_s_low = 1 / s_low;
|
||||
const double inv_s_high = 1 / s_high;
|
||||
int i;
|
||||
double y[DWT_MAX_LENGTH];
|
||||
// Undo pack and scale
|
||||
for (i = 0; i < length / 2; i++) {
|
||||
y[i * 2] = lowpass[i] * inv_s_low;
|
||||
y[i * 2 + 1] = highpass[i] * inv_s_high;
|
||||
}
|
||||
memcpy(x, y, sizeof(*y) * length);
|
||||
// Undo update 2
|
||||
for (i = 2; i < length; i += 2) {
|
||||
x[i] -= a_update2 * (x[i - 1] + x[i + 1]);
|
||||
}
|
||||
x[0] -= 2 * a_update2 * x[1];
|
||||
// Undo predict 2
|
||||
for (i = 1; i < length - 2; i += 2) {
|
||||
x[i] -= a_predict2 * (x[i - 1] + x[i + 1]);
|
||||
}
|
||||
x[length - 1] -= 2 * a_predict2 * x[length - 2];
|
||||
// Undo update 1
|
||||
for (i = 2; i < length; i += 2) {
|
||||
x[i] -= a_update1 * (x[i - 1] + x[i + 1]);
|
||||
}
|
||||
x[0] -= 2 * a_update1 * x[1];
|
||||
// Undo predict 1
|
||||
for (i = 1; i < length - 2; i += 2) {
|
||||
x[i] -= a_predict1 * (x[i - 1] + x[i + 1]);
|
||||
}
|
||||
x[length - 1] -= 2 * a_predict1 * x[length - 2];
|
||||
}
|
||||
|
||||
static void dyadic_synthesize_97(int levels, int width, int height,
|
||||
tran_low_t *c, int pitch_c,
|
||||
int16_t *x, int pitch_x,
|
||||
int dwt_scale_bits) {
|
||||
int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
|
||||
double buffer[2 * DWT_MAX_LENGTH];
|
||||
double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
for (j = 0; j < width; j++)
|
||||
y[i * DWT_MAX_LENGTH + j] = c[i * pitch_c + j];
|
||||
th[0] = hh;
|
||||
tw[0] = hw;
|
||||
for (i = 1; i <= levels; i++) {
|
||||
th[i] = (th[i - 1] + 1) >> 1;
|
||||
tw[i] = (tw[i - 1] + 1) >> 1;
|
||||
}
|
||||
for (lv = levels - 1; lv >= 0; lv--) {
|
||||
nh = th[lv];
|
||||
nw = tw[lv];
|
||||
hh = th[lv + 1];
|
||||
hw = tw[lv + 1];
|
||||
if ((nh < 2) || (nw < 2)) continue;
|
||||
for (j = 0; j < nw; j++) {
|
||||
for (i = 0; i < nh; i++)
|
||||
buffer[i] = y[i * DWT_MAX_LENGTH + j];
|
||||
synthesis_97(nh, buffer, buffer + hh, buffer + nh);
|
||||
for (i = 0; i < nh; i++)
|
||||
y[i * DWT_MAX_LENGTH + j] = buffer[i + nh];
|
||||
}
|
||||
for (i = 0; i < nh; i++) {
|
||||
memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
|
||||
synthesis_97(nw, buffer, buffer + hw, &y[i * DWT_MAX_LENGTH]);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < height; i++)
|
||||
for (j = 0; j < width; j++)
|
||||
x[i * pitch_x + j] = round(y[i * DWT_MAX_LENGTH + j] /
|
||||
(1 << dwt_scale_bits));
|
||||
}
|
||||
|
||||
// Inverse 32x32 wavelet transform: a 4-level synthesis with the wavelet
// selected at compile time by DWT_TYPE (26, 97 or 53) and a final 2-bit
// down-scale. If DWT_TYPE is none of those values the function does nothing.
//
//   input  - 32x32 coefficients with a row pitch of 32; handed to the
//            synthesis routine as its working plane.
//   output - reconstructed samples.
//   stride - row pitch of output.
//
// NOTE(review): output is declared tran_low_t* but is forwarded to a
// parameter of type int16_t*; confirm the two types coincide in every
// supported configuration.
void vp9_idwt32x32_c(tran_low_t *input, tran_low_t *output, int stride) {
  enum { kLevels = 4, kSize = 32, kScaleBits = 2 };
#if DWT_TYPE == 26
  dyadic_synthesize_26(kLevels, kSize, kSize, input, kSize, output, stride,
                       kScaleBits);
#elif DWT_TYPE == 97
  dyadic_synthesize_97(kLevels, kSize, kSize, input, kSize, output, stride,
                       kScaleBits);
#elif DWT_TYPE == 53
  dyadic_synthesize_53(kLevels, kSize, kSize, input, kSize, output, stride,
                       kScaleBits);
#endif
}
|
||||
|
||||
// Inverse 32x32 hybrid transform: a 16x16 inverse DCT on the top-left
// (low-low) quadrant followed by a single-level wavelet synthesis of the
// whole block, with the wavelet chosen at compile time by DWT_TYPE and a
// final 2-bit down-scale.
//
// The input block is copied to local storage first, so `input` itself is
// left untouched.
//
//   input  - 32x32 coefficients, row pitch 32.
//   output - reconstructed samples.
//   stride - row pitch of output.
//
// NOTE(review): both the local tran_low_t plane handed to
// vp9_idct16x16_noscale() and `output` are forwarded to int16_t* parameters;
// confirm tran_low_t is int16_t in every supported configuration.
void vp9_idwtdct32x32_c(tran_low_t *input, tran_low_t *output, int stride) {
  const int dwt_levels = 1;
  tran_low_t low_band[16 * 16];
  tran_low_t coeffs[32 * 32];
  int row;

  // Working copy of the whole block (32 contiguous rows of 32 values).
  memcpy(coeffs, input, sizeof(coeffs));

  // Extract the low-low quadrant into a tightly packed 16x16 block.
  for (row = 0; row < 16; ++row)
    memcpy(low_band + row * 16, input + row * 32, sizeof(low_band[0]) * 16);

  // Replace the low-low quadrant of the working copy by its inverse DCT.
  vp9_idct16x16_noscale(low_band, coeffs, 32);

#if DWT_TYPE == 26
  dyadic_synthesize_26(dwt_levels, 32, 32, coeffs, 32, output, stride, 2);
#elif DWT_TYPE == 97
  dyadic_synthesize_97(dwt_levels, 32, 32, coeffs, 32, output, stride, 2);
#elif DWT_TYPE == 53
  dyadic_synthesize_53(dwt_levels, 32, 32, coeffs, 32, output, stride, 2);
#endif
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
// Inverse 64x64 wavelet transform: a 4-level synthesis with the wavelet
// selected at compile time by DWT_TYPE (26, 97 or 53) and a final 1-bit
// down-scale. If DWT_TYPE is none of those values the function does nothing.
//
//   input  - 64x64 coefficients with a row pitch of 64; handed to the
//            synthesis routine as its working plane.
//   output - reconstructed samples.
//   stride - row pitch of output.
//
// NOTE(review): output is declared tran_low_t* but is forwarded to a
// parameter of type int16_t*; confirm the two types coincide in every
// supported configuration.
void vp9_idwt64x64_c(tran_low_t *input, tran_low_t *output, int stride) {
  enum { kLevels = 4, kSize = 64, kScaleBits = 1 };
#if DWT_TYPE == 26
  dyadic_synthesize_26(kLevels, kSize, kSize, input, kSize, output, stride,
                       kScaleBits);
#elif DWT_TYPE == 97
  dyadic_synthesize_97(kLevels, kSize, kSize, input, kSize, output, stride,
                       kScaleBits);
#elif DWT_TYPE == 53
  dyadic_synthesize_53(kLevels, kSize, kSize, input, kSize, output, stride,
                       kScaleBits);
#endif
}
|
||||
|
||||
// Inverse 64x64 hybrid transform: a 32x32 inverse DCT on the top-left
// (low-low) quadrant followed by a single-level wavelet synthesis of the
// whole block, with the wavelet chosen at compile time by DWT_TYPE and a
// final 1-bit down-scale.
//
// The input block is copied to local storage first, so `input` itself is
// left untouched.
//
//   input  - 64x64 coefficients, row pitch 64.
//   output - reconstructed samples.
//   stride - row pitch of output.
//
// NOTE(review): both the local tran_low_t plane handed to
// vp9_idct32x32_noscale() and `output` are forwarded to int16_t* parameters;
// confirm tran_low_t is int16_t in every supported configuration.
void vp9_idwtdct64x64_c(tran_low_t *input, tran_low_t *output, int stride) {
  const int dwt_levels = 1;
  tran_low_t low_band[32 * 32];
  tran_low_t coeffs[64 * 64];
  int row;

  // Working copy of the whole block (64 contiguous rows of 64 values).
  memcpy(coeffs, input, sizeof(coeffs));

  // Extract the low-low quadrant into a tightly packed 32x32 block.
  for (row = 0; row < 32; ++row)
    memcpy(low_band + row * 32, input + row * 64, sizeof(low_band[0]) * 32);

  // Replace the low-low quadrant of the working copy by its inverse DCT.
  vp9_idct32x32_noscale(low_band, coeffs, 64);

#if DWT_TYPE == 26
  dyadic_synthesize_26(dwt_levels, 64, 64, coeffs, 64, output, stride, 1);
#elif DWT_TYPE == 97
  dyadic_synthesize_97(dwt_levels, 64, 64, coeffs, 64, output, stride, 1);
#elif DWT_TYPE == 53
  dyadic_synthesize_53(dwt_levels, 64, 64, coeffs, 64, output, stride, 1);
#endif
}
|
||||
#endif // CONFIG_TX64X64
|
39
vp9/common/vp9_idwt.h
Normal file
39
vp9/common/vp9_idwt.h
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP9_COMMON_VP9_IDWT_H_
|
||||
#define VP9_COMMON_VP9_IDWT_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_enums.h"
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
|
||||
#define DWT_MAX_LENGTH 64
|
||||
#define DWT_TYPE 26 // 26/53/97
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_idwt64x64(tran_low_t *input, tran_low_t *output, int stride);
|
||||
void vp9_idwtdct64x64(tran_low_t *input, tran_low_t *output, int stride);
|
||||
#endif // CONFIG_TX64X64
|
||||
void vp9_idwt32x32(tran_low_t *input, tran_low_t *output, int stride);
|
||||
void vp9_idwtdct32x32(tran_low_t *input, tran_low_t *output, int stride);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP9_COMMON_VP9_IDWT_H_
|
@ -396,6 +396,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add/;
|
||||
|
||||
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||
add_proto qw/void vp9_idct16x16_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_noscale/;
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add/;
|
||||
|
||||
@ -411,6 +416,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct64x64_4096_add/;
|
||||
|
||||
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||
add_proto qw/void vp9_idct32x32_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_noscale/;
|
||||
}
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
@ -454,6 +464,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add/;
|
||||
|
||||
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||
add_proto qw/void vp9_idct16x16_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_noscale/;
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add/;
|
||||
|
||||
@ -469,6 +484,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct64x64_4096_add/;
|
||||
|
||||
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||
add_proto qw/void vp9_idct32x32_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_noscale/;
|
||||
}
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
@ -516,6 +536,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
|
||||
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
|
||||
|
||||
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||
add_proto qw/void vp9_idct16x16_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_noscale/;
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
|
||||
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
|
||||
@ -535,6 +560,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct64x64_4096_add/;
|
||||
|
||||
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||
add_proto qw/void vp9_idct32x32_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_noscale/;
|
||||
}
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
@ -1498,12 +1528,22 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct32x32_rd/;
|
||||
|
||||
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||
add_proto qw/void vp9_fdct16x16_noscale/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct16x16_noscale/;
|
||||
}
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct64x64_1/;
|
||||
|
||||
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct64x64/;
|
||||
|
||||
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||
add_proto qw/void vp9_fdct32x32_noscale/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct32x32_noscale/;
|
||||
}
|
||||
}
|
||||
specialize qw/vp9_fdct32x32_rd sse2/;
|
||||
} else {
|
||||
@ -1546,12 +1586,22 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct32x32_rd sse2 avx2/;
|
||||
|
||||
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||
add_proto qw/void vp9_fdct16x16_noscale/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct16x16_noscale/;
|
||||
}
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct64x64_1/;
|
||||
|
||||
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct64x64/;
|
||||
|
||||
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
|
||||
add_proto qw/void vp9_fdct32x32_noscale/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct32x32_noscale/;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -522,6 +522,193 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_WAVELETS
|
||||
// The difference between this one and the function above is scaling
|
||||
// of the input. This function does not scale so that the actual 2D
|
||||
// transform is unitary. The function above scales the transform to be
|
||||
// 8 times unitary.
|
||||
void vp9_fdct16x16_noscale_c(const int16_t *input, tran_low_t *output,
|
||||
int stride) {
|
||||
// The 2D transform is done with two passes which are actually pretty
|
||||
// similar. In the first one, we transform the columns and transpose
|
||||
// the results. In the second one, we transform the rows. To achieve that,
|
||||
// as the first pass results are transposed, we transpose the columns (that
|
||||
// is the transposed rows) and transpose the results (so that it goes back
|
||||
// in normal/row positions).
|
||||
int pass;
|
||||
// We need an intermediate buffer between passes.
|
||||
tran_low_t intermediate[256];
|
||||
const int16_t *in_pass0 = input;
|
||||
const tran_low_t *in = NULL;
|
||||
tran_low_t *out = intermediate;
|
||||
// Do the two transform/transpose passes
|
||||
for (pass = 0; pass < 2; ++pass) {
|
||||
tran_high_t step1[8]; // canbe16
|
||||
tran_high_t step2[8]; // canbe16
|
||||
tran_high_t step3[8]; // canbe16
|
||||
tran_high_t input[8]; // canbe16
|
||||
tran_high_t temp1, temp2; // needs32
|
||||
int i;
|
||||
for (i = 0; i < 16; i++) {
|
||||
if (0 == pass) {
|
||||
// Calculate input for the first 8 results.
|
||||
input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) >> 1;
|
||||
input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) >> 1;
|
||||
input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) >> 1;
|
||||
input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) >> 1;
|
||||
input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) >> 1;
|
||||
input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) >> 1;
|
||||
input[6] = (in_pass0[6 * stride] + in_pass0[ 9 * stride]) >> 1;
|
||||
input[7] = (in_pass0[7 * stride] + in_pass0[ 8 * stride]) >> 1;
|
||||
// Calculate input for the next 8 results.
|
||||
step1[0] = (in_pass0[7 * stride] - in_pass0[ 8 * stride]) >> 1;
|
||||
step1[1] = (in_pass0[6 * stride] - in_pass0[ 9 * stride]) >> 1;
|
||||
step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) >> 1;
|
||||
step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) >> 1;
|
||||
step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) >> 1;
|
||||
step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) >> 1;
|
||||
step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) >> 1;
|
||||
step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) >> 1;
|
||||
} else {
|
||||
// Calculate input for the first 8 results.
|
||||
input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
|
||||
input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
|
||||
input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
|
||||
input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
|
||||
input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
|
||||
input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
|
||||
input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2);
|
||||
input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2);
|
||||
// Calculate input for the next 8 results.
|
||||
step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2);
|
||||
step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2);
|
||||
step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
|
||||
step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
|
||||
step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
|
||||
step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
|
||||
step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
|
||||
step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
|
||||
}
|
||||
// Work on the first eight values; fdct8(input, even_results);
|
||||
{
|
||||
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
|
||||
tran_high_t t0, t1, t2, t3; // needs32
|
||||
tran_high_t x0, x1, x2, x3; // canbe16
|
||||
|
||||
// stage 1
|
||||
s0 = input[0] + input[7];
|
||||
s1 = input[1] + input[6];
|
||||
s2 = input[2] + input[5];
|
||||
s3 = input[3] + input[4];
|
||||
s4 = input[3] - input[4];
|
||||
s5 = input[2] - input[5];
|
||||
s6 = input[1] - input[6];
|
||||
s7 = input[0] - input[7];
|
||||
|
||||
// fdct4(step, step);
|
||||
x0 = s0 + s3;
|
||||
x1 = s1 + s2;
|
||||
x2 = s1 - s2;
|
||||
x3 = s0 - s3;
|
||||
t0 = (x0 + x1) * cospi_16_64;
|
||||
t1 = (x0 - x1) * cospi_16_64;
|
||||
t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
|
||||
t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
|
||||
out[0] = fdct_round_shift(t0);
|
||||
out[4] = fdct_round_shift(t2);
|
||||
out[8] = fdct_round_shift(t1);
|
||||
out[12] = fdct_round_shift(t3);
|
||||
|
||||
// Stage 2
|
||||
t0 = (s6 - s5) * cospi_16_64;
|
||||
t1 = (s6 + s5) * cospi_16_64;
|
||||
t2 = fdct_round_shift(t0);
|
||||
t3 = fdct_round_shift(t1);
|
||||
|
||||
// Stage 3
|
||||
x0 = s4 + t2;
|
||||
x1 = s4 - t2;
|
||||
x2 = s7 - t3;
|
||||
x3 = s7 + t3;
|
||||
|
||||
// Stage 4
|
||||
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
|
||||
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
|
||||
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
|
||||
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
|
||||
out[2] = fdct_round_shift(t0);
|
||||
out[6] = fdct_round_shift(t2);
|
||||
out[10] = fdct_round_shift(t1);
|
||||
out[14] = fdct_round_shift(t3);
|
||||
}
|
||||
// Work on the next eight values; step1 -> odd_results
|
||||
{
|
||||
// step 2
|
||||
temp1 = (step1[5] - step1[2]) * cospi_16_64;
|
||||
temp2 = (step1[4] - step1[3]) * cospi_16_64;
|
||||
step2[2] = fdct_round_shift(temp1);
|
||||
step2[3] = fdct_round_shift(temp2);
|
||||
temp1 = (step1[4] + step1[3]) * cospi_16_64;
|
||||
temp2 = (step1[5] + step1[2]) * cospi_16_64;
|
||||
step2[4] = fdct_round_shift(temp1);
|
||||
step2[5] = fdct_round_shift(temp2);
|
||||
// step 3
|
||||
step3[0] = step1[0] + step2[3];
|
||||
step3[1] = step1[1] + step2[2];
|
||||
step3[2] = step1[1] - step2[2];
|
||||
step3[3] = step1[0] - step2[3];
|
||||
step3[4] = step1[7] - step2[4];
|
||||
step3[5] = step1[6] - step2[5];
|
||||
step3[6] = step1[6] + step2[5];
|
||||
step3[7] = step1[7] + step2[4];
|
||||
// step 4
|
||||
temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
|
||||
temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
|
||||
step2[1] = fdct_round_shift(temp1);
|
||||
step2[2] = fdct_round_shift(temp2);
|
||||
temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
|
||||
temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
|
||||
step2[5] = fdct_round_shift(temp1);
|
||||
step2[6] = fdct_round_shift(temp2);
|
||||
// step 5
|
||||
step1[0] = step3[0] + step2[1];
|
||||
step1[1] = step3[0] - step2[1];
|
||||
step1[2] = step3[3] + step2[2];
|
||||
step1[3] = step3[3] - step2[2];
|
||||
step1[4] = step3[4] - step2[5];
|
||||
step1[5] = step3[4] + step2[5];
|
||||
step1[6] = step3[7] - step2[6];
|
||||
step1[7] = step3[7] + step2[6];
|
||||
// step 6
|
||||
temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
|
||||
temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
|
||||
out[1] = fdct_round_shift(temp1);
|
||||
out[9] = fdct_round_shift(temp2);
|
||||
temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
|
||||
temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
|
||||
out[5] = fdct_round_shift(temp1);
|
||||
out[13] = fdct_round_shift(temp2);
|
||||
temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
|
||||
temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
|
||||
out[3] = fdct_round_shift(temp1);
|
||||
out[11] = fdct_round_shift(temp2);
|
||||
temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
|
||||
temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
|
||||
out[7] = fdct_round_shift(temp1);
|
||||
out[15] = fdct_round_shift(temp2);
|
||||
}
|
||||
// Do next column (which is a transposed row in second/horizontal pass)
|
||||
in++;
|
||||
in_pass0++;
|
||||
out += 16;
|
||||
}
|
||||
// Setup in/out for next pass.
|
||||
in = intermediate;
|
||||
out = output;
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_WAVELETS
|
||||
|
||||
void vp9_fadst8(const tran_low_t *input, tran_low_t *output) {
|
||||
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
|
||||
|
||||
@ -1389,6 +1576,35 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_WAVELETS
|
||||
// Forward 32x32 DCT built from two 1-D passes of vp9_fdct32() (called with a
// final argument of 0): columns first, then rows. After each pass the values
// are divided by 4 with a sign-dependent rounding bias: the column pass adds
// 2 for positive values and 1 otherwise, the row pass adds 2 for negative
// values and 1 otherwise.
//
//   input  - 32x32 block of samples.
//   out    - 1024 transform coefficients, row-major.
//   stride - distance, in elements, between consecutive rows of input.
void vp9_fdct32x32_noscale_c(const int16_t *input, tran_low_t *out,
                             int stride) {
  tran_high_t col_pass[32 * 32];
  tran_high_t vec_in[32], vec_out[32];
  int r, c;

  // Columns
  for (c = 0; c < 32; ++c) {
    for (r = 0; r < 32; ++r)
      vec_in[r] = input[r * stride + c];
    vp9_fdct32(vec_in, vec_out, 0);
    for (r = 0; r < 32; ++r) {
      const tran_high_t v = vec_out[r];
      col_pass[r * 32 + c] = (v + (v > 0 ? 2 : 1)) >> 2;
    }
  }

  // Rows
  for (r = 0; r < 32; ++r) {
    for (c = 0; c < 32; ++c)
      vec_in[c] = col_pass[r * 32 + c];
    vp9_fdct32(vec_in, vec_out, 0);
    for (c = 0; c < 32; ++c) {
      const tran_high_t v = vec_out[c];
      out[r * 32 + c] = (tran_low_t)((v + (v < 0 ? 2 : 1)) >> 2);
    }
  }
}
|
||||
#endif // CONFIG_WAVELETS
|
||||
|
||||
// Note that although we use dct_32_round in dct32 computation flow,
|
||||
// this 2d fdct32x32 for rate-distortion optimization loop is operating
|
||||
// within 16 bits precision.
|
||||
|
323
vp9/encoder/vp9_dwt.c
Normal file
323
vp9/encoder/vp9_dwt.c
Normal file
@ -0,0 +1,323 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
|
||||
#include "vp9/encoder/vp9_dct.h"
|
||||
#include "vp9/encoder/vp9_dwt.h"
|
||||
|
||||
// Note: block length must be even for this implementation
|
||||
// One level of 5/3 lifting analysis along a row.
//   length   - number of input samples; must be even. Fewer than 2 samples
//              is a no-op.
//   x        - input samples (read only by this routine).
//   lowpass  - receives length/2 lowpass coefficients.
//   highpass - receives length/2 highpass coefficients.
// The output buffers must not overlap x.
static void analysis_53_row(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  int k;
  tran_low_t prev;

  // Guard: the boundary handling below needs at least one sample pair.
  if (half < 1) return;

  // Predict step: highpass = odd sample minus the rounded mean of its two
  // even neighbours; lowpass is seeded with twice the even sample.
  // Multiplication is used rather than "<< 1" because left-shifting a
  // negative value is undefined behaviour in C.
  for (k = 0; k < half - 1; ++k) {
    const tran_low_t even = x[2 * k];
    lowpass[k] = even * 2;
    highpass[k] = x[2 * k + 1] - ((even + x[2 * k + 2] + 1) >> 1);
  }
  // Right edge: there is no even sample to the right, so the left even
  // neighbour alone is used as the prediction.
  lowpass[half - 1] = x[2 * half - 2] * 2;
  highpass[half - 1] = x[2 * half - 1] - x[2 * half - 2];

  // Update step: add the rounded mean of the two adjacent highpass values.
  // At the left edge the first highpass value is used for both neighbours.
  prev = highpass[0];
  for (k = 0; k < half; ++k) {
    lowpass[k] += (prev + highpass[k] + 1) >> 1;
    prev = highpass[k];
  }
}
|
||||
|
||||
// One level of 5/3 lifting analysis along a column.
//   length   - number of input samples; must be even. Fewer than 2 samples
//              is a no-op.
//   x        - input samples (read only by this routine).
//   lowpass  - receives length/2 lowpass coefficients.
//   highpass - receives length/2 highpass coefficients.
// The output buffers must not overlap x.
static void analysis_53_col(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  int k;
  tran_low_t prev;

  // Guard: the boundary handling below needs at least one sample pair.
  if (half < 1) return;

  // Predict step: highpass = (2 * odd - (left even + right even) + 2) >> 2;
  // lowpass is seeded with the even sample. Multiplication is used rather
  // than "<< 1" because left-shifting a negative value is undefined
  // behaviour in C.
  for (k = 0; k < half - 1; ++k) {
    const tran_low_t even = x[2 * k];
    lowpass[k] = even;
    highpass[k] = (x[2 * k + 1] * 2 - (even + x[2 * k + 2]) + 2) >> 2;
  }
  // Right edge: only the left even neighbour is available.
  lowpass[half - 1] = x[2 * half - 2];
  highpass[half - 1] = (x[2 * half - 1] - x[2 * half - 2] + 1) >> 1;

  // Update step: add the rounded mean of the two adjacent highpass values.
  // At the left edge the first highpass value is used for both neighbours.
  prev = highpass[0];
  for (k = 0; k < half; ++k) {
    lowpass[k] += (prev + highpass[k] + 1) >> 1;
    prev = highpass[k];
  }
}
|
||||
|
||||
static void dyadic_analyze_53(int levels, int width, int height,
|
||||
int16_t *x, int pitch_x,
|
||||
tran_low_t *c, int pitch_c,
|
||||
int dwt_scale_bits) {
|
||||
int lv, i, j, nh, nw, hh = height, hw = width;
|
||||
tran_low_t buffer[2 * DWT_MAX_LENGTH];
|
||||
for (i = 0; i < height; i++) {
|
||||
for (j = 0; j < width; j++) {
|
||||
c[i * pitch_c + j] = x[i * pitch_x + j] << dwt_scale_bits;
|
||||
}
|
||||
}
|
||||
for (lv = 0; lv < levels; lv++) {
|
||||
nh = hh;
|
||||
hh = (hh + 1) >> 1;
|
||||
nw = hw;
|
||||
hw = (hw + 1) >> 1;
|
||||
if ((nh < 2) || (nw < 2)) return;
|
||||
for (i = 0; i < nh; i++) {
|
||||
memcpy(buffer, &c[i * pitch_c], nw * sizeof(tran_low_t));
|
||||
analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
|
||||
}
|
||||
for (j = 0; j < nw; j++) {
|
||||
for (i = 0; i < nh; i++)
|
||||
buffer[i + nh] = c[i * pitch_c + j];
|
||||
analysis_53_col(nh, buffer + nh, buffer, buffer + hh);
|
||||
for (i = 0; i < nh; i++)
|
||||
c[i * pitch_c + j] = buffer[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// One level of 2/6 lifting analysis along a row.
//   length   - number of input samples (processed as length/2 pairs).
//   x        - input samples.
//   lowpass  - receives length/2 pair sums.
//   highpass - receives length/2 pair differences, refined by a correction
//              from the neighbouring lowpass values when there are at least
//              4 pairs.
static void analysis_26_row(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  tran_low_t first, second, prev;
  int k;

  // Sum / difference of each sample pair.
  for (k = 0; k < half; ++k) {
    first = x[2 * k];
    second = x[2 * k + 1];
    lowpass[k] = first + second;
    highpass[k] = first - second;
  }

  // Short signals get no correction.
  if (half < 4) return;

  // Correct each highpass value using the lowpass neighbours on either
  // side; at both edges the missing neighbour is replaced by the edge
  // lowpass value itself.
  prev = lowpass[0];
  for (k = 0; k < half - 1; ++k) {
    highpass[k] -= (prev - lowpass[k + 1] + 4) >> 3;
    prev = lowpass[k];
  }
  highpass[half - 1] -= (prev - lowpass[half - 1] + 4) >> 3;
}
|
||||
|
||||
// One level of 2/6 lifting analysis along a column.
//   length   - number of input samples (processed as length/2 pairs).
//   x        - input samples.
//   lowpass  - receives length/2 rounded half-sums of each pair.
//   highpass - receives length/2 rounded half-differences, refined by a
//              correction from the neighbouring lowpass values when there
//              are at least 4 pairs.
static void analysis_26_col(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  tran_low_t first, second, prev;
  int k;

  // Rounded half-sum / half-difference of each sample pair.
  for (k = 0; k < half; ++k) {
    first = x[2 * k];
    second = x[2 * k + 1];
    lowpass[k] = (first + second + 1) >> 1;
    highpass[k] = (first - second + 1) >> 1;
  }

  // Short signals get no correction.
  if (half < 4) return;

  // Correct each highpass value using the lowpass neighbours on either
  // side; at both edges the missing neighbour is replaced by the edge
  // lowpass value itself.
  prev = lowpass[0];
  for (k = 0; k < half - 1; ++k) {
    highpass[k] -= (prev - lowpass[k + 1] + 4) >> 3;
    prev = lowpass[k];
  }
  highpass[half - 1] -= (prev - lowpass[half - 1] + 4) >> 3;
}
|
||||
|
||||
static void dyadic_analyze_26(int levels, int width, int height,
|
||||
int16_t *x, int pitch_x,
|
||||
tran_low_t *c, int pitch_c,
|
||||
int dwt_scale_bits) {
|
||||
int lv, i, j, nh, nw, hh = height, hw = width;
|
||||
tran_low_t buffer[2 * DWT_MAX_LENGTH];
|
||||
for (i = 0; i < height; i++) {
|
||||
for (j = 0; j < width; j++) {
|
||||
c[i * pitch_c + j] = x[i * pitch_x + j] << dwt_scale_bits;
|
||||
}
|
||||
}
|
||||
for (lv = 0; lv < levels; lv++) {
|
||||
nh = hh;
|
||||
hh = (hh + 1) >> 1;
|
||||
nw = hw;
|
||||
hw = (hw + 1) >> 1;
|
||||
if ((nh < 2) || (nw < 2)) return;
|
||||
for (i = 0; i < nh; i++) {
|
||||
memcpy(buffer, &c[i * pitch_c], nw * sizeof(tran_low_t));
|
||||
analysis_26_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
|
||||
}
|
||||
for (j = 0; j < nw; j++) {
|
||||
for (i = 0; i < nh; i++)
|
||||
buffer[i + nh] = c[i * pitch_c + j];
|
||||
analysis_26_col(nh, buffer + nh, buffer, buffer + hh);
|
||||
for (i = 0; i < nh; i++)
|
||||
c[i * pitch_c + j] = buffer[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void analysis_97(int length, double *x,
|
||||
double *lowpass, double *highpass) {
|
||||
static const double a_predict1 = -1.586134342;
|
||||
static const double a_update1 = -0.05298011854;
|
||||
static const double a_predict2 = 0.8829110762;
|
||||
static const double a_update2 = 0.4435068522;
|
||||
static const double s_low = 1.149604398;
|
||||
static const double s_high = 1/1.149604398;
|
||||
int i;
|
||||
double y[DWT_MAX_LENGTH];
|
||||
// Predict 1
|
||||
for (i = 1; i < length - 2; i += 2) {
|
||||
x[i] += a_predict1 * (x[i - 1] + x[i + 1]);
|
||||
}
|
||||
x[length - 1] += 2 * a_predict1 * x[length - 2];
|
||||
// Update 1
|
||||
for (i = 2; i < length; i += 2) {
|
||||
x[i] += a_update1 * (x[i - 1] + x[i + 1]);
|
||||
}
|
||||
x[0] += 2 * a_update1 * x[1];
|
||||
// Predict 2
|
||||
for (i = 1; i < length - 2; i += 2) {
|
||||
x[i] += a_predict2 * (x[i - 1] + x[i + 1]);
|
||||
}
|
||||
x[length - 1] += 2 * a_predict2 * x[length - 2];
|
||||
// Update 2
|
||||
for (i = 2; i < length; i += 2) {
|
||||
x[i] += a_update2 * (x[i - 1] + x[i + 1]);
|
||||
}
|
||||
x[0] += 2 * a_update2 * x[1];
|
||||
memcpy(y, x, sizeof(*y) * length);
|
||||
// Scale and pack
|
||||
for (i = 0; i < length / 2; i++) {
|
||||
lowpass[i] = y[2 * i] * s_low;
|
||||
highpass[i] = y[2 * i + 1] * s_high;
|
||||
}
|
||||
}
|
||||
|
||||
static void dyadic_analyze_97(int levels, int width, int height,
|
||||
int16_t *x, int pitch_x,
|
||||
tran_low_t *c, int pitch_c,
|
||||
int dwt_scale_bits) {
|
||||
int lv, i, j, nh, nw, hh = height, hw = width;
|
||||
double buffer[2 * DWT_MAX_LENGTH];
|
||||
double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
|
||||
for (i = 0; i < height; i++) {
|
||||
for (j = 0; j < width; j++) {
|
||||
y[i * DWT_MAX_LENGTH + j] = x[i * pitch_x + j] << dwt_scale_bits;
|
||||
}
|
||||
}
|
||||
for (lv = 0; lv < levels; lv++) {
|
||||
nh = hh;
|
||||
hh = (hh + 1) >> 1;
|
||||
nw = hw;
|
||||
hw = (hw + 1) >> 1;
|
||||
if ((nh < 2) || (nw < 2)) return;
|
||||
for (i = 0; i < nh; i++) {
|
||||
memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
|
||||
analysis_97(nw, buffer, &y[i * DWT_MAX_LENGTH],
|
||||
&y[i * DWT_MAX_LENGTH] + hw);
|
||||
}
|
||||
for (j = 0; j < nw; j++) {
|
||||
for (i = 0; i < nh; i++)
|
||||
buffer[i + nh] = y[i * DWT_MAX_LENGTH + j];
|
||||
analysis_97(nh, buffer + nh, buffer, buffer + hh);
|
||||
for (i = 0; i < nh; i++)
|
||||
y[i * DWT_MAX_LENGTH + j] = buffer[i];
|
||||
}
|
||||
}
|
||||
for (i = 0; i < height; i++) {
|
||||
for (j = 0; j < width; j++) {
|
||||
c[i * pitch_c + j] = round(y[i * DWT_MAX_LENGTH + j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Forward 32x32 wavelet transform: up to 4 decomposition levels with the
// input scaled up by 2 bits. The filter family is chosen at compile time by
// DWT_TYPE (26, 97 or 53); for any other value nothing is written.
//   input  - source block, read with a row pitch of `stride`.
//   output - 32x32 coefficients, written densely (pitch 32).
void vp9_fdwt32x32_c(tran_low_t *input, tran_low_t *output, int stride) {
  enum { kSize = 32, kLevels = 4, kScaleBits = 2 };
#if DWT_TYPE == 26
  dyadic_analyze_26(kLevels, kSize, kSize, input, stride,
                    output, kSize, kScaleBits);
#elif DWT_TYPE == 97
  dyadic_analyze_97(kLevels, kSize, kSize, input, stride,
                    output, kSize, kScaleBits);
#elif DWT_TYPE == 53
  dyadic_analyze_53(kLevels, kSize, kSize, input, stride,
                    output, kSize, kScaleBits);
#endif
}
|
||||
|
||||
// Forward 32x32 hybrid transform: a single-level wavelet decomposition
// followed by a 16x16 DCT applied to the top-left (low-low) 16x16 band.
// The filter family is chosen at compile time by DWT_TYPE (26, 97 or 53).
//   input  - source block, read with a row pitch of `stride`.
//   output - 32x32 coefficients, written densely (pitch 32).
void vp9_fdwtdct32x32_c(tran_low_t *input, tran_low_t *output,
                        int stride) {
  const int dwt_levels = 1;
  tran_low_t buffer[16 * 16];
  int i;
  // Scales up by 2-bit from unitary
#if DWT_TYPE == 26
  dyadic_analyze_26(dwt_levels, 32, 32, input, stride, output, 32, 2);
#elif DWT_TYPE == 97
  dyadic_analyze_97(dwt_levels, 32, 32, input, stride, output, 32, 2);
#elif DWT_TYPE == 53
  dyadic_analyze_53(dwt_levels, 32, 32, input, stride, output, 32, 2);
#endif
  // 16x16 dct in LL band that is unitary. The band is read straight out of
  // the output plane (pitch 32) and the result lands in a dense 16x16 buffer.
  vp9_fdct16x16_noscale(output, buffer, 32);
  // Note that the transform overall is 2-bit scaled up from unitary.
  // Copy the DCT coefficients back over the LL band, row by row.
  for (i = 0; i < 16; ++i) {
    memcpy(&output[i * 32], &buffer[i * 16], sizeof(buffer[0]) * 16);
  }
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
// Forward 64x64 wavelet transform: up to 4 decomposition levels with the
// input scaled up by 1 bit. The filter family is chosen at compile time by
// DWT_TYPE (26, 97 or 53); for any other value nothing is written.
//   input  - source block, read with a row pitch of `stride`.
//   output - 64x64 coefficients, written densely (pitch 64).
void vp9_fdwt64x64_c(tran_low_t *input, tran_low_t *output, int stride) {
  enum { kSize = 64, kLevels = 4, kScaleBits = 1 };
#if DWT_TYPE == 26
  dyadic_analyze_26(kLevels, kSize, kSize, input, stride,
                    output, kSize, kScaleBits);
#elif DWT_TYPE == 97
  dyadic_analyze_97(kLevels, kSize, kSize, input, stride,
                    output, kSize, kScaleBits);
#elif DWT_TYPE == 53
  dyadic_analyze_53(kLevels, kSize, kSize, input, stride,
                    output, kSize, kScaleBits);
#endif
}
|
||||
|
||||
// Forward 64x64 hybrid transform: a single-level wavelet decomposition
// followed by a 32x32 DCT applied to the top-left (low-low) 32x32 band.
// The filter family is chosen at compile time by DWT_TYPE (26, 97 or 53).
//   input  - source block, read with a row pitch of `stride`.
//   output - 64x64 coefficients, written densely (pitch 64).
void vp9_fdwtdct64x64_c(tran_low_t *input, tran_low_t *output,
                        int stride) {
  const int dwt_levels = 1;
  tran_low_t buffer[32 * 32];
  const tran_low_t *src_row = buffer;
  tran_low_t *dst_row = output;
  int row;
  // Wavelet stage leaves the data 1 bit above unitary scaling.
#if DWT_TYPE == 26
  dyadic_analyze_26(dwt_levels, 64, 64, input, stride, output, 64, 1);
#elif DWT_TYPE == 97
  dyadic_analyze_97(dwt_levels, 64, 64, input, stride, output, 64, 1);
#elif DWT_TYPE == 53
  dyadic_analyze_53(dwt_levels, 64, 64, input, stride, output, 64, 1);
#endif
  // Unitary 32x32 DCT of the LL band, read straight out of the output plane
  // (pitch 64) into a dense 32x32 buffer.
  vp9_fdct32x32_noscale(output, buffer, 64);
  // Overall scaling stays 1 bit above unitary. Put the DCT coefficients
  // back over the LL band one row at a time.
  for (row = 0; row < 32; ++row) {
    memcpy(dst_row, src_row, sizeof(buffer[0]) * 32);
    dst_row += 64;
    src_row += 32;
  }
}
|
||||
#endif // CONFIG_TX64X64
|
32
vp9/encoder/vp9_dwt.h
Normal file
32
vp9/encoder/vp9_dwt.h
Normal file
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP9_ENCODER_VP9_DWT_H_
|
||||
#define VP9_ENCODER_VP9_DWT_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_idwt.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_fdwt64x64(tran_low_t *input, tran_low_t *output, int stride);
|
||||
void vp9_fdwtdct64x64(tran_low_t *input, tran_low_t *output, int stride);
|
||||
#endif // CONFIG_TX64X64
|
||||
void vp9_fdwt32x32(tran_low_t *input, tran_low_t *output, int stride);
|
||||
void vp9_fdwtdct32x32(tran_low_t *input, tran_low_t *output, int stride);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP9_ENCODER_VP9_DWT_H_
|
@ -70,6 +70,8 @@ VP9_COMMON_SRCS-yes += common/vp9_scan.c
|
||||
VP9_COMMON_SRCS-yes += common/vp9_scan.h
|
||||
VP9_COMMON_SRCS-$(CONFIG_GLOBAL_MOTION) += common/vp9_motion_model.c
|
||||
VP9_COMMON_SRCS-$(CONFIG_GLOBAL_MOTION) += common/vp9_motion_model.h
|
||||
VP9_COMMON_SRCS-$(CONFIG_WAVELETS) += common/vp9_idwt.c
|
||||
VP9_COMMON_SRCS-$(CONFIG_WAVELETS) += common/vp9_idwt.h
|
||||
|
||||
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
|
||||
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c
|
||||
|
@ -85,6 +85,8 @@ VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.c
|
||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.h
|
||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.c
|
||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.h
|
||||
VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.c
|
||||
VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.h
|
||||
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
|
||||
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h
|
||||
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
|
||||
|
Loading…
x
Reference in New Issue
Block a user