Adds wavelet transforms + hybrid dct/dwt variants

The wavelets implemented are 2/6, 5/3 and 9/7, each with
a lifting-based scheme for even block sizes. The 9/7
one is currently implemented in double-precision floating point.

This is to start experiments with:
1. Replacing large transforms (32x32 and 64x64) with wavelets
or wavelet-dct hybrids that can hopefully localize errors better
spatially. (This will also need an alternate entropy coder.)
2. Super-resolution modes where the higher sub-bands may be
selectively skipped from being conveyed, while a smart
reconstruction recovers the lost frequencies.

The current patch includes two types of 32x32 and 64x64
transforms: one where only wavelets are used, and another
where a single level wavelet decomposition is followed
by a lower resolution dct on the low-low band.

Change-Id: I2d6755c4e6c8ec9386a04633dacbe0de3b0043ec
This commit is contained in:
Debargha Mukherjee 2015-06-02 12:25:28 -07:00
parent 5a69abc66b
commit b433dd4443
11 changed files with 1085 additions and 1 deletions

1
configure vendored
View File

@ -301,6 +301,7 @@ EXPERIMENT_LIST="
bitstream_fixes
newmvref
misc_entropy
wavelets
"
CONFIG_LIST="
external_build

View File

@ -709,6 +709,33 @@ void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
}
}
#if CONFIG_WAVELETS
// Inverse 16x16 DCT that stores its rounded result into a 16-bit buffer.
//   input  - 256 coefficients, read as 16 rows of 16.
//   dest   - receives the 16x16 result; rows are `stride` elements apart.
//   stride - row pitch of `dest`, in elements.
// The column-pass output is rounded with ROUND_POWER_OF_TWO(., 3).
void vp9_idct16x16_noscale_c(const tran_low_t *input, int16_t *dest,
                             int stride) {
  tran_low_t row_pass[16 * 16];
  tran_low_t col_in[16], col_out[16];
  int r, c;

  // Pass 1: 1-D inverse transform of every coefficient row.
  for (r = 0; r < 16; ++r)
    idct16(input + 16 * r, row_pass + 16 * r);

  // Pass 2: 1-D inverse transform of every column, then round and store.
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r)
      col_in[r] = row_pass[r * 16 + c];
    idct16(col_in, col_out);
    for (r = 0; r < 16; ++r)
      dest[r * stride + c] = ROUND_POWER_OF_TWO(col_out[r], 3);
  }
}
#endif // CONFIG_WAVELETS
static void iadst16(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
tran_high_t s9, s10, s11, s12, s13, s14, s15;
@ -1361,6 +1388,46 @@ void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
}
}
#if CONFIG_WAVELETS
// Inverse 32x32 DCT that stores its rounded result into a 16-bit buffer.
//   input  - 1024 coefficients, read as 32 rows of 32.
//   dest   - receives the 32x32 result; rows are `stride` elements apart.
//   stride - row pitch of `dest`, in elements.
// The column-pass output is rounded with ROUND_POWER_OF_TWO(., 4).
void vp9_idct32x32_noscale_c(const tran_low_t *input, int16_t *dest,
                             int stride) {
  tran_low_t out[32 * 32];
  tran_low_t *outptr = out;
  int i, j;
  tran_low_t temp_in[32], temp_out[32];

  // Rows
  for (i = 0; i < 32; ++i) {
    // OR all coefficients of the row together to detect an all-zero row.
    // The accumulator has the coefficient type so that no bits are dropped
    // when tran_low_t is wider than 16 bits.
    tran_low_t any_nonzero = 0;
    for (j = 0; j < 32; ++j)
      any_nonzero |= input[j];

    if (any_nonzero)
      idct32(input, outptr);
    else
      vpx_memset(outptr, 0, sizeof(tran_low_t) * 32);
    input += 32;
    outptr += 32;
  }

  // Columns
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j)
      temp_in[j] = out[j * 32 + i];
    idct32(temp_in, temp_out);
    for (j = 0; j < 32; ++j) {
      dest[j * stride + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
    }
  }
}
#endif // CONFIG_WAVELETS
void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
tran_low_t out[32 * 32] = {0};

View File

@ -156,7 +156,7 @@ void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
#if CONFIG_TX64X64
void vp9_idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
#endif
#endif // CONFIG_TX64X64
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
int stride, int eob);
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,

352
vp9/common/vp9_idwt.c Normal file
View File

@ -0,0 +1,352 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idwt.h"
// Inverse 5/3 lifting step used for rows.
// Note: block length must be even (and at least 2) for this implementation.
//   lowpass  - length/2 low-band coefficients; modified in place.
//   highpass - length/2 high-band coefficients.
//   x        - receives `length` reconstructed samples.
static void synthesis_53_row(int length,
                             tran_low_t *lowpass, tran_low_t *highpass,
                             tran_low_t *x) {
  const int half = length >> 1;
  const int last = half - 1;
  tran_low_t prev, cur;
  int k;

  // Undo the update step: each low coefficient loses the rounded mean of
  // its high-band neighbours (the first one uses highpass[0] twice).
  prev = highpass[0];
  for (k = 0; k < half; ++k) {
    lowpass[k] -= (prev + highpass[k] + 1) >> 1;
    prev = highpass[k];
  }

  // Interleave: even samples come from the halved low band, odd samples add
  // the high band to the average of the two surrounding low values.
  for (k = 0; k < last; ++k) {
    cur = lowpass[k];
    x[2 * k] = (cur + 1) >> 1;
    x[2 * k + 1] = highpass[k] + ((cur + lowpass[k + 1] + 2) >> 2);
  }

  // The final pair has no right-hand low neighbour.
  cur = lowpass[last];
  x[2 * last] = (cur + 1) >> 1;
  x[2 * last + 1] = highpass[last] + ((cur + 1) >> 1);
}
// Inverse 5/3 lifting step used for columns.
// The block length must be even and at least 2.
//   lowpass  - length/2 low-band coefficients; modified in place.
//   highpass - length/2 high-band coefficients.
//   x        - receives `length` reconstructed samples.
static void synthesis_53_col(int length,
                             tran_low_t *lowpass, tran_low_t *highpass,
                             tran_low_t *x) {
  const int half = length >> 1;
  const int last = half - 1;
  tran_low_t prev, cur;
  int k;

  // Undo the update step on the low band.
  prev = highpass[0];
  for (k = 0; k < half; ++k) {
    lowpass[k] -= (prev + highpass[k] + 1) >> 1;
    prev = highpass[k];
  }

  // Even samples are the restored low band; odd samples are twice the high
  // band plus the rounded mean of the two surrounding low values.
  // The doubling is written as a multiplication because left-shifting a
  // negative value is undefined behavior in C.
  for (k = 0; k < last; ++k) {
    cur = lowpass[k];
    x[2 * k] = cur;
    x[2 * k + 1] = highpass[k] * 2 + ((cur + lowpass[k + 1] + 1) >> 1);
  }

  // The final pair has no right-hand low neighbour.
  x[2 * last] = lowpass[last];
  x[2 * last + 1] = highpass[last] * 2 + lowpass[last];
}
// Multi-level inverse 5/3 wavelet transform of a width x height block.
//   levels         - number of decomposition levels to undo.
//   c, pitch_c     - coefficient buffer and its row pitch; overwritten in
//                    place while the levels are undone.
//   x, pitch_x     - destination for the rescaled result and its row pitch.
//   dwt_scale_bits - number of bits shifted out of each result, with the
//                    rounding applied to the magnitude.
static void dyadic_synthesize_53(int levels, int width, int height,
                                 tran_low_t *c, int pitch_c,
                                 int16_t *x, int pitch_x,
                                 int dwt_scale_bits) {
  int band_h[16], band_w[16];
  int level, row, col;
  tran_low_t line[2 * DWT_MAX_LENGTH];
  const int round_offset = 1 << (dwt_scale_bits - 1);

  // Band size at every level: each level halves the previous, rounding up.
  band_h[0] = height;
  band_w[0] = width;
  for (level = 1; level <= levels; ++level) {
    band_h[level] = (band_h[level - 1] + 1) >> 1;
    band_w[level] = (band_w[level - 1] + 1) >> 1;
  }

  // Undo the decomposition from the coarsest level to the finest.
  for (level = levels; level-- > 0;) {
    const int cur_h = band_h[level];
    const int cur_w = band_w[level];
    const int low_h = band_h[level + 1];
    const int low_w = band_w[level + 1];
    if (cur_h < 2 || cur_w < 2) continue;

    // Column synthesis: gather a column, reconstruct into the second half
    // of the scratch line, then scatter it back.
    for (col = 0; col < cur_w; ++col) {
      for (row = 0; row < cur_h; ++row)
        line[row] = c[row * pitch_c + col];
      synthesis_53_col(cur_h, line, line + low_h, line + cur_h);
      for (row = 0; row < cur_h; ++row)
        c[row * pitch_c + col] = line[cur_h + row];
    }

    // Row synthesis: copy the row out and reconstruct straight into c.
    for (row = 0; row < cur_h; ++row) {
      tran_low_t *const dst = &c[row * pitch_c];
      memcpy(line, dst, cur_w * sizeof(*line));
      synthesis_53_row(cur_w, line, line + low_w, dst);
    }
  }

  // Remove the scaling bits, rounding the magnitude of each value.
  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      const tran_low_t v = c[row * pitch_c + col];
      if (v >= 0)
        x[row * pitch_x + col] = (v + round_offset) >> dwt_scale_bits;
      else
        x[row * pitch_x + col] = -((-v + round_offset) >> dwt_scale_bits);
    }
  }
}
// Inverse 2/6 lifting step used for rows.
// Note: block length must be even for this implementation.
//   lowpass  - length/2 low-band coefficients.
//   highpass - length/2 high-band coefficients; modified in place when at
//              least four coefficient pairs are present.
//   x        - receives `length` reconstructed samples.
static void synthesis_26_row(int length,
                             tran_low_t *lowpass, tran_low_t *highpass,
                             tran_low_t *x) {
  const int half = length >> 1;
  int k;

  // Undo the high-band correction; it is only applied for half >= 4.
  if (half >= 4) {
    highpass[0] += (lowpass[0] - lowpass[1] + 4) >> 3;
    for (k = 1; k < half - 1; ++k)
      highpass[k] += (lowpass[k - 1] - lowpass[k + 1] + 4) >> 3;
    highpass[half - 1] += (lowpass[half - 2] - lowpass[half - 1] + 4) >> 3;
  }

  // Each low/high pair yields two samples: rounded half-sum and half-difference.
  for (k = 0; k < half; ++k) {
    const tran_low_t hi = highpass[k];
    const tran_low_t lo = lowpass[k];
    x[2 * k] = (lo + hi + 1) >> 1;
    x[2 * k + 1] = (lo - hi + 1) >> 1;
  }
}
// Inverse 2/6 lifting step used for columns.
// The block length must be even.
//   lowpass  - length/2 low-band coefficients.
//   highpass - length/2 high-band coefficients; modified in place when at
//              least four coefficient pairs are present.
//   x        - receives `length` reconstructed samples.
static void synthesis_26_col(int length,
                             tran_low_t *lowpass, tran_low_t *highpass,
                             tran_low_t *x) {
  const int half = length >> 1;
  int k;

  // Undo the high-band correction; it is only applied for half >= 4.
  if (half >= 4) {
    highpass[0] += (lowpass[0] - lowpass[1] + 4) >> 3;
    for (k = 1; k < half - 1; ++k)
      highpass[k] += (lowpass[k - 1] - lowpass[k + 1] + 4) >> 3;
    highpass[half - 1] += (lowpass[half - 2] - lowpass[half - 1] + 4) >> 3;
  }

  // Each low/high pair yields two samples: their sum and their difference.
  for (k = 0; k < half; ++k) {
    const tran_low_t hi = highpass[k];
    const tran_low_t lo = lowpass[k];
    x[2 * k] = lo + hi;
    x[2 * k + 1] = lo - hi;
  }
}
// Multi-level inverse 2/6 wavelet transform of a width x height block.
//   levels         - number of decomposition levels to undo.
//   c, pitch_c     - coefficient buffer and its row pitch; overwritten in
//                    place while the levels are undone.
//   x, pitch_x     - destination for the rescaled result and its row pitch.
//   dwt_scale_bits - number of bits shifted out of each result, with the
//                    rounding applied to the magnitude.
static void dyadic_synthesize_26(int levels, int width, int height,
                                 tran_low_t *c, int pitch_c,
                                 int16_t *x, int pitch_x,
                                 int dwt_scale_bits) {
  int band_h[16], band_w[16];
  int level, row, col;
  tran_low_t line[2 * DWT_MAX_LENGTH];
  const int round_offset = 1 << (dwt_scale_bits - 1);

  // Band size at every level: each level halves the previous, rounding up.
  band_h[0] = height;
  band_w[0] = width;
  for (level = 1; level <= levels; ++level) {
    band_h[level] = (band_h[level - 1] + 1) >> 1;
    band_w[level] = (band_w[level - 1] + 1) >> 1;
  }

  // Undo the decomposition from the coarsest level to the finest.
  for (level = levels; level-- > 0;) {
    const int cur_h = band_h[level];
    const int cur_w = band_w[level];
    const int low_h = band_h[level + 1];
    const int low_w = band_w[level + 1];
    if (cur_h < 2 || cur_w < 2) continue;

    // Column synthesis: gather a column, reconstruct into the second half
    // of the scratch line, then scatter it back.
    for (col = 0; col < cur_w; ++col) {
      for (row = 0; row < cur_h; ++row)
        line[row] = c[row * pitch_c + col];
      synthesis_26_col(cur_h, line, line + low_h, line + cur_h);
      for (row = 0; row < cur_h; ++row)
        c[row * pitch_c + col] = line[cur_h + row];
    }

    // Row synthesis: copy the row out and reconstruct straight into c.
    for (row = 0; row < cur_h; ++row) {
      tran_low_t *const dst = &c[row * pitch_c];
      memcpy(line, dst, cur_w * sizeof(*line));
      synthesis_26_row(cur_w, line, line + low_w, dst);
    }
  }

  // Remove the scaling bits, rounding the magnitude of each value.
  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      const tran_low_t v = c[row * pitch_c + col];
      if (v >= 0)
        x[row * pitch_x + col] = (v + round_offset) >> dwt_scale_bits;
      else
        x[row * pitch_x + col] = -((-v + round_offset) >> dwt_scale_bits);
    }
  }
}
// Inverse 9/7 lifting step in double precision.
//   length   - number of output samples; at most DWT_MAX_LENGTH fit in the
//              local scratch array.
//   lowpass  - length/2 low-band coefficients.
//   highpass - length/2 high-band coefficients.
//   x        - receives `length` reconstructed samples.
static void synthesis_97(int length, double *lowpass, double *highpass,
                         double *x) {
  const double s_low = 1.149604398;
  const double s_high = 1/1.149604398;
  const double inv_s_low = 1 / s_low;
  const double inv_s_high = 1 / s_high;
  // Lifting coefficients listed in the order in which they are undone.
  const double update_coef[2] = { 0.4435068522, -0.05298011854 };
  const double predict_coef[2] = { 0.8829110762, -1.586134342 };
  double y[DWT_MAX_LENGTH];
  int stage, i;

  // Undo pack and scale: interleave the two bands into a scratch array
  // first, then copy the result to x.
  for (i = 0; i < length / 2; i++) {
    y[i * 2] = lowpass[i] * inv_s_low;
    y[i * 2 + 1] = highpass[i] * inv_s_high;
  }
  memcpy(x, y, sizeof(*y) * length);

  // Stage 0 undoes update 2 / predict 2, stage 1 undoes update 1 / predict 1.
  for (stage = 0; stage < 2; ++stage) {
    const double upd = update_coef[stage];
    const double prd = predict_coef[stage];

    // Undo update: even samples, with x[0] using its single neighbour twice.
    for (i = 2; i < length; i += 2)
      x[i] -= upd * (x[i - 1] + x[i + 1]);
    x[0] -= 2 * upd * x[1];

    // Undo predict: odd samples, with the last one using its single
    // neighbour twice.
    for (i = 1; i < length - 2; i += 2)
      x[i] -= prd * (x[i - 1] + x[i + 1]);
    x[length - 1] -= 2 * prd * x[length - 2];
  }
}
// Multi-level inverse 9/7 wavelet transform of a width x height block,
// carried out in double precision on a local copy of the coefficients.
//   levels         - number of decomposition levels to undo.
//   c, pitch_c     - input coefficients and their row pitch (only read).
//   x, pitch_x     - destination for the rounded result and its row pitch.
//   dwt_scale_bits - each result is divided by 2^dwt_scale_bits before
//                    rounding.
static void dyadic_synthesize_97(int levels, int width, int height,
                                 tran_low_t *c, int pitch_c,
                                 int16_t *x, int pitch_x,
                                 int dwt_scale_bits) {
  int band_h[16], band_w[16];
  int level, row, col;
  double line[2 * DWT_MAX_LENGTH];
  double work[DWT_MAX_LENGTH * DWT_MAX_LENGTH];

  // Load the coefficients into the floating-point work area.
  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col)
      work[row * DWT_MAX_LENGTH + col] = c[row * pitch_c + col];
  }

  // Band size at every level: each level halves the previous, rounding up.
  band_h[0] = height;
  band_w[0] = width;
  for (level = 1; level <= levels; ++level) {
    band_h[level] = (band_h[level - 1] + 1) >> 1;
    band_w[level] = (band_w[level - 1] + 1) >> 1;
  }

  // Undo the decomposition from the coarsest level to the finest.
  for (level = levels; level-- > 0;) {
    const int cur_h = band_h[level];
    const int cur_w = band_w[level];
    const int low_h = band_h[level + 1];
    const int low_w = band_w[level + 1];
    if (cur_h < 2 || cur_w < 2) continue;

    // Column synthesis through the scratch line.
    for (col = 0; col < cur_w; ++col) {
      for (row = 0; row < cur_h; ++row)
        line[row] = work[row * DWT_MAX_LENGTH + col];
      synthesis_97(cur_h, line, line + low_h, line + cur_h);
      for (row = 0; row < cur_h; ++row)
        work[row * DWT_MAX_LENGTH + col] = line[cur_h + row];
    }

    // Row synthesis, reconstructing straight into the work area.
    for (row = 0; row < cur_h; ++row) {
      double *const dst = &work[row * DWT_MAX_LENGTH];
      memcpy(line, dst, cur_w * sizeof(*line));
      synthesis_97(cur_w, line, line + low_w, dst);
    }
  }

  // Scale down and round to the nearest integer.
  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col)
      x[row * pitch_x + col] = round(work[row * DWT_MAX_LENGTH + col] /
                                     (1 << dwt_scale_bits));
  }
}
// Inverse 32x32 wavelet transform: dispatches to the synthesis routine
// selected at compile time by DWT_TYPE (26, 97 or 53).
//   input  - 32x32 coefficients with a row pitch of 32; passed as the
//            in-place coefficient buffer of the synthesis routine.
//   output - destination block; rows are `stride` elements apart.
// Four decomposition levels are undone with 2 scale bits.
// NOTE(review): the synthesis routines take an int16_t destination while
// `output` is tran_low_t * - confirm the two types match in every
// configuration this is built for.
// NOTE(review): if DWT_TYPE is none of the three values the body is empty.
void vp9_idwt32x32_c(tran_low_t *input, tran_low_t *output, int stride) {
#if DWT_TYPE == 26
dyadic_synthesize_26(4, 32, 32, input, 32, output, stride, 2);
#elif DWT_TYPE == 97
dyadic_synthesize_97(4, 32, 32, input, 32, output, stride, 2);
#elif DWT_TYPE == 53
dyadic_synthesize_53(4, 32, 32, input, 32, output, stride, 2);
#endif
}
// Inverse 32x32 hybrid transform: a 16x16 inverse DCT on the top-left
// quadrant followed by one level of wavelet synthesis over the whole block.
//   input  - 32x32 coefficients with a row pitch of 32 (only read).
//   output - destination block; rows are `stride` elements apart.
void vp9_idwtdct32x32_c(tran_low_t *input, tran_low_t *output, int stride) {
  const int dwt_levels = 1;
  tran_low_t ll_band[16 * 16];
  tran_low_t work[32 * 32];
  int row, col;

  // Work on a private copy of all 32 rows so that `input` is left untouched.
  memcpy(work, input, sizeof(work));

  // Pull the top-left 16x16 quadrant out into a densely packed buffer.
  for (row = 0; row < 16; ++row) {
    for (col = 0; col < 16; ++col)
      ll_band[row * 16 + col] = input[row * 32 + col];
  }

  // Inverse DCT of that quadrant, written back over the same quadrant of
  // the work buffer (row pitch 32).
  vp9_idct16x16_noscale(ll_band, work, 32);

#if DWT_TYPE == 26
  dyadic_synthesize_26(dwt_levels, 32, 32, work, 32, output, stride, 2);
#elif DWT_TYPE == 97
  dyadic_synthesize_97(dwt_levels, 32, 32, work, 32, output, stride, 2);
#elif DWT_TYPE == 53
  dyadic_synthesize_53(dwt_levels, 32, 32, work, 32, output, stride, 2);
#endif
}
#if CONFIG_TX64X64
// Inverse 64x64 wavelet transform: dispatches to the synthesis routine
// selected at compile time by DWT_TYPE (26, 97 or 53).
//   input  - 64x64 coefficients with a row pitch of 64; passed as the
//            in-place coefficient buffer of the synthesis routine.
//   output - destination block; rows are `stride` elements apart.
// Four decomposition levels are undone with 1 scale bit.
// NOTE(review): the synthesis routines take an int16_t destination while
// `output` is tran_low_t * - confirm the two types match in every
// configuration this is built for.
// NOTE(review): if DWT_TYPE is none of the three values the body is empty.
void vp9_idwt64x64_c(tran_low_t *input, tran_low_t *output, int stride) {
#if DWT_TYPE == 26
dyadic_synthesize_26(4, 64, 64, input, 64, output, stride, 1);
#elif DWT_TYPE == 97
dyadic_synthesize_97(4, 64, 64, input, 64, output, stride, 1);
#elif DWT_TYPE == 53
dyadic_synthesize_53(4, 64, 64, input, 64, output, stride, 1);
#endif
}
// Inverse 64x64 hybrid transform: a 32x32 inverse DCT on the top-left
// quadrant followed by one level of wavelet synthesis over the whole block.
//   input  - 64x64 coefficients with a row pitch of 64 (only read).
//   output - destination block; rows are `stride` elements apart.
void vp9_idwtdct64x64_c(tran_low_t *input, tran_low_t *output, int stride) {
  const int dwt_levels = 1;
  tran_low_t ll_band[32 * 32];
  tran_low_t work[64 * 64];
  int row, col;

  // Work on a private copy of all 64 rows so that `input` is left untouched.
  memcpy(work, input, sizeof(work));

  // Pull the top-left 32x32 quadrant out into a densely packed buffer.
  for (row = 0; row < 32; ++row) {
    for (col = 0; col < 32; ++col)
      ll_band[row * 32 + col] = input[row * 64 + col];
  }

  // Inverse DCT of that quadrant, written back over the same quadrant of
  // the work buffer (row pitch 64).
  vp9_idct32x32_noscale(ll_band, work, 64);

#if DWT_TYPE == 26
  dyadic_synthesize_26(dwt_levels, 64, 64, work, 64, output, stride, 1);
#elif DWT_TYPE == 97
  dyadic_synthesize_97(dwt_levels, 64, 64, work, 64, output, stride, 1);
#elif DWT_TYPE == 53
  dyadic_synthesize_53(dwt_levels, 64, 64, work, 64, output, stride, 1);
#endif
}
#endif // CONFIG_TX64X64

39
vp9/common/vp9_idwt.h Normal file
View File

@ -0,0 +1,39 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_COMMON_VP9_IDWT_H_
#define VP9_COMMON_VP9_IDWT_H_
#include <assert.h>
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_enums.h"
#include "vp9/common/vp9_idct.h"
#define DWT_MAX_LENGTH 64
#define DWT_TYPE 26 // 26/53/97
#ifdef __cplusplus
extern "C" {
#endif
#if CONFIG_TX64X64
void vp9_idwt64x64(tran_low_t *input, tran_low_t *output, int stride);
void vp9_idwtdct64x64(tran_low_t *input, tran_low_t *output, int stride);
#endif // CONFIG_TX64X64
void vp9_idwt32x32(tran_low_t *input, tran_low_t *output, int stride);
void vp9_idwtdct32x32(tran_low_t *input, tran_low_t *output, int stride);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP9_COMMON_VP9_IDWT_H_

View File

@ -396,6 +396,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_256_add/;
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
add_proto qw/void vp9_idct16x16_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_noscale/;
}
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_10_add/;
@ -411,6 +416,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct64x64_4096_add/;
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
add_proto qw/void vp9_idct32x32_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_noscale/;
}
}
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
@ -454,6 +464,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_256_add/;
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
add_proto qw/void vp9_idct16x16_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_noscale/;
}
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_10_add/;
@ -469,6 +484,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct64x64_4096_add/;
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
add_proto qw/void vp9_idct32x32_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_noscale/;
}
}
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
@ -516,6 +536,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
add_proto qw/void vp9_idct16x16_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_noscale/;
}
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
@ -535,6 +560,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct64x64_4096_add/;
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
add_proto qw/void vp9_idct32x32_noscale/, "const tran_low_t *input, int16_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_noscale/;
}
}
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
@ -1498,12 +1528,22 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32_rd/;
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
add_proto qw/void vp9_fdct16x16_noscale/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct16x16_noscale/;
}
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct64x64_1/;
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct64x64/;
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
add_proto qw/void vp9_fdct32x32_noscale/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32_noscale/;
}
}
specialize qw/vp9_fdct32x32_rd sse2/;
} else {
@ -1546,12 +1586,22 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32_rd sse2 avx2/;
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
add_proto qw/void vp9_fdct16x16_noscale/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct16x16_noscale/;
}
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct64x64_1/;
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct64x64/;
if (vpx_config("CONFIG_WAVELETS") eq "yes") {
add_proto qw/void vp9_fdct32x32_noscale/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32_noscale/;
}
}
}

View File

@ -522,6 +522,193 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
}
}
#if CONFIG_WAVELETS
// The difference between this one and the function above is scaling
// of the input. This function does not scale so that the actual 2D
// transform is unitary. The function above scales the transform to be
// 8 times unitary.
//
// Forward 16x16 DCT computed as two 1-D passes of 16 lines each.
//   input  - 16x16 source samples; rows are `stride` elements apart.
//   output - receives the 256 transform coefficients.
//   stride - row pitch of `input`, in elements.
// In pass 0 every mirrored sum/difference of source samples is halved
// (>> 1); in pass 1 every intermediate value is reduced with (v + 1) >> 2
// before the butterflies are applied.
void vp9_fdct16x16_noscale_c(const int16_t *input, tran_low_t *output,
int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we transpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int pass;
// We need an intermediate buffer between passes.
tran_low_t intermediate[256];
// Pass 0 reads the source through in_pass0; pass 1 reads the intermediate
// buffer through in, which stays NULL until pass 0 has finished.
const int16_t *in_pass0 = input;
const tran_low_t *in = NULL;
tran_low_t *out = intermediate;
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
tran_high_t step1[8]; // canbe16
tran_high_t step2[8]; // canbe16
tran_high_t step3[8]; // canbe16
// Note: this local array shadows the `input` parameter for the rest of
// the loop body; the source samples are only reached through in_pass0.
tran_high_t input[8]; // canbe16
tran_high_t temp1, temp2; // needs32
int i;
for (i = 0; i < 16; i++) {
if (0 == pass) {
// Calculate input for the first 8 results.
input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) >> 1;
input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) >> 1;
input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) >> 1;
input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) >> 1;
input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) >> 1;
input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) >> 1;
input[6] = (in_pass0[6 * stride] + in_pass0[ 9 * stride]) >> 1;
input[7] = (in_pass0[7 * stride] + in_pass0[ 8 * stride]) >> 1;
// Calculate input for the next 8 results.
step1[0] = (in_pass0[7 * stride] - in_pass0[ 8 * stride]) >> 1;
step1[1] = (in_pass0[6 * stride] - in_pass0[ 9 * stride]) >> 1;
step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) >> 1;
step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) >> 1;
step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) >> 1;
step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) >> 1;
step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) >> 1;
step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) >> 1;
} else {
// Calculate input for the first 8 results.
input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2);
input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2);
// Calculate input for the next 8 results.
step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2);
step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2);
step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
}
// Work on the first eight values; fdct8(input, even_results);
// The results land in the even-indexed slots of the current output line.
{
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
// stage 1
s0 = input[0] + input[7];
s1 = input[1] + input[6];
s2 = input[2] + input[5];
s3 = input[3] + input[4];
s4 = input[3] - input[4];
s5 = input[2] - input[5];
s6 = input[1] - input[6];
s7 = input[0] - input[7];
// fdct4(step, step);
x0 = s0 + s3;
x1 = s1 + s2;
x2 = s1 - s2;
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
out[0] = fdct_round_shift(t0);
out[4] = fdct_round_shift(t2);
out[8] = fdct_round_shift(t1);
out[12] = fdct_round_shift(t3);
// Stage 2
t0 = (s6 - s5) * cospi_16_64;
t1 = (s6 + s5) * cospi_16_64;
t2 = fdct_round_shift(t0);
t3 = fdct_round_shift(t1);
// Stage 3
x0 = s4 + t2;
x1 = s4 - t2;
x2 = s7 - t3;
x3 = s7 + t3;
// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
out[2] = fdct_round_shift(t0);
out[6] = fdct_round_shift(t2);
out[10] = fdct_round_shift(t1);
out[14] = fdct_round_shift(t3);
}
// Work on the next eight values; step1 -> odd_results
// The results land in the odd-indexed slots of the current output line.
{
// step 2
temp1 = (step1[5] - step1[2]) * cospi_16_64;
temp2 = (step1[4] - step1[3]) * cospi_16_64;
step2[2] = fdct_round_shift(temp1);
step2[3] = fdct_round_shift(temp2);
temp1 = (step1[4] + step1[3]) * cospi_16_64;
temp2 = (step1[5] + step1[2]) * cospi_16_64;
step2[4] = fdct_round_shift(temp1);
step2[5] = fdct_round_shift(temp2);
// step 3
step3[0] = step1[0] + step2[3];
step3[1] = step1[1] + step2[2];
step3[2] = step1[1] - step2[2];
step3[3] = step1[0] - step2[3];
step3[4] = step1[7] - step2[4];
step3[5] = step1[6] - step2[5];
step3[6] = step1[6] + step2[5];
step3[7] = step1[7] + step2[4];
// step 4
temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
step2[1] = fdct_round_shift(temp1);
step2[2] = fdct_round_shift(temp2);
temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
step2[5] = fdct_round_shift(temp1);
step2[6] = fdct_round_shift(temp2);
// step 5
step1[0] = step3[0] + step2[1];
step1[1] = step3[0] - step2[1];
step1[2] = step3[3] + step2[2];
step1[3] = step3[3] - step2[2];
step1[4] = step3[4] - step2[5];
step1[5] = step3[4] + step2[5];
step1[6] = step3[7] - step2[6];
step1[7] = step3[7] + step2[6];
// step 6
temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
out[1] = fdct_round_shift(temp1);
out[9] = fdct_round_shift(temp2);
temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
out[5] = fdct_round_shift(temp1);
out[13] = fdct_round_shift(temp2);
temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
out[3] = fdct_round_shift(temp1);
out[11] = fdct_round_shift(temp2);
temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
out[7] = fdct_round_shift(temp1);
out[15] = fdct_round_shift(temp2);
}
// Do next column (which is a transposed row in second/horizontal pass)
// NOTE(review): in is still NULL while pass == 0, so this increments a
// null pointer; it is never dereferenced during that pass.
in++;
in_pass0++;
out += 16;
}
// Setup in/out for next pass.
in = intermediate;
out = output;
}
}
#endif // CONFIG_WAVELETS
void vp9_fadst8(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
@ -1389,6 +1576,35 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
}
}
#if CONFIG_WAVELETS
// Forward 32x32 DCT built from two passes of the 1-D vp9_fdct32.
//   input  - 32x32 source samples; rows are `stride` elements apart.
//   out    - receives the 1024 coefficients with a row pitch of 32.
//   stride - row pitch of `input`, in elements.
// Both passes shift their result right by 2; the column pass adds an extra
// rounding unit for positive values, the row pass for negative values.
void vp9_fdct32x32_noscale_c(const int16_t *input, tran_low_t *out,
                             int stride) {
  tran_high_t col_pass[32 * 32];
  tran_high_t line_in[32], line_out[32];
  int line, k;

  // Columns
  for (line = 0; line < 32; ++line) {
    for (k = 0; k < 32; ++k)
      line_in[k] = input[k * stride + line];
    vp9_fdct32(line_in, line_out, 0);
    for (k = 0; k < 32; ++k) {
      const tran_high_t v = line_out[k];
      col_pass[k * 32 + line] = (v + 1 + (v > 0)) >> 2;
    }
  }

  // Rows
  for (line = 0; line < 32; ++line) {
    for (k = 0; k < 32; ++k)
      line_in[k] = col_pass[line * 32 + k];
    vp9_fdct32(line_in, line_out, 0);
    for (k = 0; k < 32; ++k) {
      const tran_high_t v = line_out[k];
      out[line * 32 + k] = (tran_low_t)((v + 1 + (v < 0)) >> 2);
    }
  }
}
#endif // CONFIG_WAVELETS
// Note that although we use dct_32_round in dct32 computation flow,
// this 2d fdct32x32 for rate-distortion optimization loop is operating
// within 16 bits precision.

323
vp9/encoder/vp9_dwt.c Normal file
View File

@ -0,0 +1,323 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include <math.h>
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/vp9_dwt.h"
// Forward 5/3 lifting step used for rows.
// Note: block length must be even (and at least 2) for this implementation.
//   x        - `length` input samples (only read).
//   lowpass  - receives length/2 low-band coefficients.
//   highpass - receives length/2 high-band coefficients.
static void analysis_53_row(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  const int last = half - 1;
  tran_low_t prev;
  int k;

  // Predict step: the low band starts as twice the even sample, the high
  // band is the odd sample minus the rounded mean of its even neighbours.
  // The doubling is written as a multiplication because left-shifting a
  // negative value is undefined behavior in C.
  for (k = 0; k < last; ++k) {
    const tran_low_t even = x[2 * k];
    lowpass[k] = even * 2;
    highpass[k] = x[2 * k + 1] - ((even + x[2 * k + 2] + 1) >> 1);
  }

  // The final pair has no right-hand even neighbour.
  lowpass[last] = x[2 * last] * 2;
  highpass[last] = x[2 * last + 1] - x[2 * last];

  // Update step: add the rounded mean of the neighbouring high coefficients
  // to each low coefficient (the first one uses highpass[0] twice).
  prev = highpass[0];
  for (k = 0; k < half; ++k) {
    lowpass[k] += (prev + highpass[k] + 1) >> 1;
    prev = highpass[k];
  }
}
// Forward 5/3 lifting step used for columns.
// The block length must be even and at least 2.
//   x        - `length` input samples (only read).
//   lowpass  - receives length/2 low-band coefficients.
//   highpass - receives length/2 high-band coefficients.
static void analysis_53_col(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  const int last = half - 1;
  tran_low_t prev;
  int k;

  // Predict step: the low band starts as the even sample, the high band is
  // a rounded quarter of (2 * odd - left even - right even).
  // The doubling is written as a multiplication because left-shifting a
  // negative value is undefined behavior in C.
  for (k = 0; k < last; ++k) {
    const tran_low_t even = x[2 * k];
    lowpass[k] = even;
    highpass[k] = (x[2 * k + 1] * 2 - (even + x[2 * k + 2]) + 2) >> 2;
  }

  // The final pair has no right-hand even neighbour.
  lowpass[last] = x[2 * last];
  highpass[last] = (x[2 * last + 1] - x[2 * last] + 1) >> 1;

  // Update step: add the rounded mean of the neighbouring high coefficients
  // to each low coefficient (the first one uses highpass[0] twice).
  prev = highpass[0];
  for (k = 0; k < half; ++k) {
    lowpass[k] += (prev + highpass[k] + 1) >> 1;
    prev = highpass[k];
  }
}
// Multi-level forward 5/3 wavelet transform of a width x height block.
//   levels         - number of decomposition levels to apply.
//   x, pitch_x     - source samples and their row pitch (only read).
//   c, pitch_c     - receives the coefficients; rows are pitch_c apart.
//   dwt_scale_bits - the input is scaled up by 2^dwt_scale_bits before the
//                    transform.
// Decomposition stops early once either band dimension drops below 2.
static void dyadic_analyze_53(int levels, int width, int height,
                              int16_t *x, int pitch_x,
                              tran_low_t *c, int pitch_c,
                              int dwt_scale_bits) {
  int lv, i, j, nh, nw, hh = height, hw = width;
  tran_low_t buffer[2 * DWT_MAX_LENGTH];
  // Scale by multiplication: the samples may be negative, and left-shifting
  // a negative value is undefined behavior in C.
  const int scale = 1 << dwt_scale_bits;

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      c[i * pitch_c + j] = x[i * pitch_x + j] * scale;
    }
  }

  for (lv = 0; lv < levels; lv++) {
    // nh/nw: size of the band being split; hh/hw: size of its low half.
    nh = hh;
    hh = (hh + 1) >> 1;
    nw = hw;
    hw = (hw + 1) >> 1;
    if ((nh < 2) || (nw < 2)) return;

    // Row analysis: copy the row out, write low/high halves back in place.
    for (i = 0; i < nh; i++) {
      memcpy(buffer, &c[i * pitch_c], nw * sizeof(tran_low_t));
      analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
    }

    // Column analysis: gather the column into the second half of the
    // scratch buffer, analyze into the first half, then scatter it back.
    for (j = 0; j < nw; j++) {
      for (i = 0; i < nh; i++)
        buffer[i + nh] = c[i * pitch_c + j];
      analysis_53_col(nh, buffer + nh, buffer, buffer + hh);
      for (i = 0; i < nh; i++)
        c[i * pitch_c + j] = buffer[i];
    }
  }
}
// Forward 2/6 lifting step used for rows.
// The block length must be even.
//   x        - `length` input samples (only read).
//   lowpass  - receives length/2 low-band coefficients (pair sums).
//   highpass - receives length/2 high-band coefficients (pair differences,
//              corrected from the low band when at least four pairs exist).
static void analysis_26_row(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  int k;

  // Split every sample pair into its sum and its difference.
  for (k = 0; k < half; ++k) {
    const tran_low_t first = x[2 * k];
    const tran_low_t second = x[2 * k + 1];
    lowpass[k] = first + second;
    highpass[k] = first - second;
  }

  // High-band correction from neighbouring low coefficients; the two end
  // positions use the nearest available pair.
  if (half >= 4) {
    highpass[0] -= (lowpass[0] - lowpass[1] + 4) >> 3;
    for (k = 1; k < half - 1; ++k)
      highpass[k] -= (lowpass[k - 1] - lowpass[k + 1] + 4) >> 3;
    highpass[half - 1] -= (lowpass[half - 2] - lowpass[half - 1] + 4) >> 3;
  }
}
// Forward 2/6 lifting step used for columns.
// The block length must be even.
//   x        - `length` input samples (only read).
//   lowpass  - receives length/2 low-band coefficients (rounded half-sums).
//   highpass - receives length/2 high-band coefficients (rounded
//              half-differences, corrected from the low band when at least
//              four pairs exist).
static void analysis_26_col(int length, tran_low_t *x,
                            tran_low_t *lowpass, tran_low_t *highpass) {
  const int half = length >> 1;
  int k;

  // Split every sample pair into its rounded half-sum and half-difference.
  for (k = 0; k < half; ++k) {
    const tran_low_t first = x[2 * k];
    const tran_low_t second = x[2 * k + 1];
    lowpass[k] = (first + second + 1) >> 1;
    highpass[k] = (first - second + 1) >> 1;
  }

  // High-band correction from neighbouring low coefficients; the two end
  // positions use the nearest available pair.
  if (half >= 4) {
    highpass[0] -= (lowpass[0] - lowpass[1] + 4) >> 3;
    for (k = 1; k < half - 1; ++k)
      highpass[k] -= (lowpass[k - 1] - lowpass[k + 1] + 4) >> 3;
    highpass[half - 1] -= (lowpass[half - 2] - lowpass[half - 1] + 4) >> 3;
  }
}
// Multi-level forward 2/6 wavelet transform of a width x height block.
//   levels         - number of decomposition levels to apply.
//   x, pitch_x     - source samples and their row pitch (only read).
//   c, pitch_c     - receives the coefficients; rows are pitch_c apart.
//   dwt_scale_bits - the input is scaled up by 2^dwt_scale_bits before the
//                    transform.
// Decomposition stops early once either band dimension drops below 2.
static void dyadic_analyze_26(int levels, int width, int height,
                              int16_t *x, int pitch_x,
                              tran_low_t *c, int pitch_c,
                              int dwt_scale_bits) {
  int lv, i, j, nh, nw, hh = height, hw = width;
  tran_low_t buffer[2 * DWT_MAX_LENGTH];
  // Scale by multiplication: the samples may be negative, and left-shifting
  // a negative value is undefined behavior in C.
  const int scale = 1 << dwt_scale_bits;

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      c[i * pitch_c + j] = x[i * pitch_x + j] * scale;
    }
  }

  for (lv = 0; lv < levels; lv++) {
    // nh/nw: size of the band being split; hh/hw: size of its low half.
    nh = hh;
    hh = (hh + 1) >> 1;
    nw = hw;
    hw = (hw + 1) >> 1;
    if ((nh < 2) || (nw < 2)) return;

    // Row analysis: copy the row out, write low/high halves back in place.
    for (i = 0; i < nh; i++) {
      memcpy(buffer, &c[i * pitch_c], nw * sizeof(tran_low_t));
      analysis_26_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
    }

    // Column analysis: gather the column into the second half of the
    // scratch buffer, analyze into the first half, then scatter it back.
    for (j = 0; j < nw; j++) {
      for (i = 0; i < nh; i++)
        buffer[i + nh] = c[i * pitch_c + j];
      analysis_26_col(nh, buffer + nh, buffer, buffer + hh);
      for (i = 0; i < nh; i++)
        c[i * pitch_c + j] = buffer[i];
    }
  }
}
// Forward 9/7 lifting step in double precision.
//   length   - number of input samples; at most DWT_MAX_LENGTH fit in the
//              local scratch array.
//   x        - `length` input samples; overwritten by the lifting steps.
//   lowpass  - receives length/2 scaled low-band coefficients.
//   highpass - receives length/2 scaled high-band coefficients.
static void analysis_97(int length, double *x,
                        double *lowpass, double *highpass) {
  // Lifting coefficients listed in the order in which they are applied.
  static const double predict_coef[2] = { -1.586134342, 0.8829110762 };
  static const double update_coef[2] = { -0.05298011854, 0.4435068522 };
  static const double s_low = 1.149604398;
  static const double s_high = 1/1.149604398;
  double y[DWT_MAX_LENGTH];
  int stage, i;

  // Stage 0 is predict 1 / update 1, stage 1 is predict 2 / update 2.
  for (stage = 0; stage < 2; ++stage) {
    const double prd = predict_coef[stage];
    const double upd = update_coef[stage];

    // Predict: odd samples, with the last one using its single neighbour
    // twice.
    for (i = 1; i < length - 2; i += 2)
      x[i] += prd * (x[i - 1] + x[i + 1]);
    x[length - 1] += 2 * prd * x[length - 2];

    // Update: even samples, with x[0] using its single neighbour twice.
    for (i = 2; i < length; i += 2)
      x[i] += upd * (x[i - 1] + x[i + 1]);
    x[0] += 2 * upd * x[1];
  }

  // Scale and pack: take a snapshot of x first, then split it into the
  // two bands.
  memcpy(y, x, sizeof(*y) * length);
  for (i = 0; i < length / 2; i++) {
    lowpass[i] = y[2 * i] * s_low;
    highpass[i] = y[2 * i + 1] * s_high;
  }
}
// Forward multi-level separable 2D wavelet decomposition using the 9/7
// lifting filter (analysis_97).  Work is done in double precision in a dense
// scratch image with row pitch DWT_MAX_LENGTH and rounded into c at the end.
//
//   levels         - maximum number of decomposition levels to apply.
//   width, height  - dimensions of the input block; neither may exceed
//                    DWT_MAX_LENGTH (size of the scratch storage).
//   x, pitch_x     - input samples and their row pitch.
//   c, pitch_c     - output coefficients and their row pitch.
//   dwt_scale_bits - every input sample is scaled up by 2^dwt_scale_bits
//                    before the decomposition starts.
//
// At each level the active region (nw x nh) is filtered along rows and then
// along columns; the next level works on the top-left (hw x hh) region only.
static void dyadic_analyze_97(int levels, int width, int height,
                              int16_t *x, int pitch_x,
                              tran_low_t *c, int pitch_c,
                              int dwt_scale_bits) {
  int lv, i, j, nh, nw, hh = height, hw = width;
  double buffer[2 * DWT_MAX_LENGTH];
  double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
  // Scale by multiplication: the samples are signed and left-shifting a
  // negative value is undefined behaviour in C.
  const int scale = 1 << dwt_scale_bits;
  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      y[i * DWT_MAX_LENGTH + j] = x[i * pitch_x + j] * scale;
    }
  }
  for (lv = 0; lv < levels; lv++) {
    nh = hh;
    hh = (hh + 1) >> 1;
    nw = hw;
    hw = (hw + 1) >> 1;
    // Stop decomposing, but still fall through to the final copy below:
    // the results live in y, so returning here would leave c unwritten.
    if ((nh < 2) || (nw < 2)) break;
    // Row pass: copy the row out (analysis_97 modifies its input), then
    // write the two bands back, the second band starting at offset hw.
    for (i = 0; i < nh; i++) {
      memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
      analysis_97(nw, buffer, &y[i * DWT_MAX_LENGTH],
                  &y[i * DWT_MAX_LENGTH] + hw);
    }
    // Column pass: gather the column into the upper half of the scratch
    // buffer, filter into the lower half, then scatter it back.
    for (j = 0; j < nw; j++) {
      for (i = 0; i < nh; i++)
        buffer[i + nh] = y[i * DWT_MAX_LENGTH + j];
      analysis_97(nh, buffer + nh, buffer, buffer + hh);
      for (i = 0; i < nh; i++)
        y[i * DWT_MAX_LENGTH + j] = buffer[i];
    }
  }
  // Round the floating-point coefficients into the integer output.
  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      c[i * pitch_c + j] = round(y[i * DWT_MAX_LENGTH + j]);
    }
  }
}
// Forward 32x32 wavelet-only transform.  Runs up to four decomposition
// levels of the filter selected at compile time by DWT_TYPE (26, 97 or 53),
// with the input scaled up by 2 bits.  Reads input with row pitch `stride`
// and writes coefficients to output with a row pitch of 32.
void vp9_fdwt32x32_c(tran_low_t *input, tran_low_t *output, int stride) {
  // Parameters shared by every filter variant.
  enum { kLevels = 4, kSize = 32, kScaleBits = 2 };
#if DWT_TYPE == 26
  dyadic_analyze_26(kLevels, kSize, kSize, input, stride, output, kSize,
                    kScaleBits);
#elif DWT_TYPE == 97
  dyadic_analyze_97(kLevels, kSize, kSize, input, stride, output, kSize,
                    kScaleBits);
#elif DWT_TYPE == 53
  dyadic_analyze_53(kLevels, kSize, kSize, input, stride, output, kSize,
                    kScaleBits);
#endif
}
// Forward 32x32 hybrid wavelet + DCT transform.  A single wavelet
// decomposition level (filter chosen at compile time by DWT_TYPE) is applied
// to the whole block, then a 16x16 DCT is applied to the top-left 16x16
// region of the result and written back over that region.
//
//   input  - source samples, row pitch `stride`.
//   output - 32x32 coefficients, row pitch 32.
void vp9_fdwtdct32x32_c(tran_low_t *input, tran_low_t *output,
                        int stride) {
  const int dwt_levels = 1;
  tran_low_t buffer[16 * 16];
  int i;
  // Scales up by 2-bit from unitary
#if DWT_TYPE == 26
  dyadic_analyze_26(dwt_levels, 32, 32, input, stride, output, 32, 2);
#elif DWT_TYPE == 97
  dyadic_analyze_97(dwt_levels, 32, 32, input, stride, output, 32, 2);
#elif DWT_TYPE == 53
  dyadic_analyze_53(dwt_levels, 32, 32, input, stride, output, 32, 2);
#endif
  // 16x16 dct in LL band that is unitary
  vp9_fdct16x16_noscale(output, buffer, 32);
  // Note that the transform overall is 2-bit scaled up from unitary.
  // Copy the densely packed 16x16 DCT result back into the top-left corner
  // of the 32-wide output, one row at a time.
  for (i = 0; i < 16; ++i) {
    memcpy(&output[i * 32], &buffer[i * 16], sizeof(buffer[0]) * 16);
  }
}
#if CONFIG_TX64X64
// Forward 64x64 wavelet-only transform.  Runs up to four decomposition
// levels of the filter selected at compile time by DWT_TYPE (26, 97 or 53),
// with the input scaled up by 1 bit.  Reads input with row pitch `stride`
// and writes coefficients to output with a row pitch of 64.
void vp9_fdwt64x64_c(tran_low_t *input, tran_low_t *output, int stride) {
  // Parameters shared by every filter variant.
  enum { kLevels = 4, kSize = 64, kScaleBits = 1 };
#if DWT_TYPE == 26
  dyadic_analyze_26(kLevels, kSize, kSize, input, stride, output, kSize,
                    kScaleBits);
#elif DWT_TYPE == 97
  dyadic_analyze_97(kLevels, kSize, kSize, input, stride, output, kSize,
                    kScaleBits);
#elif DWT_TYPE == 53
  dyadic_analyze_53(kLevels, kSize, kSize, input, stride, output, kSize,
                    kScaleBits);
#endif
}
// Forward 64x64 hybrid wavelet + DCT transform.  A single wavelet
// decomposition level (filter chosen at compile time by DWT_TYPE) is applied
// to the whole block, then a 32x32 DCT is applied to the top-left 32x32
// region of the result and written back over that region.
//
//   input  - source samples, row pitch `stride`.
//   output - 64x64 coefficients, row pitch 64.
void vp9_fdwtdct64x64_c(tran_low_t *input, tran_low_t *output,
                        int stride) {
  enum { kSize = 64, kHalf = 32, kScaleBits = 1 };
  const int dwt_levels = 1;
  tran_low_t ll_dct[kHalf * kHalf];
  const tran_low_t *src = ll_dct;
  tran_low_t *dst = output;
  int row;
  // One wavelet level; overall scaling is 1 bit up from unitary.
#if DWT_TYPE == 26
  dyadic_analyze_26(dwt_levels, kSize, kSize, input, stride, output, kSize,
                    kScaleBits);
#elif DWT_TYPE == 97
  dyadic_analyze_97(dwt_levels, kSize, kSize, input, stride, output, kSize,
                    kScaleBits);
#elif DWT_TYPE == 53
  dyadic_analyze_53(dwt_levels, kSize, kSize, input, stride, output, kSize,
                    kScaleBits);
#endif
  // Unitary 32x32 DCT of the LL band, packed densely into ll_dct.
  vp9_fdct32x32_noscale(output, ll_dct, kSize);
  // Copy the packed DCT rows back into the top-left corner of the 64-wide
  // output; the overall transform stays 1 bit scaled up from unitary.
  for (row = 0; row < kHalf; ++row, src += kHalf, dst += kSize) {
    memcpy(dst, src, sizeof(*src) * kHalf);
  }
}
#endif // CONFIG_TX64X64

32
vp9/encoder/vp9_dwt.h Normal file
View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Encoder-side forward wavelet (DWT) and hybrid wavelet+DCT transform
// entry points for 32x32 blocks and, when CONFIG_TX64X64 is enabled,
// 64x64 blocks.
#ifndef VP9_ENCODER_VP9_DWT_H_
#define VP9_ENCODER_VP9_DWT_H_
#include "./vpx_config.h"
#include "vp9/common/vp9_idwt.h"
#ifdef __cplusplus
extern "C" {
#endif
// All functions take the source block in `input` with row pitch `stride`
// and write the transform coefficients to `output`.
// NOTE(review): the definitions seen alongside this header carry a `_c`
// suffix (e.g. vp9_fdwt32x32_c); presumably the unsuffixed names are mapped
// elsewhere - confirm before calling these directly.
#if CONFIG_TX64X64
// 64x64 wavelet-only transform.
void vp9_fdwt64x64(tran_low_t *input, tran_low_t *output, int stride);
// 64x64 hybrid: wavelet decomposition followed by a DCT.
void vp9_fdwtdct64x64(tran_low_t *input, tran_low_t *output, int stride);
#endif // CONFIG_TX64X64
// 32x32 wavelet-only transform.
void vp9_fdwt32x32(tran_low_t *input, tran_low_t *output, int stride);
// 32x32 hybrid: wavelet decomposition followed by a DCT.
void vp9_fdwtdct32x32(tran_low_t *input, tran_low_t *output, int stride);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP9_ENCODER_VP9_DWT_H_

View File

@ -70,6 +70,8 @@ VP9_COMMON_SRCS-yes += common/vp9_scan.c
VP9_COMMON_SRCS-yes += common/vp9_scan.h
VP9_COMMON_SRCS-$(CONFIG_GLOBAL_MOTION) += common/vp9_motion_model.c
VP9_COMMON_SRCS-$(CONFIG_GLOBAL_MOTION) += common/vp9_motion_model.h
VP9_COMMON_SRCS-$(CONFIG_WAVELETS) += common/vp9_idwt.c
VP9_COMMON_SRCS-$(CONFIG_WAVELETS) += common/vp9_idwt.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c

View File

@ -85,6 +85,8 @@ VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.c
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.h
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.c
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.h
VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.c
VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c