2010-05-18 17:58:33 +02:00
|
|
|
/*
|
2010-09-09 14:16:39 +02:00
|
|
|
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
2010-05-18 17:58:33 +02:00
|
|
|
*
|
2010-06-18 18:39:21 +02:00
|
|
|
* Use of this source code is governed by a BSD-style license
|
2010-06-04 22:19:40 +02:00
|
|
|
* that can be found in the LICENSE file in the root of the source
|
|
|
|
* tree. An additional intellectual property rights grant can be found
|
2010-06-18 18:39:21 +02:00
|
|
|
* in the file PATENTS. All contributing project authors may
|
2010-06-04 22:19:40 +02:00
|
|
|
* be found in the AUTHORS file in the root of the source tree.
|
2010-05-18 17:58:33 +02:00
|
|
|
*/
|
|
|
|
|
2012-10-19 01:31:59 +02:00
|
|
|
#include <assert.h>
|
2010-05-18 17:58:33 +02:00
|
|
|
#include <math.h>
|
2013-10-26 02:55:07 +02:00
|
|
|
|
|
|
|
#include "./vp9_rtcd.h"
|
2015-07-29 00:25:05 +02:00
|
|
|
#include "./vpx_config.h"
|
2015-07-21 20:56:36 +02:00
|
|
|
#include "./vpx_dsp_rtcd.h"
|
2011-02-14 23:18:18 +01:00
|
|
|
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_blockd.h"
|
2013-02-07 20:51:23 +01:00
|
|
|
#include "vp9/common/vp9_idct.h"
|
2015-07-21 20:56:36 +02:00
|
|
|
#include "vpx_dsp/fwd_txfm.h"
|
2015-07-29 00:25:05 +02:00
|
|
|
#include "vpx_ports/mem.h"
|
2013-11-16 00:21:38 +01:00
|
|
|
|
2015-07-21 00:11:43 +02:00
|
|
|
// 1-D 4-point forward DCT: reads input[0..3] and writes output[0..3].
// Each weighted sum goes through fdct_round_shift() (declared outside this
// file) before being narrowed to tran_low_t. All inputs are read before any
// output is written.
static void fdct4(const tran_low_t *input, tran_low_t *output) {
  // Sums and differences of the mirrored sample pairs (0,3) and (1,2).
  const tran_high_t sum_outer = input[0] + input[3];
  const tran_high_t sum_inner = input[1] + input[2];
  const tran_high_t diff_inner = input[1] - input[2];
  const tran_high_t diff_outer = input[0] - input[3];

  // Even outputs: sum / difference of the pair sums, scaled by cospi_16_64.
  output[0] =
      (tran_low_t)fdct_round_shift((sum_outer + sum_inner) * cospi_16_64);
  output[2] =
      (tran_low_t)fdct_round_shift((sum_outer - sum_inner) * cospi_16_64);

  // Odd outputs: the pair differences weighted by cospi_8_64 / cospi_24_64.
  output[1] = (tran_low_t)fdct_round_shift(diff_inner * cospi_24_64 +
                                           diff_outer * cospi_8_64);
  output[3] = (tran_low_t)fdct_round_shift(diff_outer * cospi_24_64 -
                                           diff_inner * cospi_8_64);
}
|
2012-06-25 21:26:09 +02:00
|
|
|
|
2015-07-21 00:11:43 +02:00
|
|
|
// 1-D 8-point forward DCT: reads input[0..7] and writes output[0..7].
// Even-indexed outputs are produced from the mirror-pair sums, odd-indexed
// outputs from the mirror-pair differences. All inputs are read before any
// output is written.
static void fdct8(const tran_low_t *input, tran_low_t *output) {
  tran_high_t sum[4];          // sum[k]  = input[k] + input[7 - k]
  tran_high_t diff[4];         // diff[k] = input[k] - input[7 - k]
  tran_high_t e0, e1, e2, e3;  // even-half butterfly terms
  tran_high_t mid_lo, mid_hi;  // rounded cospi_16_64 terms of the odd half
  tran_high_t o0, o1, o2, o3;  // odd-half butterfly terms
  int k;

  // Stage 1: pair every sample with its mirror.
  for (k = 0; k < 4; ++k) {
    sum[k] = input[k] + input[7 - k];
    diff[k] = input[k] - input[7 - k];
  }

  // Even half: a 4-point DCT applied to the pair sums.
  e0 = sum[0] + sum[3];
  e1 = sum[1] + sum[2];
  e2 = sum[1] - sum[2];
  e3 = sum[0] - sum[3];
  output[0] = (tran_low_t)fdct_round_shift((e0 + e1) * cospi_16_64);
  output[4] = (tran_low_t)fdct_round_shift((e0 - e1) * cospi_16_64);
  output[2] = (tran_low_t)fdct_round_shift(e2 * cospi_24_64 + e3 * cospi_8_64);
  output[6] = (tran_low_t)fdct_round_shift(e3 * cospi_24_64 - e2 * cospi_8_64);

  // Odd half, stage 2: combine the two inner differences. The intermediate
  // result is deliberately narrowed to tran_low_t before being reused.
  mid_lo = (tran_low_t)fdct_round_shift((diff[1] - diff[2]) * cospi_16_64);
  mid_hi = (tran_low_t)fdct_round_shift((diff[1] + diff[2]) * cospi_16_64);

  // Odd half, stage 3: butterfly against the innermost / outermost
  // differences.
  o0 = diff[3] + mid_lo;
  o1 = diff[3] - mid_lo;
  o2 = diff[0] - mid_hi;
  o3 = diff[0] + mid_hi;

  // Odd half, stage 4: final weighted sums.
  output[1] = (tran_low_t)fdct_round_shift(o0 * cospi_28_64 + o3 * cospi_4_64);
  output[5] = (tran_low_t)fdct_round_shift(o1 * cospi_12_64 + o2 * cospi_20_64);
  output[3] = (tran_low_t)fdct_round_shift(o2 * cospi_12_64 - o1 * cospi_20_64);
  output[7] = (tran_low_t)fdct_round_shift(o3 * cospi_28_64 - o0 * cospi_4_64);
}
|
2012-10-05 12:16:46 +02:00
|
|
|
|
2015-07-21 01:19:32 +02:00
|
|
|
// 1-D 16-point forward DCT: reads in[0..15] and writes out[0..15].
// Even-indexed outputs come from an 8-point DCT of the mirror-pair sums
// (inlined below); odd-indexed outputs come from a six-step network applied
// to the mirror-pair differences. All of in[] is consumed before out[] is
// written.
static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
  tran_high_t sum[8];  // sum[k] = in[k] + in[15 - k]
  tran_high_t odd[8];  // odd[k] = in[7 - k] - in[8 + k]
  tran_high_t rot[8];  // rounded rotation results (steps 2 and 4)
  tran_high_t mix[8];  // step-3 butterfly results
  tran_high_t fin[8];  // step-5 butterfly results
  int k;

  // Step 1: sums feed the even half, differences feed the odd half.
  for (k = 0; k < 8; ++k) {
    sum[k] = in[k] + in[15 - k];
    odd[k] = in[7 - k] - in[8 + k];
  }

  // Even half: 8-point DCT of sum[], written to out[0], out[2], ..., out[14].
  {
    const tran_high_t p0 = sum[0] + sum[7];
    const tran_high_t p1 = sum[1] + sum[6];
    const tran_high_t p2 = sum[2] + sum[5];
    const tran_high_t p3 = sum[3] + sum[4];
    const tran_high_t q0 = sum[3] - sum[4];
    const tran_high_t q1 = sum[2] - sum[5];
    const tran_high_t q2 = sum[1] - sum[6];
    const tran_high_t q3 = sum[0] - sum[7];

    // 4-point DCT terms built from the p sums.
    const tran_high_t e0 = p0 + p3;
    const tran_high_t e1 = p1 + p2;
    const tran_high_t e2 = p1 - p2;
    const tran_high_t e3 = p0 - p3;

    // Rounded cospi_16_64 combination of the two middle differences
    // (kept at tran_high_t width, unlike the standalone fdct8()).
    const tran_high_t r0 = fdct_round_shift((q2 - q1) * cospi_16_64);
    const tran_high_t r1 = fdct_round_shift((q2 + q1) * cospi_16_64);

    const tran_high_t f0 = q0 + r0;
    const tran_high_t f1 = q0 - r0;
    const tran_high_t f2 = q3 - r1;
    const tran_high_t f3 = q3 + r1;

    out[0] = (tran_low_t)fdct_round_shift((e0 + e1) * cospi_16_64);
    out[8] = (tran_low_t)fdct_round_shift((e0 - e1) * cospi_16_64);
    out[4] = (tran_low_t)fdct_round_shift(e3 * cospi_8_64 + e2 * cospi_24_64);
    out[12] = (tran_low_t)fdct_round_shift(e3 * cospi_24_64 - e2 * cospi_8_64);

    out[2] = (tran_low_t)fdct_round_shift(f0 * cospi_28_64 + f3 * cospi_4_64);
    out[10] = (tran_low_t)fdct_round_shift(f1 * cospi_12_64 + f2 * cospi_20_64);
    out[6] = (tran_low_t)fdct_round_shift(f2 * cospi_12_64 - f1 * cospi_20_64);
    out[14] = (tran_low_t)fdct_round_shift(f3 * cospi_28_64 - f0 * cospi_4_64);
  }

  // Step 2: cospi_16_64 combinations of the four central differences.
  rot[2] = fdct_round_shift((odd[5] - odd[2]) * cospi_16_64);
  rot[3] = fdct_round_shift((odd[4] - odd[3]) * cospi_16_64);
  rot[4] = fdct_round_shift((odd[4] + odd[3]) * cospi_16_64);
  rot[5] = fdct_round_shift((odd[5] + odd[2]) * cospi_16_64);

  // Step 3: butterfly the untouched differences against the step-2 results.
  mix[0] = odd[0] + rot[3];
  mix[1] = odd[1] + rot[2];
  mix[2] = odd[1] - rot[2];
  mix[3] = odd[0] - rot[3];
  mix[4] = odd[7] - rot[4];
  mix[5] = odd[6] - rot[5];
  mix[6] = odd[6] + rot[5];
  mix[7] = odd[7] + rot[4];

  // Step 4: cospi_8_64 / cospi_24_64 weighting of mix[1], mix[2], mix[5],
  // mix[6]. rot[2] and rot[5] are reused; their step-2 values are no longer
  // needed at this point.
  rot[1] = fdct_round_shift(mix[6] * cospi_24_64 - mix[1] * cospi_8_64);
  rot[2] = fdct_round_shift(mix[2] * cospi_24_64 + mix[5] * cospi_8_64);
  rot[5] = fdct_round_shift(mix[2] * cospi_8_64 - mix[5] * cospi_24_64);
  rot[6] = fdct_round_shift(mix[1] * cospi_24_64 + mix[6] * cospi_8_64);

  // Step 5: last butterfly layer.
  fin[0] = mix[0] + rot[1];
  fin[1] = mix[0] - rot[1];
  fin[2] = mix[3] + rot[2];
  fin[3] = mix[3] - rot[2];
  fin[4] = mix[4] - rot[5];
  fin[5] = mix[4] + rot[5];
  fin[6] = mix[7] - rot[6];
  fin[7] = mix[7] + rot[6];

  // Step 6: each (fin[k], fin[7 - k]) pair produces two odd-indexed outputs.
  out[1] = (tran_low_t)fdct_round_shift(fin[0] * cospi_30_64 +
                                        fin[7] * cospi_2_64);
  out[15] = (tran_low_t)fdct_round_shift(fin[7] * cospi_30_64 -
                                         fin[0] * cospi_2_64);

  out[9] = (tran_low_t)fdct_round_shift(fin[1] * cospi_14_64 +
                                        fin[6] * cospi_18_64);
  out[7] = (tran_low_t)fdct_round_shift(fin[6] * cospi_14_64 -
                                        fin[1] * cospi_18_64);

  out[5] = (tran_low_t)fdct_round_shift(fin[2] * cospi_22_64 +
                                        fin[5] * cospi_10_64);
  out[11] = (tran_low_t)fdct_round_shift(fin[5] * cospi_22_64 -
                                         fin[2] * cospi_10_64);

  out[13] = (tran_low_t)fdct_round_shift(fin[3] * cospi_6_64 +
                                         fin[4] * cospi_26_64);
  out[3] = (tran_low_t)fdct_round_shift(fin[4] * cospi_6_64 -
                                        fin[3] * cospi_26_64);
}
|
2014-11-18 20:53:14 +01:00
|
|
|
|
2015-07-21 01:19:32 +02:00
|
|
|
// 1-D 4-point forward ADST: reads input[0..3] and writes output[0..3].
// An all-zero input short-circuits to an all-zero output.
static void fadst4(const tran_low_t *input, tran_low_t *output) {
  const tran_high_t a = input[0];
  const tran_high_t b = input[1];
  const tran_high_t c = input[2];
  const tran_high_t d = input[3];
  tran_high_t low_mix, high_mix, centre, cross;

  // Nothing to transform when every sample is zero.
  if ((a | b | c | d) == 0) {
    output[0] = output[1] = output[2] = output[3] = 0;
    return;
  }

  // Weighted combinations of the samples with the sinpi_k_9 constants.
  low_mix = sinpi_1_9 * a + sinpi_2_9 * b + sinpi_4_9 * d;
  high_mix = sinpi_4_9 * a - sinpi_1_9 * b + sinpi_2_9 * d;
  centre = sinpi_3_9 * c;
  cross = sinpi_3_9 * (a + b - d);

  // 1-D transform scaling factor is sqrt(2).
  output[0] = (tran_low_t)fdct_round_shift(low_mix + centre);
  output[1] = (tran_low_t)fdct_round_shift(cross);
  output[2] = (tran_low_t)fdct_round_shift(high_mix - centre);
  output[3] = (tran_low_t)fdct_round_shift(high_mix - low_mix + centre);
}
|
|
|
|
|
2015-07-21 01:19:32 +02:00
|
|
|
// 1-D 8-point forward ADST: reads input[0..7] and writes output[0..7].
// v[] holds the current stage values, r[] the weighted terms computed from
// them. The input is fully loaded (in permuted order) before any output is
// written.
static void fadst8(const tran_low_t *input, tran_low_t *output) {
  tran_high_t v[8];
  tran_high_t r[8];
  int k;

  // Input permutation used by this transform.
  v[0] = input[7];
  v[1] = input[0];
  v[2] = input[5];
  v[3] = input[2];
  v[4] = input[3];
  v[5] = input[4];
  v[6] = input[1];
  v[7] = input[6];

  // Stage 1: rotate each adjacent pair, then butterfly halves 0-3 vs 4-7.
  r[0] = cospi_2_64 * v[0] + cospi_30_64 * v[1];
  r[1] = cospi_30_64 * v[0] - cospi_2_64 * v[1];
  r[2] = cospi_10_64 * v[2] + cospi_22_64 * v[3];
  r[3] = cospi_22_64 * v[2] - cospi_10_64 * v[3];
  r[4] = cospi_18_64 * v[4] + cospi_14_64 * v[5];
  r[5] = cospi_14_64 * v[4] - cospi_18_64 * v[5];
  r[6] = cospi_26_64 * v[6] + cospi_6_64 * v[7];
  r[7] = cospi_6_64 * v[6] - cospi_26_64 * v[7];

  for (k = 0; k < 4; ++k) {
    v[k] = fdct_round_shift(r[k] + r[k + 4]);
    v[k + 4] = fdct_round_shift(r[k] - r[k + 4]);
  }

  // Stage 2: the first four values pass through unchanged; the last four
  // are weighted by cospi_8_64 / cospi_24_64.
  for (k = 0; k < 4; ++k) r[k] = v[k];
  r[4] = cospi_8_64 * v[4] + cospi_24_64 * v[5];
  r[5] = cospi_24_64 * v[4] - cospi_8_64 * v[5];
  r[6] = cospi_8_64 * v[7] - cospi_24_64 * v[6];
  r[7] = cospi_8_64 * v[6] + cospi_24_64 * v[7];

  for (k = 0; k < 2; ++k) {
    v[k] = r[k] + r[k + 2];
    v[k + 2] = r[k] - r[k + 2];
    v[k + 4] = fdct_round_shift(r[k + 4] + r[k + 6]);
    v[k + 6] = fdct_round_shift(r[k + 4] - r[k + 6]);
  }

  // Stage 3: cospi_16_64 sum/difference on the pairs (2,3) and (6,7).
  r[2] = cospi_16_64 * (v[2] + v[3]);
  r[3] = cospi_16_64 * (v[2] - v[3]);
  r[6] = cospi_16_64 * (v[6] + v[7]);
  r[7] = cospi_16_64 * (v[6] - v[7]);

  v[2] = fdct_round_shift(r[2]);
  v[3] = fdct_round_shift(r[3]);
  v[6] = fdct_round_shift(r[6]);
  v[7] = fdct_round_shift(r[7]);

  // Output permutation; every odd-indexed output is negated.
  output[0] = (tran_low_t)v[0];
  output[1] = (tran_low_t)-v[4];
  output[2] = (tran_low_t)v[6];
  output[3] = (tran_low_t)-v[2];
  output[4] = (tran_low_t)v[3];
  output[5] = (tran_low_t)-v[7];
  output[6] = (tran_low_t)v[5];
  output[7] = (tran_low_t)-v[1];
}
|
|
|
|
|
2015-07-21 01:19:32 +02:00
|
|
|
// 1-D 16-point forward ADST: reads input[0..15] and writes output[0..15].
// x[] holds the current stage values, s[] the weighted terms computed from
// them. The input is fully loaded (in permuted order) before any output is
// written.
static void fadst16(const tran_low_t *input, tran_low_t *output) {
  tran_high_t x[16];
  tran_high_t s[16];
  int k;

  // Input permutation: even slots walk down from input[15] in steps of two,
  // odd slots walk up from input[0] in steps of two.
  for (k = 0; k < 8; ++k) {
    x[2 * k] = input[15 - 2 * k];
    x[2 * k + 1] = input[2 * k];
  }

  // Stage 1: rotate each adjacent pair, then butterfly halves 0-7 vs 8-15.
  s[0] = x[0] * cospi_1_64 + x[1] * cospi_31_64;
  s[1] = x[0] * cospi_31_64 - x[1] * cospi_1_64;
  s[2] = x[2] * cospi_5_64 + x[3] * cospi_27_64;
  s[3] = x[2] * cospi_27_64 - x[3] * cospi_5_64;
  s[4] = x[4] * cospi_9_64 + x[5] * cospi_23_64;
  s[5] = x[4] * cospi_23_64 - x[5] * cospi_9_64;
  s[6] = x[6] * cospi_13_64 + x[7] * cospi_19_64;
  s[7] = x[6] * cospi_19_64 - x[7] * cospi_13_64;
  s[8] = x[8] * cospi_17_64 + x[9] * cospi_15_64;
  s[9] = x[8] * cospi_15_64 - x[9] * cospi_17_64;
  s[10] = x[10] * cospi_21_64 + x[11] * cospi_11_64;
  s[11] = x[10] * cospi_11_64 - x[11] * cospi_21_64;
  s[12] = x[12] * cospi_25_64 + x[13] * cospi_7_64;
  s[13] = x[12] * cospi_7_64 - x[13] * cospi_25_64;
  s[14] = x[14] * cospi_29_64 + x[15] * cospi_3_64;
  s[15] = x[14] * cospi_3_64 - x[15] * cospi_29_64;

  for (k = 0; k < 8; ++k) {
    x[k] = fdct_round_shift(s[k] + s[k + 8]);
    x[k + 8] = fdct_round_shift(s[k] - s[k + 8]);
  }

  // Stage 2: values 0-7 pass through; values 8-15 are weighted.
  for (k = 0; k < 8; ++k) s[k] = x[k];
  s[8] = x[8] * cospi_4_64 + x[9] * cospi_28_64;
  s[9] = x[8] * cospi_28_64 - x[9] * cospi_4_64;
  s[10] = x[10] * cospi_20_64 + x[11] * cospi_12_64;
  s[11] = x[10] * cospi_12_64 - x[11] * cospi_20_64;
  s[12] = x[13] * cospi_4_64 - x[12] * cospi_28_64;
  s[13] = x[12] * cospi_4_64 + x[13] * cospi_28_64;
  s[14] = x[15] * cospi_20_64 - x[14] * cospi_12_64;
  s[15] = x[14] * cospi_20_64 + x[15] * cospi_12_64;

  for (k = 0; k < 4; ++k) {
    x[k] = s[k] + s[k + 4];
    x[k + 4] = s[k] - s[k + 4];
    x[k + 8] = fdct_round_shift(s[k + 8] + s[k + 12]);
    x[k + 12] = fdct_round_shift(s[k + 8] - s[k + 12]);
  }

  // Stage 3: values 0-3 and 8-11 pass through; 4-7 and 12-15 are weighted
  // by cospi_8_64 / cospi_24_64.
  for (k = 0; k < 4; ++k) {
    s[k] = x[k];
    s[k + 8] = x[k + 8];
  }
  s[4] = x[4] * cospi_8_64 + x[5] * cospi_24_64;
  s[5] = x[4] * cospi_24_64 - x[5] * cospi_8_64;
  s[6] = x[7] * cospi_8_64 - x[6] * cospi_24_64;
  s[7] = x[6] * cospi_8_64 + x[7] * cospi_24_64;
  s[12] = x[12] * cospi_8_64 + x[13] * cospi_24_64;
  s[13] = x[12] * cospi_24_64 - x[13] * cospi_8_64;
  s[14] = x[15] * cospi_8_64 - x[14] * cospi_24_64;
  s[15] = x[14] * cospi_8_64 + x[15] * cospi_24_64;

  for (k = 0; k < 2; ++k) {
    x[k] = s[k] + s[k + 2];
    x[k + 2] = s[k] - s[k + 2];
    x[k + 4] = fdct_round_shift(s[k + 4] + s[k + 6]);
    x[k + 6] = fdct_round_shift(s[k + 4] - s[k + 6]);
    x[k + 8] = s[k + 8] + s[k + 10];
    x[k + 10] = s[k + 8] - s[k + 10];
    x[k + 12] = fdct_round_shift(s[k + 12] + s[k + 14]);
    x[k + 14] = fdct_round_shift(s[k + 12] - s[k + 14]);
  }

  // Stage 4: cospi_16_64 sum/difference on pairs (2,3), (6,7), (10,11) and
  // (14,15). The negated constant is applied before rounding, so the sign
  // must stay inside the product.
  s[2] = (-cospi_16_64) * (x[2] + x[3]);
  s[3] = cospi_16_64 * (x[2] - x[3]);
  s[6] = cospi_16_64 * (x[6] + x[7]);
  s[7] = cospi_16_64 * (x[7] - x[6]);
  s[10] = cospi_16_64 * (x[10] + x[11]);
  s[11] = cospi_16_64 * (x[11] - x[10]);
  s[14] = (-cospi_16_64) * (x[14] + x[15]);
  s[15] = cospi_16_64 * (x[14] - x[15]);

  for (k = 2; k < 16; k += 4) {
    x[k] = fdct_round_shift(s[k]);
    x[k + 1] = fdct_round_shift(s[k + 1]);
  }

  // Output permutation; outputs 1, 3, 13 and 15 are negated.
  output[0] = (tran_low_t)x[0];
  output[1] = (tran_low_t)-x[8];
  output[2] = (tran_low_t)x[12];
  output[3] = (tran_low_t)-x[4];
  output[4] = (tran_low_t)x[6];
  output[5] = (tran_low_t)x[14];
  output[6] = (tran_low_t)x[10];
  output[7] = (tran_low_t)x[2];
  output[8] = (tran_low_t)x[3];
  output[9] = (tran_low_t)x[11];
  output[10] = (tran_low_t)x[15];
  output[11] = (tran_low_t)x[7];
  output[12] = (tran_low_t)x[5];
  output[13] = (tran_low_t)-x[13];
  output[14] = (tran_low_t)x[9];
  output[15] = (tran_low_t)-x[1];
}
|
|
|
|
|
2015-07-21 01:19:32 +02:00
|
|
|
static const transform_2d FHT_4[] = {
|
2016-07-27 05:43:23 +02:00
|
|
|
{ fdct4, fdct4 }, // DCT_DCT = 0
|
|
|
|
{ fadst4, fdct4 }, // ADST_DCT = 1
|
|
|
|
{ fdct4, fadst4 }, // DCT_ADST = 2
|
|
|
|
{ fadst4, fadst4 } // ADST_ADST = 3
|
2015-07-21 01:19:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const transform_2d FHT_8[] = {
|
2016-07-27 05:43:23 +02:00
|
|
|
{ fdct8, fdct8 }, // DCT_DCT = 0
|
|
|
|
{ fadst8, fdct8 }, // ADST_DCT = 1
|
|
|
|
{ fdct8, fadst8 }, // DCT_ADST = 2
|
|
|
|
{ fadst8, fadst8 } // ADST_ADST = 3
|
2015-07-21 01:19:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const transform_2d FHT_16[] = {
|
2016-07-27 05:43:23 +02:00
|
|
|
{ fdct16, fdct16 }, // DCT_DCT = 0
|
|
|
|
{ fadst16, fdct16 }, // ADST_DCT = 1
|
|
|
|
{ fdct16, fadst16 }, // DCT_ADST = 2
|
|
|
|
{ fadst16, fadst16 } // ADST_ADST = 3
|
2015-07-21 01:19:32 +02:00
|
|
|
};
|
|
|
|
|
2016-07-27 05:43:23 +02:00
|
|
|
// 4x4 forward hybrid transform.
//   input   - 4x4 block of samples; consecutive rows are `stride` apart.
//   output  - receives the 16 coefficients, row-major with a pitch of 4.
//   stride  - distance between input rows, in elements.
//   tx_type - DCT_DCT is forwarded to vpx_fdct4x4_c(); any other value
//             indexes FHT_4 and is not range-checked here.
void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
                  int tx_type) {
  tran_low_t col_pass[4 * 4];
  tran_low_t line_in[4], line_out[4];
  const transform_2d *xform;
  int r, c;

  if (tx_type == DCT_DCT) {
    vpx_fdct4x4_c(input, output, stride);
    return;
  }
  xform = &FHT_4[tx_type];

  // Column pass: scale each sample by 16, transform, store into col_pass.
  for (c = 0; c < 4; ++c) {
    for (r = 0; r < 4; ++r) line_in[r] = input[r * stride + c] * 16;
    // The top-left sample gets +1 when it is non-zero.
    if (c == 0 && line_in[0] != 0) line_in[0] += 1;
    xform->cols(line_in, line_out);
    for (r = 0; r < 4; ++r) col_pass[r * 4 + c] = line_out[r];
  }

  // Row pass: transform each row, then apply (value + 1) >> 2.
  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) line_in[c] = col_pass[r * 4 + c];
    xform->rows(line_in, line_out);
    for (c = 0; c < 4; ++c) output[r * 4 + c] = (line_out[c] + 1) >> 2;
  }
}
|
|
|
|
|
|
|
|
// Forward 8x8 DCT of `input` followed by quantization of the result.
//   input, stride       - 8x8 source block; rows are `stride` elements apart.
//   coeff_ptr           - receives the 64 transform coefficients (each halved
//                         after the row pass).
//   n_coeffs            - number of entries cleared in qcoeff_ptr/dqcoeff_ptr
//                         and visited through `scan`.
//   skip_block          - when non-zero the quantization loop is skipped and
//                         the cleared outputs / eob 0 are returned.
//   round_ptr, quant_ptr, dequant_ptr
//                       - two-entry tables: entry 0 is used for coefficient
//                         position 0, entry 1 for every other position.
//   qcoeff_ptr          - receives quantized coefficients.
//   dqcoeff_ptr         - receives qcoeff * dequant.
//   eob_ptr             - receives 1 + the last scan index whose quantized
//                         magnitude is non-zero (0 when there is none).
//   scan                - maps scan index to coefficient position.
//   zbin_ptr, quant_shift_ptr, iscan - accepted but not used.
void vp9_fdct8x8_quant_c(const int16_t *input, int stride,
                         tran_low_t *coeff_ptr, intptr_t n_coeffs,
                         int skip_block, const int16_t *zbin_ptr,
                         const int16_t *round_ptr, const int16_t *quant_ptr,
                         const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
                         tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                         uint16_t *eob_ptr, const int16_t *scan,
                         const int16_t *iscan) {
  tran_low_t col_pass[64];
  int last_nonzero = -1;
  int idx, row, col;

  // TODO(jingning) Decide the need of these arguments after the
  // quantization process is completed.
  (void)zbin_ptr;
  (void)quant_shift_ptr;
  (void)iscan;

  // Column pass: an 8-point DCT of every column, with the pair sums and
  // differences pre-scaled by 4. Results land in the same column of
  // col_pass.
  for (col = 0; col < 8; ++col) {
    const int16_t *src = input + col;
    tran_low_t *dst = col_pass + col;

    // Stage 1: mirror-pair sums (s0..s3) and differences (s4..s7).
    const tran_high_t s0 = (src[0 * stride] + src[7 * stride]) * 4;
    const tran_high_t s1 = (src[1 * stride] + src[6 * stride]) * 4;
    const tran_high_t s2 = (src[2 * stride] + src[5 * stride]) * 4;
    const tran_high_t s3 = (src[3 * stride] + src[4 * stride]) * 4;
    const tran_high_t s4 = (src[3 * stride] - src[4 * stride]) * 4;
    const tran_high_t s5 = (src[2 * stride] - src[5 * stride]) * 4;
    const tran_high_t s6 = (src[1 * stride] - src[6 * stride]) * 4;
    const tran_high_t s7 = (src[0 * stride] - src[7 * stride]) * 4;

    // Even half: 4-point DCT terms of the sums.
    const tran_high_t e0 = s0 + s3;
    const tran_high_t e1 = s1 + s2;
    const tran_high_t e2 = s1 - s2;
    const tran_high_t e3 = s0 - s3;

    // Odd half, stages 2-3.
    const tran_high_t m0 = fdct_round_shift((s6 - s5) * cospi_16_64);
    const tran_high_t m1 = fdct_round_shift((s6 + s5) * cospi_16_64);
    const tran_high_t o0 = s4 + m0;
    const tran_high_t o1 = s4 - m0;
    const tran_high_t o2 = s7 - m1;
    const tran_high_t o3 = s7 + m1;

    dst[0 * 8] = (tran_low_t)fdct_round_shift((e0 + e1) * cospi_16_64);
    dst[2 * 8] =
        (tran_low_t)fdct_round_shift(e2 * cospi_24_64 + e3 * cospi_8_64);
    dst[4 * 8] = (tran_low_t)fdct_round_shift((e0 - e1) * cospi_16_64);
    dst[6 * 8] =
        (tran_low_t)fdct_round_shift(e3 * cospi_24_64 - e2 * cospi_8_64);

    // Odd half, stage 4.
    dst[1 * 8] =
        (tran_low_t)fdct_round_shift(o0 * cospi_28_64 + o3 * cospi_4_64);
    dst[3 * 8] =
        (tran_low_t)fdct_round_shift(o2 * cospi_12_64 - o1 * cospi_20_64);
    dst[5 * 8] =
        (tran_low_t)fdct_round_shift(o1 * cospi_12_64 + o2 * cospi_20_64);
    dst[7 * 8] =
        (tran_low_t)fdct_round_shift(o3 * cospi_28_64 - o0 * cospi_4_64);
  }

  // Row pass: 8-point DCT of every row, then halve each coefficient.
  for (row = 0; row < 8; ++row) {
    tran_low_t *coeff_row = coeff_ptr + row * 8;
    fdct8(col_pass + row * 8, coeff_row);
    for (col = 0; col < 8; ++col) coeff_row[col] /= 2;
  }

  // Outputs start cleared so that a skipped block yields all zeros.
  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    // Quantize in scan order, remembering the last scan index that produced
    // a non-zero quantized magnitude.
    for (idx = 0; idx < n_coeffs; idx++) {
      const int rc = scan[idx];
      const int table = rc != 0;  // 0 for position 0, 1 for all others
      const int coeff = coeff_ptr[rc];
      const int sign = (coeff >> 31);  // 0 or -1 (all ones) for negatives
      const int magnitude = (coeff ^ sign) - sign;

      int q = clamp(magnitude + round_ptr[table], INT16_MIN, INT16_MAX);
      q = (q * quant_ptr[table]) >> 16;

      // Re-apply the sign, then dequantize from the stored value.
      qcoeff_ptr[rc] = (q ^ sign) - sign;
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[table];

      if (q) last_nonzero = idx;
    }
  }
  *eob_ptr = last_nonzero + 1;
}
|
|
|
|
|
2016-07-27 05:43:23 +02:00
|
|
|
// 8x8 forward hybrid transform.
//   input   - 8x8 block of samples; consecutive rows are `stride` apart.
//   output  - receives the 64 coefficients, row-major with a pitch of 8.
//   stride  - distance between input rows, in elements.
//   tx_type - DCT_DCT is forwarded to vpx_fdct8x8_c(); any other value
//             indexes FHT_8 and is not range-checked here.
void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
                  int tx_type) {
  tran_low_t col_pass[64];
  tran_low_t line_in[8], line_out[8];
  const transform_2d *xform;
  int r, c;

  if (tx_type == DCT_DCT) {
    vpx_fdct8x8_c(input, output, stride);
    return;
  }
  xform = &FHT_8[tx_type];

  // Column pass: scale each sample by 4, transform, store into col_pass.
  for (c = 0; c < 8; ++c) {
    for (r = 0; r < 8; ++r) line_in[r] = input[r * stride + c] * 4;
    xform->cols(line_in, line_out);
    for (r = 0; r < 8; ++r) col_pass[r * 8 + c] = line_out[r];
  }

  // Row pass: transform each row, then halve. Negative values get +1 before
  // the shift.
  for (r = 0; r < 8; ++r) {
    for (c = 0; c < 8; ++c) line_in[c] = col_pass[r * 8 + c];
    xform->rows(line_in, line_out);
    for (c = 0; c < 8; ++c) {
      output[r * 8 + c] = (line_out[c] + (line_out[c] < 0)) >> 1;
    }
  }
}
|
|
|
|
|
|
|
|
/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
|
|
|
|
pixel. */
|
|
|
|
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
|
|
|
|
int i;
|
|
|
|
tran_high_t a1, b1, c1, d1, e1;
|
|
|
|
const int16_t *ip_pass0 = input;
|
|
|
|
const tran_low_t *ip = NULL;
|
|
|
|
tran_low_t *op = output;
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
a1 = ip_pass0[0 * stride];
|
|
|
|
b1 = ip_pass0[1 * stride];
|
|
|
|
c1 = ip_pass0[2 * stride];
|
|
|
|
d1 = ip_pass0[3 * stride];
|
|
|
|
|
|
|
|
a1 += b1;
|
|
|
|
d1 = d1 - c1;
|
|
|
|
e1 = (a1 - d1) >> 1;
|
|
|
|
b1 = e1 - b1;
|
|
|
|
c1 = e1 - c1;
|
|
|
|
a1 -= c1;
|
|
|
|
d1 += b1;
|
|
|
|
op[0] = (tran_low_t)a1;
|
|
|
|
op[4] = (tran_low_t)c1;
|
|
|
|
op[8] = (tran_low_t)d1;
|
|
|
|
op[12] = (tran_low_t)b1;
|
|
|
|
|
|
|
|
ip_pass0++;
|
|
|
|
op++;
|
|
|
|
}
|
|
|
|
ip = output;
|
|
|
|
op = output;
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
a1 = ip[0];
|
|
|
|
b1 = ip[1];
|
|
|
|
c1 = ip[2];
|
|
|
|
d1 = ip[3];
|
|
|
|
|
|
|
|
a1 += b1;
|
|
|
|
d1 -= c1;
|
|
|
|
e1 = (a1 - d1) >> 1;
|
|
|
|
b1 = e1 - b1;
|
|
|
|
c1 = e1 - c1;
|
|
|
|
a1 -= c1;
|
|
|
|
d1 += b1;
|
|
|
|
op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR);
|
|
|
|
op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR);
|
|
|
|
op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR);
|
|
|
|
op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR);
|
|
|
|
|
|
|
|
ip += 4;
|
|
|
|
op += 4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-27 05:43:23 +02:00
|
|
|
// 16x16 forward hybrid transform.
//   input   - 16x16 block of samples; consecutive rows are `stride` apart.
//   output  - receives the 256 coefficients, row-major with a pitch of 16.
//   stride  - distance between input rows, in elements.
//   tx_type - DCT_DCT is forwarded to vpx_fdct16x16_c(); any other value
//             indexes FHT_16 and is not range-checked here.
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
                    int tx_type) {
  tran_low_t col_pass[256];
  tran_low_t line_in[16], line_out[16];
  const transform_2d *xform;
  int r, c;

  if (tx_type == DCT_DCT) {
    vpx_fdct16x16_c(input, output, stride);
    return;
  }
  xform = &FHT_16[tx_type];

  // Column pass: scale each sample by 4, transform, then reduce by two bits.
  // The rounding offset is 1, plus 1 more for negative values.
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r) line_in[r] = input[r * stride + c] * 4;
    xform->cols(line_in, line_out);
    for (r = 0; r < 16; ++r) {
      col_pass[r * 16 + c] = (line_out[r] + 1 + (line_out[r] < 0)) >> 2;
    }
  }

  // Row pass: transform each row; results are stored without further
  // scaling.
  for (r = 0; r < 16; ++r) {
    for (c = 0; c < 16; ++c) line_in[c] = col_pass[r * 16 + c];
    xform->rows(line_in, line_out);
    for (c = 0; c < 16; ++c) output[r * 16 + c] = line_out[c];
  }
}
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- The FDCT and IDCT are double-only right now; they need a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
|
2014-09-03 01:34:09 +02:00
|
|
|
#if CONFIG_VP9_HIGHBITDEPTH
|
2016-07-27 05:43:23 +02:00
|
|
|
// High-bitdepth entry point for the 4x4 hybrid transform. It forwards all
// arguments unchanged to vp9_fht4x4_c().
void vp9_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
                         int tx_type) {
  vp9_fht4x4_c(input, output, stride, tx_type);
}
|
|
|
|
|
2016-07-27 05:43:23 +02:00
|
|
|
// High-bitdepth entry point for the 8x8 hybrid transform. It forwards all
// arguments unchanged to vp9_fht8x8_c().
void vp9_highbd_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
                         int tx_type) {
  vp9_fht8x8_c(input, output, stride, tx_type);
}
|
|
|
|
|
2014-10-08 21:43:22 +02:00
|
|
|
// High-bitdepth entry point for the 4x4 Walsh-Hadamard transform. It
// forwards all arguments unchanged to vp9_fwht4x4_c().
void vp9_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
                          int stride) {
  vp9_fwht4x4_c(input, output, stride);
}
|
|
|
|
|
2016-07-27 05:43:23 +02:00
|
|
|
// High-bitdepth entry point for the 16x16 hybrid transform. It forwards all
// arguments unchanged to vp9_fht16x16_c().
void vp9_highbd_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
                           int tx_type) {
  vp9_fht16x16_c(input, output, stride, tx_type);
}
|
|
|
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|