From fed8a1837fd6b1e4e36495be8210bd63bfb2bb8f Mon Sep 17 00:00:00 2001 From: Daniel Kang Date: Thu, 2 Aug 2012 17:03:14 -0700 Subject: [PATCH] 16x16 DCT blocks. Set on all 16x16 intra/inter modes Features: - Butterfly fDCT/iDCT - Loop filter does not filter internal edges with 16x16 - Optimize coefficient function - Update coefficient probability function - RD - Entropy stats - 16x16 is a config option Have not tested with experiments. hd: 2.60% std-hd: 2.43% yt: 1.32% derf: 0.60% Change-Id: I96fb090517c30c5da84bad4fae602c3ec0c58b1c --- configure | 1 + test/dct16x16_test.cc | 356 +++++++++++++++++++++++++ test/fdct8x8_test.cc | 8 +- test/idct8x8_test.cc | 154 +++++++++++ test/test.mk | 2 + vp8/common/alloccommon.c | 3 - vp8/common/blockd.h | 11 +- vp8/common/coefupdateprobs.h | 3 + vp8/common/default_coef_probs.h | 208 +++++++++++++++ vp8/common/entropy.c | 103 +++++-- vp8/common/entropy.h | 16 +- vp8/common/entropymode.c | 20 +- vp8/common/generic/systemdependent.c | 3 + vp8/common/idct.h | 11 + vp8/common/idctllm.c | 272 +++++++++++++++++++ vp8/common/invtrans.c | 30 +++ vp8/common/invtrans.h | 8 + vp8/common/loopfilter.c | 27 +- vp8/common/onyxc_int.h | 21 +- vp8/decoder/decodframe.c | 92 ++++++- vp8/decoder/dequantize.c | 36 +++ vp8/decoder/dequantize.h | 9 + vp8/decoder/detokenize.c | 185 ++++++++++--- vp8/decoder/detokenize.h | 3 + vp8/decoder/generic/dsystemdependent.c | 3 + vp8/decoder/onyxd_int.h | 3 + vp8/encoder/bitstream.c | 316 ++++++++++++++-------- vp8/encoder/block.h | 24 +- vp8/encoder/dct.c | 202 ++++++++++++++ vp8/encoder/dct.h | 10 + vp8/encoder/encodeframe.c | 29 +- vp8/encoder/encodeintra.c | 23 ++ vp8/encoder/encodemb.c | 300 ++++++++++++++++++++- vp8/encoder/encodemb.h | 9 + vp8/encoder/generic/csystemdependent.c | 3 + vp8/encoder/onyx_if.c | 12 + vp8/encoder/onyx_int.h | 25 +- vp8/encoder/quantize.c | 194 ++++++++++++-- vp8/encoder/quantize.h | 14 + vp8/encoder/ratectrl.c | 6 + vp8/encoder/rdopt.c | 186 +++++++++++-- vp8/encoder/tokenize.c 
| 329 ++++++++++++++++++----- vp8/encoder/tokenize.h | 5 +- 43 files changed, 2937 insertions(+), 338 deletions(-) create mode 100644 test/dct16x16_test.cc create mode 100644 test/idct8x8_test.cc diff --git a/configure b/configure index 269d997b5..13d57a876 100755 --- a/configure +++ b/configure @@ -227,6 +227,7 @@ EXPERIMENT_LIST=" hybridtransform switchable_interp htrans8x8 + tx16x16 " CONFIG_LIST=" external_build diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc new file mode 100644 index 000000000..679dd30e9 --- /dev/null +++ b/test/dct16x16_test.cc @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> /* NOTE(review): the three header names were lost in extraction; <math.h> (cos, sqrt) and <stdlib.h> (abs) follow from usage in this file, <string.h> is assumed - confirm against upstream */ +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +extern "C" { +#include "vp8/common/entropy.h" +#include "vp8/common/idct.h" +#include "vp8/encoder/dct.h" +} + +#include "acm_random.h" +#include "vpx/vpx_integer.h" + +using libvpx_test::ACMRandom; + +namespace { + +const double PI = 3.1415926535898; +void reference2_16x16_idct_2d(double *input, double *output) { /* Direct-form double-precision 16x16 inverse DCT: each output[k*16+l] sums all 256 inputs weighted by cos(PI*j*(l+0.5)/16)*cos(PI*i*(k+0.5)/16)/256, with an extra sqrt(2) factor for each of i, j that is non-zero. */ + double x; + for (int l = 0; l < 16; ++l) { + for (int k = 0; k < 16; ++k) { + double s = 0; + for (int i = 0; i < 16; ++i) { + for (int j = 0; j < 16; ++j) { + x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/256; + if (i != 0) + x *= sqrt(2.0); + if (j != 0) + x *= sqrt(2.0); + s += x; + } + } + output[k*16+l] = s; + } + } +} + +static void butterfly_16x16_dct_1d(double input[16], double output[16]) { /* 16-point 1-D forward DCT in double precision, computed in butterfly stages; Cn below is cos(n*PI/32). */ + double step[16]; + double intermediate[16]; + double temp1, temp2; + + const double C1 = cos(1*PI/(double)32); + const double C2 = cos(2*PI/(double)32); + const
double C3 = cos(3*PI/(double)32); + const double C4 = cos(4*PI/(double)32); + const double C5 = cos(5*PI/(double)32); + const double C6 = cos(6*PI/(double)32); + const double C7 = cos(7*PI/(double)32); + const double C8 = cos(8*PI/(double)32); + const double C9 = cos(9*PI/(double)32); + const double C10 = cos(10*PI/(double)32); + const double C11 = cos(11*PI/(double)32); + const double C12 = cos(12*PI/(double)32); + const double C13 = cos(13*PI/(double)32); + const double C14 = cos(14*PI/(double)32); + const double C15 = cos(15*PI/(double)32); + + // step 1 + step[ 0] = input[0] + input[15]; + step[ 1] = input[1] + input[14]; + step[ 2] = input[2] + input[13]; + step[ 3] = input[3] + input[12]; + step[ 4] = input[4] + input[11]; + step[ 5] = input[5] + input[10]; + step[ 6] = input[6] + input[ 9]; + step[ 7] = input[7] + input[ 8]; + step[ 8] = input[7] - input[ 8]; + step[ 9] = input[6] - input[ 9]; + step[10] = input[5] - input[10]; + step[11] = input[4] - input[11]; + step[12] = input[3] - input[12]; + step[13] = input[2] - input[13]; + step[14] = input[1] - input[14]; + step[15] = input[0] - input[15]; + + // step 2 + output[0] = step[0] + step[7]; + output[1] = step[1] + step[6]; + output[2] = step[2] + step[5]; + output[3] = step[3] + step[4]; + output[4] = step[3] - step[4]; + output[5] = step[2] - step[5]; + output[6] = step[1] - step[6]; + output[7] = step[0] - step[7]; + + temp1 = step[ 8]*C7; + temp2 = step[15]*C9; + output[ 8] = temp1 + temp2; + + temp1 = step[ 9]*C11; + temp2 = step[14]*C5; + output[ 9] = temp1 - temp2; + + temp1 = step[10]*C3; + temp2 = step[13]*C13; + output[10] = temp1 + temp2; + + temp1 = step[11]*C15; + temp2 = step[12]*C1; + output[11] = temp1 - temp2; + + temp1 = step[11]*C1; + temp2 = step[12]*C15; + output[12] = temp2 + temp1; + + temp1 = step[10]*C13; + temp2 = step[13]*C3; + output[13] = temp2 - temp1; + + temp1 = step[ 9]*C5; + temp2 = step[14]*C11; + output[14] = temp2 + temp1; + + temp1 = step[ 8]*C9; + temp2 = 
step[15]*C7; + output[15] = temp2 - temp1; + + // step 3 + step[ 0] = output[0] + output[3]; + step[ 1] = output[1] + output[2]; + step[ 2] = output[1] - output[2]; + step[ 3] = output[0] - output[3]; + + temp1 = output[4]*C14; + temp2 = output[7]*C2; + step[ 4] = temp1 + temp2; + + temp1 = output[5]*C10; + temp2 = output[6]*C6; + step[ 5] = temp1 + temp2; + + temp1 = output[5]*C6; + temp2 = output[6]*C10; + step[ 6] = temp2 - temp1; + + temp1 = output[4]*C2; + temp2 = output[7]*C14; + step[ 7] = temp2 - temp1; + + step[ 8] = output[ 8] + output[11]; + step[ 9] = output[ 9] + output[10]; + step[10] = output[ 9] - output[10]; + step[11] = output[ 8] - output[11]; + + step[12] = output[12] + output[15]; + step[13] = output[13] + output[14]; + step[14] = output[13] - output[14]; + step[15] = output[12] - output[15]; + + // step 4 + output[ 0] = (step[ 0] + step[ 1]); + output[ 8] = (step[ 0] - step[ 1]); + + temp1 = step[2]*C12; + temp2 = step[3]*C4; + temp1 = temp1 + temp2; + output[ 4] = 2*(temp1*C8); + + temp1 = step[2]*C4; + temp2 = step[3]*C12; + temp1 = temp2 - temp1; + output[12] = 2*(temp1*C8); + + output[ 2] = 2*((step[4] + step[ 5])*C8); + output[14] = 2*((step[7] - step[ 6])*C8); + + temp1 = step[4] - step[5]; + temp2 = step[6] + step[7]; + output[ 6] = (temp1 + temp2); + output[10] = (temp1 - temp2); + + intermediate[8] = step[8] + step[14]; + intermediate[9] = step[9] + step[15]; + + temp1 = intermediate[8]*C12; + temp2 = intermediate[9]*C4; + temp1 = temp1 - temp2; + output[3] = 2*(temp1*C8); + + temp1 = intermediate[8]*C4; + temp2 = intermediate[9]*C12; + temp1 = temp2 + temp1; + output[13] = 2*(temp1*C8); + + output[ 9] = 2*((step[10] + step[11])*C8); + + intermediate[11] = step[10] - step[11]; + intermediate[12] = step[12] + step[13]; + intermediate[13] = step[12] - step[13]; + intermediate[14] = step[ 8] - step[14]; + intermediate[15] = step[ 9] - step[15]; + + output[15] = (intermediate[11] + intermediate[12]); + output[ 1] = -(intermediate[11] - 
intermediate[12]); + + output[ 7] = 2*(intermediate[13]*C8); + + temp1 = intermediate[14]*C12; + temp2 = intermediate[15]*C4; + temp1 = temp1 - temp2; + output[11] = -2*(temp1*C8); + + temp1 = intermediate[14]*C4; + temp2 = intermediate[15]*C12; + temp1 = temp2 + temp1; + output[ 5] = 2*(temp1*C8); +} + +static void reference_16x16_dct_1d(double in[16], double out[16]) { /* Direct-form 16-point DCT: out[k] = sum over n of in[n]*cos(kPi*(2n+1)*k/32), with the k == 0 output scaled by 1/sqrt(2). reference_16x16_dct_2d below calls butterfly_16x16_dct_1d, not this function. */ + const double kPi = 3.141592653589793238462643383279502884; + const double kInvSqrt2 = 0.707106781186547524400844362104; + for (int k = 0; k < 16; k++) { + out[k] = 0.0; + for (int n = 0; n < 16; n++) + out[k] += in[n]*cos(kPi*(2*n+1)*k/32.0); + if (k == 0) + out[k] = out[k]*kInvSqrt2; + } +} + +void reference_16x16_dct_2d(int16_t input[16*16], double output[16*16]) { /* Double-precision 2-D forward DCT reference for a 16x16 block: butterfly_16x16_dct_1d down every column, then along every row, halving each row result. */ + // First transform columns + for (int i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (int j = 0; j < 16; ++j) + temp_in[j] = input[j*16 + i]; + butterfly_16x16_dct_1d(temp_in, temp_out); + for (int j = 0; j < 16; ++j) + output[j*16 + i] = temp_out[j]; + } + // Then transform rows + for (int i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (int j = 0; j < 16; ++j) + temp_in[j] = output[j + i*16]; + butterfly_16x16_dct_1d(temp_in, temp_out); + // Halve each result; the accuracy test compares this reference against vp8_short_fdct16x16_c + for (int j = 0; j < 16; ++j) + output[j + i*16] = temp_out[j]/2; + } +} + + +TEST(VP8Idct16x16Test, AccuracyCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + for (int i = 0; i < count_test_block; ++i) { + int16_t in[256], coeff[256]; + int16_t out_c[256]; + double out_r[256]; + + // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 256; ++j) + in[j] = rnd.Rand8() - rnd.Rand8(); + + reference_16x16_dct_2d(in, out_r); + for (int j = 0; j < 256; j++) + coeff[j] = round(out_r[j]); + vp8_short_idct16x16_c(coeff, out_c, 32); + for (int j = 0; j < 256; ++j) { + const int diff = out_c[j] - in[j]; + const int error = diff * diff; + EXPECT_GE(1, error) + << "Error: 16x16 IDCT has error " << error + << " at index " << j; + } + + vp8_short_fdct16x16_c(in, out_c, 32); + for (int j = 0; j < 256; ++j) { + const double diff = coeff[j] - out_c[j]; + const double error = diff * diff; + EXPECT_GE(1.0, error) + << "Error: 16x16 FDCT has error " << error + << " at index " << j; + } + } +} + +TEST(VP8Fdct16x16Test, AccuracyCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int max_error = 0; + double total_error = 0; + const int count_test_block = 1000; + for (int i = 0; i < count_test_block; ++i) { + int16_t test_input_block[256]; + int16_t test_temp_block[256]; + int16_t test_output_block[256]; + + // Initialize a test block with input range [-255, 255]. 
+ for (int j = 0; j < 256; ++j) + test_input_block[j] = rnd.Rand8() - rnd.Rand8(); + + const int pitch = 32; + vp8_short_fdct16x16_c(test_input_block, test_temp_block, pitch); + vp8_short_idct16x16_c(test_temp_block, test_output_block, pitch); + + for (int j = 0; j < 256; ++j) { + const int diff = test_input_block[j] - test_output_block[j]; + const int error = diff * diff; + if (max_error < error) + max_error = error; + total_error += error; + } + } + + EXPECT_GE(1, max_error) + << "Error: 16x16 FDCT/IDCT has an individual roundtrip error > 1"; + + EXPECT_GE(count_test_block/10, total_error) + << "Error: 16x16 FDCT/IDCT has average roundtrip error > 1/10 per block"; +} + +TEST(VP8Fdct16x16Test, CoeffSizeCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + for (int i = 0; i < count_test_block; ++i) { + int16_t input_block[256], input_extreme_block[256]; + int16_t output_block[256], output_extreme_block[256]; + + // Initialize a test block with input range [-255, 255]. + for (int j = 0; j < 256; ++j) { + input_block[j] = rnd.Rand8() - rnd.Rand8(); + input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255; + } + if (i == 0) + for (int j = 0; j < 256; ++j) + input_extreme_block[j] = 255; + + const int pitch = 32; + vp8_short_fdct16x16_c(input_block, output_block, pitch); + vp8_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch); + + // The minimum quant value is 4. 
+ for (int j = 0; j < 256; ++j) { + EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j])) + << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE"; + EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j])) + << "Error: 16x16 FDCT extreme has coefficient larger than 4*DCT_MAX_VALUE"; + } + } +} +} // namespace diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index 47b88acd1..28b6afb0c 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -115,8 +115,8 @@ TEST(VP8Fdct8x8Test, RoundTripErrorCheck) { EXPECT_GE(1, max_error) << "Error: 8x8 FDCT/IDCT has an individual roundtrip error > 1"; - EXPECT_GE(count_test_block, total_error) - << "Error: 8x8 FDCT/IDCT has average roundtrip error > 1 per block"; + EXPECT_GE(count_test_block/5, total_error) + << "Error: 8x8 FDCT/IDCT has average roundtrip error > 1/5 per block"; }; TEST(VP8Fdct8x8Test, ExtremalCheck) { @@ -149,9 +149,9 @@ TEST(VP8Fdct8x8Test, ExtremalCheck) { << "Error: Extremal 8x8 FDCT/IDCT has an" << " individual roundtrip error > 1"; - EXPECT_GE(count_test_block, total_error) + EXPECT_GE(count_test_block/5, total_error) << "Error: Extremal 8x8 FDCT/IDCT has average" - << " roundtrip error > 1 per block"; + << " roundtrip error > 1/5 per block"; } }; diff --git a/test/idct8x8_test.cc b/test/idct8x8_test.cc new file mode 100644 index 000000000..a6308f826 --- /dev/null +++ b/test/idct8x8_test.cc @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <math.h> /* NOTE(review): the three header names were lost in extraction; <math.h> is required by cos()/round(), the other two are assumed - confirm against upstream */ +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +extern "C" { +#include "vp8/encoder/dct.h" +#include "vp8/common/idct.h" +} + +#include "acm_random.h" +#include "vpx/vpx_integer.h" + +using libvpx_test::ACMRandom; + +namespace { + +void reference_dct_1d(double input[8], double output[8]) { /* Direct-form 8-point DCT: output[k] = sum over n of input[n]*cos(kPi*(2n+1)*k/16), with the k == 0 output scaled by 1/sqrt(2). */ + const double kPi = 3.141592653589793238462643383279502884; + const double kInvSqrt2 = 0.707106781186547524400844362104; + for (int k = 0; k < 8; k++) { + output[k] = 0.0; + for (int n = 0; n < 8; n++) + output[k] += input[n]*cos(kPi*(2*n+1)*k/16.0); + if (k == 0) + output[k] = output[k]*kInvSqrt2; + } +} + +void reference_dct_2d(int16_t input[64], double output[64]) { /* 2-D reference DCT of an 8x8 block: reference_dct_1d on every column, then on every row, then every coefficient is doubled. */ + // First transform columns + for (int i = 0; i < 8; ++i) { + double temp_in[8], temp_out[8]; + for (int j = 0; j < 8; ++j) + temp_in[j] = input[j*8 + i]; + reference_dct_1d(temp_in, temp_out); + for (int j = 0; j < 8; ++j) + output[j*8 + i] = temp_out[j]; + } + // Then transform rows + for (int i = 0; i < 8; ++i) { + double temp_in[8], temp_out[8]; + for (int j = 0; j < 8; ++j) + temp_in[j] = output[j + i*8]; + reference_dct_1d(temp_in, temp_out); + for (int j = 0; j < 8; ++j) + output[j + i*8] = temp_out[j]; + } + // Double every coefficient; the accuracy test compares this reference against vp8_short_fdct8x8_c + for (int i = 0; i < 64; ++i) + output[i] *= 2; +} + +void reference_idct_1d(double input[8], double output[8]) { /* Direct-form 8-point inverse DCT: output[k] = sum over n of input[n]*cos(kPi*(2k+1)*n/16); the division by sqrt(2) runs right after the n == 0 term is added, so it scales only that term. */ + const double kPi = 3.141592653589793238462643383279502884; + const double kSqrt2 = 1.414213562373095048801688724209698; + for (int k = 0; k < 8; k++) { + output[k] = 0.0; + for (int n = 0; n < 8; n++) { + output[k] += input[n]*cos(kPi*(2*k+1)*n/16.0); + if (n == 0) + output[k] = output[k]/kSqrt2; + } + } +} + +void reference_idct_2d(double input[64], int16_t output[64]) { /* 2-D reference inverse DCT of an 8x8 block: reference_idct_1d on every row, then on every column; results are divided by 32 and rounded into output. */ + double out[64], out2[64]; + // First transform rows + for (int i = 0; i < 8; ++i) { + double temp_in[8], temp_out[8]; + for (int j = 0; j < 8; ++j) + temp_in[j] = input[j + i*8]; + reference_idct_1d(temp_in, temp_out); + for (int j =
0; j < 8; ++j) + out[j + i*8] = temp_out[j]; + } + // Then transform columns + for (int i = 0; i < 8; ++i) { + double temp_in[8], temp_out[8]; + for (int j = 0; j < 8; ++j) + temp_in[j] = out[j*8 + i]; + reference_idct_1d(temp_in, temp_out); + for (int j = 0; j < 8; ++j) + out2[j*8 + i] = temp_out[j]; + } + for (int i = 0; i < 64; ++i) + output[i] = round(out2[i]/32); +} + +TEST(VP8Idct8x8Test, AccuracyCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 10000; + for (int i = 0; i < count_test_block; ++i) { + int16_t input[64], coeff[64]; + int16_t output_c[64]; + double output_r[64]; + + // Initialize a test block with input range [-255, 255]. + for (int j = 0; j < 64; ++j) + input[j] = rnd.Rand8() - rnd.Rand8(); + + const int pitch = 16; + vp8_short_fdct8x8_c(input, output_c, pitch); + reference_dct_2d(input, output_r); + + for (int j = 0; j < 64; ++j) { + const double diff = output_c[j] - output_r[j]; + const double error = diff * diff; + // An error in a DCT coefficient isn't that bad. + // We care more about the reconstructed pixels. + EXPECT_GE(2.0, error) + << "Error: 8x8 FDCT/IDCT has error " << error + << " at index " << j; + } + +#if 0 + // Tests that the reference iDCT and fDCT match. 
+ reference_dct_2d(input, output_r); + reference_idct_2d(output_r, output_c); + for (int j = 0; j < 64; ++j) { + const int diff = output_c[j] -input[j]; + const int error = diff * diff; + EXPECT_EQ(0, error) + << "Error: 8x8 FDCT/IDCT has error " << error + << " at index " << j; + } +#endif + reference_dct_2d(input, output_r); + for (int j = 0; j < 64; ++j) + coeff[j] = round(output_r[j]); + vp8_short_idct8x8_c(coeff, output_c, pitch); + for (int j = 0; j < 64; ++j) { + const int diff = output_c[j] -input[j]; + const int error = diff * diff; + EXPECT_GE(1, error) + << "Error: 8x8 FDCT/IDCT has error " << error + << " at index " << j; + } + } +} + +} // namespace diff --git a/test/test.mk b/test/test.mk index 601050eb9..9ecf95b0c 100644 --- a/test/test.mk +++ b/test/test.mk @@ -1,8 +1,10 @@ LIBVPX_TEST_SRCS-yes += test.mk LIBVPX_TEST_SRCS-yes += acm_random.h LIBVPX_TEST_SRCS-yes += boolcoder_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_TX16X16) += dct16x16_test.cc LIBVPX_TEST_SRCS-yes += fdct4x4_test.cc LIBVPX_TEST_SRCS-yes += fdct8x8_test.cc +LIBVPX_TEST_SRCS-yes += idct8x8_test.cc LIBVPX_TEST_SRCS-yes += test_libvpx.cc LIBVPX_TEST_DATA-yes += hantro_collage_w352h288.yuv diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c index 7495d5e42..2c5b64cee 100644 --- a/vp8/common/alloccommon.c +++ b/vp8/common/alloccommon.c @@ -218,7 +218,4 @@ void vp8_initialize_common() { vp8_entropy_mode_init(); vp8_entropy_mv_init(); - - vp8_init_scan_order_mask(); - } diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 1bbaa6422..899b9d9c0 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -131,11 +131,12 @@ typedef enum { // Segment level features. 
typedef enum { - TX_4X4 = 0, // 4x4 dct transform - TX_8X8 = 1, // 8x8 dct transform - - TX_SIZE_MAX = 2 // Number of differnt transforms avaialble - + TX_4X4, // 4x4 dct transform + TX_8X8, // 8x8 dct transform +#if CONFIG_TX16X16 + TX_16X16, // 16x16 dct transform +#endif + TX_SIZE_MAX // Number of different transforms available } TX_SIZE; #if CONFIG_HYBRIDTRANSFORM diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h index fa691f89c..0fb25cc94 100644 --- a/vp8/common/coefupdateprobs.h +++ b/vp8/common/coefupdateprobs.h @@ -13,4 +13,7 @@ Generated file included by entropy.c */ #define COEF_UPDATE_PROB 252 #define COEF_UPDATE_PROB_8X8 252 +#if CONFIG_TX16X16 +#define COEF_UPDATE_PROB_16X16 252 +#endif diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h index 145faf1ac..dfb0e5ea7 100644 --- a/vp8/common/default_coef_probs.h +++ b/vp8/common/default_coef_probs.h @@ -488,3 +488,211 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8] } #endif }; +#if CONFIG_TX16X16 +static const vp8_prob +vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = +{ + { /* block Type 0 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, + { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, + { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, + { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} + }, + { /* Coeff Band 2 */ + { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, + { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, + { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, + { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 3 */ + { 1, 226, 
225, 255, 228, 236, 128, 128, 128, 128, 128}, + { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, + { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, + { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 4 */ + { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, + { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, + { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, + { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, + { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, + { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, + { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, + { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, + { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} + } + }, + { /* block Type 1 */ + { /* Coeff Band 0 */ + { 134, 152, 233, 224, 234, 52, 255, 166, 128, 128, 128}, + { 97, 132, 185, 234, 186, 189, 197, 171, 255, 212, 128}, + { 84, 110, 185, 237, 182, 182, 145, 145, 255, 255, 128} + }, + { /* Coeff Band 1 */ + { 1, 124, 213, 247, 192, 212, 255, 255, 128, 128, 128}, + { 88, 111, 178, 254, 189, 211, 255, 255, 128, 128, 128}, + { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128}, + { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128} + }, + { /* Coeff Band 2 */ + { 1, 102, 225, 255, 210, 240, 128, 128, 128, 128, 128}, + { 110, 78, 195, 254, 200, 191, 255, 255, 128, 128, 128}, + { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128}, + { 37, 63, 177, 255, 194, 195, 128, 128, 128, 
128, 128} + }, + { /* Coeff Band 3 */ + { 1, 1, 229, 255, 202, 224, 128, 128, 128, 128, 128}, + { 150, 1, 192, 255, 206, 226, 128, 128, 128, 128, 128}, + { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128}, + { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + } + }, + { /* block Type 2 */ + { /* Coeff Band 0 */ + { 11, 181, 226, 199, 183, 255, 255, 255, 128, 128, 128}, + { 2, 147, 185, 248, 163, 180, 255, 236, 128, 128, 128}, + { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128}, + { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128} + }, + { /* Coeff Band 1 */ + { 1, 150, 191, 246, 174, 188, 255, 235, 128, 128, 128}, + { 1, 125, 166, 245, 165, 185, 255, 234, 128, 128, 128}, + { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128}, + { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128} + }, + { /* Coeff Band 2 */ + { 1, 146, 184, 242, 167, 183, 255, 230, 255, 255, 128}, + { 1, 119, 160, 239, 156, 178, 255, 
231, 255, 255, 128}, + { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128}, + { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128} + }, + { /* Coeff Band 3 */ + { 1, 150, 188, 244, 169, 183, 255, 233, 255, 255, 128}, + { 1, 123, 162, 243, 161, 180, 255, 233, 128, 128, 128}, + { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128}, + { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 163, 202, 252, 188, 204, 255, 248, 128, 128, 128}, + { 1, 136, 180, 251, 181, 201, 255, 246, 128, 128, 128}, + { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128}, + { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 156, 195, 249, 179, 193, 255, 241, 255, 255, 128}, + { 1, 128, 169, 248, 171, 192, 255, 242, 255, 255, 128}, + { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128}, + { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128} + }, + { /* Coeff Band 6 */ + { 1, 36, 71, 251, 192, 201, 255, 243, 255, 255, 128}, + { 1, 49, 185, 250, 184, 199, 255, 242, 128, 128, 128}, + { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128}, + { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128} + }, + { /* Coeff Band 7 */ + { 1, 19, 98, 255, 218, 222, 255, 255, 128, 128, 128}, + { 36, 50, 210, 255, 212, 221, 255, 255, 128, 128, 128}, + { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}, + { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128} + } + }, + { /* block Type 3 */ + { /* Coeff Band 0 */ + { 17, 105, 227, 195, 164, 170, 168, 137, 221, 160, 184}, + { 6, 92, 166, 193, 158, 169, 179, 142, 236, 175, 200}, + { 2, 68, 118, 193, 147, 168, 187, 149, 241, 178, 247}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 193, 221, 246, 198, 194, 244, 176, 255, 192, 128}, + { 112, 160, 209, 244, 196, 194, 243, 175, 255, 209, 128}, + { 45, 123, 175, 240, 184, 195, 239, 178, 255, 218, 255}, + { 16, 53, 75, 169, 119, 152, 209, 146, 255, 219, 255} + }, + { /* Coeff Band 2 */ + { 1, 
141, 183, 240, 176, 187, 246, 198, 255, 218, 128}, + { 36, 97, 150, 231, 161, 180, 243, 191, 255, 217, 255}, + { 8, 65, 111, 210, 143, 166, 230, 167, 255, 224, 255}, + { 2, 35, 61, 157, 113, 149, 208, 142, 255, 217, 255} + }, + { /* Coeff Band 3 */ + { 1, 173, 196, 245, 184, 191, 252, 211, 255, 240, 128}, + { 35, 119, 175, 242, 177, 187, 252, 209, 255, 235, 128}, + { 4, 88, 141, 234, 161, 180, 249, 200, 255, 228, 128}, + { 1, 57, 95, 203, 133, 161, 235, 167, 255, 231, 255} + }, + { /* Coeff Band 4 */ + { 1, 208, 227, 249, 209, 204, 248, 188, 255, 248, 128}, + { 28, 162, 211, 247, 203, 200, 252, 188, 255, 232, 128}, + { 5, 114, 174, 238, 182, 189, 245, 184, 255, 238, 128}, + { 1, 61, 100, 205, 136, 164, 235, 163, 255, 239, 128} + }, + { /* Coeff Band 5 */ + { 1, 195, 218, 252, 208, 207, 250, 205, 255, 245, 128}, + { 22, 141, 196, 249, 198, 201, 250, 202, 255, 244, 128}, + { 2, 105, 163, 240, 178, 189, 246, 191, 255, 246, 128}, + { 1, 70, 112, 206, 144, 167, 232, 162, 255, 239, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 215, 251, 204, 203, 255, 222, 255, 225, 128}, + { 15, 140, 194, 249, 194, 199, 254, 221, 255, 253, 128}, + { 1, 95, 153, 243, 172, 188, 254, 213, 255, 248, 128}, + { 1, 59, 99, 216, 135, 166, 247, 190, 255, 237, 255} + }, + { /* Coeff Band 7 */ + { 1, 7, 231, 255, 227, 223, 255, 240, 255, 255, 128}, + { 15, 157, 217, 255, 218, 219, 255, 239, 255, 255, 128}, + { 1, 114, 182, 252, 198, 207, 255, 235, 255, 255, 128}, + { 1, 71, 122, 238, 154, 181, 255, 216, 255, 255, 128} + } + } +}; +#endif diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index 631057252..cbe798289 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -47,7 +47,7 @@ DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -DECLARE_ALIGNED(16, cuchar, vp8_coef_bands[16]) = { +DECLARE_ALIGNED(16, const int, vp8_coef_bands[16]) = { 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7 }; @@ -79,15 +79,15 @@ 
DECLARE_ALIGNED(16, const int, vp8_row_scan[16]) = { #endif -DECLARE_ALIGNED(64, cuchar, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5, - 5, 3, 6, 3, 5, 4, 6, 6, - 6, 5, 5, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7 - }; +DECLARE_ALIGNED(64, const int, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5, + 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 + }; DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]) = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, @@ -95,9 +95,46 @@ DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]) = { 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, }; +#if CONFIG_TX16X16 +// Table can be optimized. +DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]) = { + 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; +DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d_16x16[256]) = { + 0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4, 5, + 20, 35, 50, 65, 80, 
96, 81, 66, 51, 36, 21, 6, 7, 22, 37, 52, + 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8, 9, 24, 39, + 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100, 85, 70, 55, 40, + 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131, 146, 161, 176, 192, 177, + 162, 147, 132, 117, 102, 87, 72, 57, 42, 27, 12, 13, 28, 43, 58, 73, + 88, 103, 118, 133, 148, 163, 178, 193, 208, 224, 209, 194, 179, 164, 149, 134, + 119, 104, 89, 74, 59, 44, 29, 14, 15, 30, 45, 60, 75, 90, 105, 120, + 135, 150, 165, 180, 195, 210, 225, 240, 241, 226, 211, 196, 181, 166, 151, 136, + 121, 106, 91, 76, 61, 46, 31, 47, 62, 77, 92, 107, 122, 137, 152, 167, + 182, 197, 212, 227, 242, 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, + 78, 63, 79, 94, 109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, + 215, 200, 185, 170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, + 216, 231, 246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, + 203, 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235, + 250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, 255, +}; +#endif -DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]); -DECLARE_ALIGNED(64, short, vp8_default_zig_zag_mask_8x8[64]);// int64_t /* Array indices are identical to previously-existing CONTEXT_NODE indices */ @@ -131,17 +168,6 @@ static const Prob Pcat6[] = static vp8_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[26]; -void vp8_init_scan_order_mask() { - int i; - - for (i = 0; i < 16; i++) { - vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i; - } - for (i = 0; i < 64; i++) { - vp8_default_zig_zag_mask_8x8[vp8_default_zig_zag1d_8x8[i]] = 1 << i; - } -} - static void init_bit_tree(vp8_tree_index *p, int n) { int i = 0; @@ -181,11 +207,15 @@ vp8_extra_bit_struct vp8_extra_bits[12] = { void vp8_default_coef_probs(VP8_COMMON *pc) { vpx_memcpy(pc->fc.coef_probs, default_coef_probs, - sizeof(default_coef_probs)); + 
sizeof(pc->fc.coef_probs)); vpx_memcpy(pc->fc.coef_probs_8x8, vp8_default_coef_probs_8x8, - sizeof(vp8_default_coef_probs_8x8)); + sizeof(pc->fc.coef_probs_8x8)); +#if CONFIG_TX16X16 + vpx_memcpy(pc->fc.coef_probs_16x16, vp8_default_coef_probs_16x16, + sizeof(pc->fc.coef_probs_16x16)); +#endif } void vp8_coef_tree_initialize() { @@ -304,4 +334,27 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { else cm->fc.coef_probs_8x8[i][j][k][t] = prob; } } + +#if CONFIG_TX16X16 + for (i = 0; i < BLOCK_TYPES_16X16; ++i) + for (j = 0; j < COEF_BANDS; ++j) + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + coef_probs, branch_ct, cm->fc.coef_counts_16x16[i][j][k], 256, 1); + for (t = 0; t < ENTROPY_NODES; ++t) { + int prob; + count = branch_ct[t][0] + branch_ct[t][1]; + count = count > count_sat ? count_sat : count; + factor = (update_factor * count / count_sat); + prob = ((int)cm->fc.pre_coef_probs_16x16[i][j][k][t] * (256 - factor) + + (int)coef_probs[t] * factor + 128) >> 8; + if (prob <= 0) cm->fc.coef_probs_16x16[i][j][k][t] = 1; + else if (prob > 255) cm->fc.coef_probs_16x16[i][j][k][t] = 255; + else cm->fc.coef_probs_16x16[i][j][k][t] = prob; + } + } +#endif } diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index 36cae410c..4497a3d47 100644 --- a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -62,19 +62,22 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ /* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */ #define BLOCK_TYPES 4 - #if CONFIG_HTRANS8X8 #define BLOCK_TYPES_8X8 4 #else #define BLOCK_TYPES_8X8 3 #endif +#define BLOCK_TYPES_16X16 4 /* Middle dimension is a coarsening of the coefficient's position within the 4x4 DCT. 
*/ #define COEF_BANDS 8 -extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); -extern DECLARE_ALIGNED(64, const unsigned char, vp8_coef_bands_8x8[64]); +extern DECLARE_ALIGNED(16, const int, vp8_coef_bands[16]); +extern DECLARE_ALIGNED(64, const int, vp8_coef_bands_8x8[64]); +#if CONFIG_TX16X16 +extern DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]); +#endif /* Inside dimension is 3-valued measure of nearby complexity, that is, the extent to which nearby coefficients are nonzero. For the first @@ -113,8 +116,11 @@ extern DECLARE_ALIGNED(16, const int, vp8_row_scan[16]); extern short vp8_default_zig_zag_mask[16]; extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]); -extern short vp8_default_zig_zag_mask_8x8[64];// int64_t void vp8_coef_tree_initialize(void); -void vp8_adapt_coef_probs(struct VP8Common *); +#if CONFIG_TX16X16 +extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d_16x16[256]); +#endif +void vp8_adapt_coef_probs(struct VP8Common *); + #endif diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c index 4500e9c11..8d43ce827 100644 --- a/vp8/common/entropymode.c +++ b/vp8/common/entropymode.c @@ -249,19 +249,15 @@ struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS]; void vp8_init_mbmode_probs(VP8_COMMON *x) { unsigned int bct [VP8_YMODES] [2]; /* num Ymodes > num UV modes */ - vp8_tree_probs_from_distribution( - VP8_YMODES, vp8_ymode_encodings, vp8_ymode_tree, - x->fc.ymode_prob, bct, y_mode_cts, - 256, 1 - ); + vp8_tree_probs_from_distribution(VP8_YMODES, vp8_ymode_encodings, + vp8_ymode_tree, x->fc.ymode_prob, bct, y_mode_cts, 256, 1); { int i; for (i = 0; i < 8; i++) vp8_tree_probs_from_distribution( VP8_YMODES, vp8_kf_ymode_encodings, vp8_kf_ymode_tree, x->kf_ymode_prob[i], bct, kf_y_mode_cts[i], - 256, 1 - ); + 256, 1); } { int i; @@ -295,13 +291,9 @@ void vp8_init_mbmode_probs(VP8_COMMON *x) { static void intra_bmode_probs_from_distribution( vp8_prob p [VP8_BINTRAMODES - 
1], unsigned int branch_ct [VP8_BINTRAMODES - 1] [2], - const unsigned int events [VP8_BINTRAMODES] -) { - vp8_tree_probs_from_distribution( - VP8_BINTRAMODES, vp8_bmode_encodings, vp8_bmode_tree, - p, branch_ct, events, - 256, 1 - ); + const unsigned int events [VP8_BINTRAMODES]) { + vp8_tree_probs_from_distribution(VP8_BINTRAMODES, vp8_bmode_encodings, + vp8_bmode_tree, p, branch_ct, events, 256, 1); } void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES - 1]) { diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index df162234c..359a139e1 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -32,6 +32,9 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) { rtcd->idct.idct8 = vp8_short_idct8x8_c; rtcd->idct.idct1_scalar_add_8x8 = vp8_dc_only_idct_add_8x8_c; rtcd->idct.ihaar2 = vp8_short_ihaar2x2_c; +#if CONFIG_TX16X16 + rtcd->idct.idct16x16 = vp8_short_idct16x16_c; +#endif rtcd->recon.copy16x16 = vp8_copy_mem16x16_c; rtcd->recon.copy8x8 = vp8_copy_mem8x8_c; rtcd->recon.avg16x16 = vp8_avg_mem16x16_c; diff --git a/vp8/common/idct.h b/vp8/common/idct.h index 7582a1c8a..b67076967 100644 --- a/vp8/common/idct.h +++ b/vp8/common/idct.h @@ -36,6 +36,13 @@ #define Y2_WHT_UPSCALE_FACTOR 2 #endif +#if CONFIG_TX16X16 +#ifndef vp8_idct_idct16x16 +#define vp8_idct_idct16x16 vp8_short_idct16x16_c +#endif +extern prototype_idct(vp8_idct_idct16x16); +#endif + #ifndef vp8_idct_idct8 #define vp8_idct_idct8 vp8_short_idct8x8_c #endif @@ -120,6 +127,10 @@ typedef struct { vp8_idct_scalar_add_fn_t idct1_scalar_add_8x8; vp8_idct_fn_t ihaar2; vp8_idct_fn_t ihaar2_1; + +#if CONFIG_TX16X16 + vp8_idct_fn_t idct16x16; +#endif } vp8_idct_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index dbf0fda82..e549fe098 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -647,3 +647,275 @@ void vp8_short_ihaar2x2_c(short *input, short *output, int 
pitch) { op[8] = (ip[0] - ip[1] - ip[4] + ip[8]) >> 1; } + +#if CONFIG_TX16X16 +#if 0 +// Keep a really bad float version as reference for now. +void vp8_short_idct16x16_c(short *input, short *output, int pitch) { + double x; + const int short_pitch = pitch >> 1; + int i, j, k, l; + for (l = 0; l < 16; ++l) { + for (k = 0; k < 16; ++k) { + double s = 0; + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) { + x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/32; + if (i != 0) + x *= sqrt(2.0); + if (j != 0) + x *= sqrt(2.0); + s += x; + } + } + output[k*short_pitch+l] = (short)round(s); + } + } +} +#endif + +static void butterfly_16x16_idct_1d(double input[16], double output[16]) { + double step[16]; + double intermediate[16]; + double temp1, temp2; + + const double PI = M_PI; + const double C1 = cos(1*PI/(double)32); + const double C2 = cos(2*PI/(double)32); + const double C3 = cos(3*PI/(double)32); + const double C4 = cos(4*PI/(double)32); + const double C5 = cos(5*PI/(double)32); + const double C6 = cos(6*PI/(double)32); + const double C7 = cos(7*PI/(double)32); + const double C8 = cos(8*PI/(double)32); + const double C9 = cos(9*PI/(double)32); + const double C10 = cos(10*PI/(double)32); + const double C11 = cos(11*PI/(double)32); + const double C12 = cos(12*PI/(double)32); + const double C13 = cos(13*PI/(double)32); + const double C14 = cos(14*PI/(double)32); + const double C15 = cos(15*PI/(double)32); + + // step 1 and 2 + step[ 0] = input[0] + input[8]; + step[ 1] = input[0] - input[8]; + + temp1 = input[4]*C12; + temp2 = input[12]*C4; + + temp1 -= temp2; + temp1 *= C8; + + step[ 2] = 2*(temp1); + + temp1 = input[4]*C4; + temp2 = input[12]*C12; + temp1 += temp2; + temp1 = (temp1); + temp1 *= C8; + step[ 3] = 2*(temp1); + + temp1 = input[2]*C8; + temp1 = 2*(temp1); + temp2 = input[6] + input[10]; + + step[ 4] = temp1 + temp2; + step[ 5] = temp1 - temp2; + + temp1 = input[14]*C8; + temp1 = 2*(temp1); + temp2 = input[6] - input[10]; + + step[ 6] = 
temp2 - temp1; + step[ 7] = temp2 + temp1; + + // for odd input + temp1 = input[3]*C12; + temp2 = input[13]*C4; + temp1 += temp2; + temp1 = (temp1); + temp1 *= C8; + intermediate[ 8] = 2*(temp1); + + temp1 = input[3]*C4; + temp2 = input[13]*C12; + temp2 -= temp1; + temp2 = (temp2); + temp2 *= C8; + intermediate[ 9] = 2*(temp2); + + intermediate[10] = 2*(input[9]*C8); + intermediate[11] = input[15] - input[1]; + intermediate[12] = input[15] + input[1]; + intermediate[13] = 2*((input[7]*C8)); + + temp1 = input[11]*C12; + temp2 = input[5]*C4; + temp2 -= temp1; + temp2 = (temp2); + temp2 *= C8; + intermediate[14] = 2*(temp2); + + temp1 = input[11]*C4; + temp2 = input[5]*C12; + temp1 += temp2; + temp1 = (temp1); + temp1 *= C8; + intermediate[15] = 2*(temp1); + + step[ 8] = intermediate[ 8] + intermediate[14]; + step[ 9] = intermediate[ 9] + intermediate[15]; + step[10] = intermediate[10] + intermediate[11]; + step[11] = intermediate[10] - intermediate[11]; + step[12] = intermediate[12] + intermediate[13]; + step[13] = intermediate[12] - intermediate[13]; + step[14] = intermediate[ 8] - intermediate[14]; + step[15] = intermediate[ 9] - intermediate[15]; + + // step 3 + output[0] = step[ 0] + step[ 3]; + output[1] = step[ 1] + step[ 2]; + output[2] = step[ 1] - step[ 2]; + output[3] = step[ 0] - step[ 3]; + + temp1 = step[ 4]*C14; + temp2 = step[ 7]*C2; + temp1 -= temp2; + output[4] = (temp1); + + temp1 = step[ 4]*C2; + temp2 = step[ 7]*C14; + temp1 += temp2; + output[7] = (temp1); + + temp1 = step[ 5]*C10; + temp2 = step[ 6]*C6; + temp1 -= temp2; + output[5] = (temp1); + + temp1 = step[ 5]*C6; + temp2 = step[ 6]*C10; + temp1 += temp2; + output[6] = (temp1); + + output[8] = step[ 8] + step[11]; + output[9] = step[ 9] + step[10]; + output[10] = step[ 9] - step[10]; + output[11] = step[ 8] - step[11]; + output[12] = step[12] + step[15]; + output[13] = step[13] + step[14]; + output[14] = step[13] - step[14]; + output[15] = step[12] - step[15]; + + // output 4 + step[ 0] = 
output[0] + output[7]; + step[ 1] = output[1] + output[6]; + step[ 2] = output[2] + output[5]; + step[ 3] = output[3] + output[4]; + step[ 4] = output[3] - output[4]; + step[ 5] = output[2] - output[5]; + step[ 6] = output[1] - output[6]; + step[ 7] = output[0] - output[7]; + + temp1 = output[8]*C7; + temp2 = output[15]*C9; + temp1 -= temp2; + step[ 8] = (temp1); + + temp1 = output[9]*C11; + temp2 = output[14]*C5; + temp1 += temp2; + step[ 9] = (temp1); + + temp1 = output[10]*C3; + temp2 = output[13]*C13; + temp1 -= temp2; + step[10] = (temp1); + + temp1 = output[11]*C15; + temp2 = output[12]*C1; + temp1 += temp2; + step[11] = (temp1); + + temp1 = output[11]*C1; + temp2 = output[12]*C15; + temp2 -= temp1; + step[12] = (temp2); + + temp1 = output[10]*C13; + temp2 = output[13]*C3; + temp1 += temp2; + step[13] = (temp1); + + temp1 = output[9]*C5; + temp2 = output[14]*C11; + temp2 -= temp1; + step[14] = (temp2); + + temp1 = output[8]*C9; + temp2 = output[15]*C7; + temp1 += temp2; + step[15] = (temp1); + + // step 5 + output[0] = (step[0] + step[15]); + output[1] = (step[1] + step[14]); + output[2] = (step[2] + step[13]); + output[3] = (step[3] + step[12]); + output[4] = (step[4] + step[11]); + output[5] = (step[5] + step[10]); + output[6] = (step[6] + step[ 9]); + output[7] = (step[7] + step[ 8]); + + output[15] = (step[0] - step[15]); + output[14] = (step[1] - step[14]); + output[13] = (step[2] - step[13]); + output[12] = (step[3] - step[12]); + output[11] = (step[4] - step[11]); + output[10] = (step[5] - step[10]); + output[9] = (step[6] - step[ 9]); + output[8] = (step[7] - step[ 8]); +} + +// Remove once an int version of iDCT is written +#if 0 +void reference_16x16_idct_1d(double input[16], double output[16]) { + const double kPi = 3.141592653589793238462643383279502884; + const double kSqrt2 = 1.414213562373095048801688724209698; + for (int k = 0; k < 16; k++) { + output[k] = 0.0; + for (int n = 0; n < 16; n++) { + output[k] += input[n]*cos(kPi*(2*k+1)*n/32.0); + 
if (n == 0) + output[k] = output[k]/kSqrt2; + } + } +} +#endif + +void vp8_short_idct16x16_c(short *input, short *output, int pitch) { + double out[16*16], out2[16*16]; + const int short_pitch = pitch >> 1; + int i, j; + // First transform rows + for (i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (j = 0; j < 16; ++j) + temp_in[j] = input[j + i*short_pitch]; + butterfly_16x16_idct_1d(temp_in, temp_out); + for (j = 0; j < 16; ++j) + out[j + i*16] = temp_out[j]; + } + // Then transform columns + for (i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (j = 0; j < 16; ++j) + temp_in[j] = out[j*16 + i]; + butterfly_16x16_idct_1d(temp_in, temp_out); + for (j = 0; j < 16; ++j) + out2[j*16 + i] = temp_out[j]; + } + for (i = 0; i < 16*16; ++i) + output[i] = round(out2[i]/128); +} +#endif diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c index d35071600..de9aad58d 100644 --- a/vp8/common/invtrans.c +++ b/vp8/common/invtrans.c @@ -153,3 +153,33 @@ void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCK } +#if CONFIG_TX16X16 +void vp8_inverse_transform_b_16x16(const vp8_idct_rtcd_vtable_t *rtcd, + short *input_dqcoeff, + short *output_coeff, int pitch) { + IDCT_INVOKE(rtcd, idct16x16)(input_dqcoeff, output_coeff, pitch); +} + +void vp8_inverse_transform_mby_16x16(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) { + vp8_inverse_transform_b_16x16(rtcd, &x->block[0].dqcoeff[0], &x->block[0].diff[0], 32); +} + +// U,V blocks are 8x8 per macroblock, so just run 8x8 +void vp8_inverse_transform_mbuv_16x16(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) { + int i; + for (i = 16; i < 24; i += 4) + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16); +} + +void vp8_inverse_transform_mb_16x16(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) { + int i; + + // Luma + vp8_inverse_transform_b_16x16(rtcd, &x->block[0].dqcoeff[0], &x->block[0].diff[0], 32); + + // U, V + // Chroma blocks 
are downscaled, so run an 8x8 on them. + for (i = 16; i < 24; i+= 4) + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16); +} +#endif diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h index 1eda173b4..877032f88 100644 --- a/vp8/common/invtrans.h +++ b/vp8/common/invtrans.h @@ -30,4 +30,12 @@ extern void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MAC extern void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); extern void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +#if CONFIG_TX16X16 +extern void vp8_inverse_transform_b_16x16(const vp8_idct_rtcd_vtable_t *rtcd, + short *input_dqcoeff, short *output_coeff, + int pitch); +extern void vp8_inverse_transform_mb_16x16(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +extern void vp8_inverse_transform_mby_16x16(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +extern void vp8_inverse_transform_mbuv_16x16(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +#endif #endif diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c index 02af3cca9..05c00ef4e 100644 --- a/vp8/common/loopfilter.c +++ b/vp8/common/loopfilter.c @@ -329,7 +329,11 @@ void vp8_loop_filter_frame vp8_loop_filter_mbv_c (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); - if (!skip_lf) { + if (!skip_lf +#if CONFIG_TX16X16 + && tx_type != TX_16X16 +#endif + ) { if (tx_type == TX_8X8) vp8_loop_filter_bv8x8_c (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); @@ -344,7 +348,11 @@ void vp8_loop_filter_frame vp8_loop_filter_mbh_c (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); - if (!skip_lf) { + if (!skip_lf +#if CONFIG_TX16X16 + && tx_type != TX_16X16 +#endif + ) { if (tx_type == TX_8X8) vp8_loop_filter_bh8x8_c (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); @@ -353,6 +361,7 @@ void vp8_loop_filter_frame (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, 
&lfi); } } else { + // FIXME: Not 8x8 aware if (mb_col > 0) LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v) (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); @@ -431,7 +440,6 @@ void vp8_loop_filter_frame_yonly const int seg = mode_info_context->mbmi.segment_id; const int ref_frame = mode_info_context->mbmi.ref_frame; int tx_type = mode_info_context->mbmi.txfm_size; - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; if (filter_level) { @@ -446,7 +454,11 @@ void vp8_loop_filter_frame_yonly vp8_loop_filter_mbv_c (y_ptr, 0, 0, post->y_stride, 0, &lfi); - if (!skip_lf) { + if (!skip_lf +#if CONFIG_TX16X16 + && tx_type != TX_16X16 +#endif + ) { if (tx_type == TX_8X8) vp8_loop_filter_bv8x8_c (y_ptr, 0, 0, post->y_stride, 0, &lfi); @@ -460,7 +472,11 @@ void vp8_loop_filter_frame_yonly vp8_loop_filter_mbh_c (y_ptr, 0, 0, post->y_stride, 0, &lfi); - if (!skip_lf) { + if (!skip_lf +#if CONFIG_TX16X16 + && tx_type != TX_16X16 +#endif + ) { if (tx_type == TX_8X8) vp8_loop_filter_bh8x8_c (y_ptr, 0, 0, post->y_stride, 0, &lfi); @@ -469,6 +485,7 @@ void vp8_loop_filter_frame_yonly (y_ptr, 0, 0, post->y_stride, 0, &lfi); } } else { + // FIXME: Not 8x8 aware if (mb_col > 0) LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v) (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index ccecc4963..89d437ba5 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -52,6 +52,9 @@ typedef struct frame_contexts { vp8_prob mbsplit_prob [VP8_NUMMBSPLITS - 1]; vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; vp8_prob coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_TX16X16 + vp8_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif MV_CONTEXT mvc[2]; #if CONFIG_HIGH_PRECISION_MV MV_CONTEXT_HP mvc_hp[2]; @@ -73,12 +76,22 @@ typedef struct frame_contexts { unsigned int sub_mv_ref_counts 
[SUBMVREF_COUNT][VP8_SUBMVREFS]; unsigned int mbsplit_counts [VP8_NUMMBSPLITS]; - vp8_prob pre_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - vp8_prob pre_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + vp8_prob pre_coef_probs [BLOCK_TYPES] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + vp8_prob pre_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_TX16X16 + vp8_prob pre_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; + [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; + [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_TX16X16 + unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif unsigned int MVcount [2] [MVvals]; #if CONFIG_HIGH_PRECISION_MV unsigned int MVcount_hp [2] [MVvals_hp]; diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 47d0faa7b..598b37927 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -217,22 +217,46 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, #endif if (pbi->common.frame_type == KEY_FRAME) { - if (pbi->common.txfm_mode == ALLOW_8X8 && +#if CONFIG_TX16X16 + if (xd->mode_info_context->mbmi.mode <= TM_PRED || + xd->mode_info_context->mbmi.mode == NEWMV || + xd->mode_info_context->mbmi.mode == ZEROMV || + xd->mode_info_context->mbmi.mode == NEARMV || + xd->mode_info_context->mbmi.mode == NEARESTMV) + xd->mode_info_context->mbmi.txfm_size = TX_16X16; + else if (pbi->common.txfm_mode == ALLOW_8X8 && xd->mode_info_context->mbmi.mode != I8X8_PRED && xd->mode_info_context->mbmi.mode != B_PRED) +#else + if (pbi->common.txfm_mode == ALLOW_8X8 && + 
xd->mode_info_context->mbmi.mode != I8X8_PRED && + xd->mode_info_context->mbmi.mode != B_PRED) +#endif xd->mode_info_context->mbmi.txfm_size = TX_8X8; else xd->mode_info_context->mbmi.txfm_size = TX_4X4; } else { - if (pbi->common.txfm_mode == ONLY_4X4) { +#if CONFIG_TX16X16 + if (xd->mode_info_context->mbmi.mode <= TM_PRED || + xd->mode_info_context->mbmi.mode == NEWMV || + xd->mode_info_context->mbmi.mode == ZEROMV || + xd->mode_info_context->mbmi.mode == NEARMV || + xd->mode_info_context->mbmi.mode == NEARESTMV) { + xd->mode_info_context->mbmi.txfm_size = TX_16X16; + } else if (pbi->common.txfm_mode == ALLOW_8X8 && + xd->mode_info_context->mbmi.mode != I8X8_PRED && + xd->mode_info_context->mbmi.mode != B_PRED && + xd->mode_info_context->mbmi.mode != SPLITMV) { +#else + if (pbi->common.txfm_mode == ALLOW_8X8 && + xd->mode_info_context->mbmi.mode != I8X8_PRED && + xd->mode_info_context->mbmi.mode != B_PRED && + xd->mode_info_context->mbmi.mode != SPLITMV) { +#endif + xd->mode_info_context->mbmi.txfm_size = TX_8X8; + } + else { xd->mode_info_context->mbmi.txfm_size = TX_4X4; - } else if (pbi->common.txfm_mode == ALLOW_8X8) { - if (xd->mode_info_context->mbmi.mode == B_PRED - || xd->mode_info_context->mbmi.mode == I8X8_PRED - || xd->mode_info_context->mbmi.mode == SPLITMV) - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - else - xd->mode_info_context->mbmi.txfm_size = TX_8X8; } } @@ -251,6 +275,11 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, xd->block[i].eob = 0; xd->eobs[i] = 0; } +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + eobtotal = vp8_decode_mb_tokens_16x16(pbi, xd); + else +#endif if (tx_type == TX_8X8) eobtotal = vp8_decode_mb_tokens_8x8(pbi, xd); else @@ -462,6 +491,15 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, xd->dst.y_stride, xd->eobs); } else { BLOCKD *b = &xd->block[24]; + +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) { + vp8_dequant_idct_add_16x16_c(xd->qcoeff, xd->block[0].dequant, + xd->predictor, 
xd->dst.y_buffer, + 16, xd->dst.y_stride); + } + else +#endif if (tx_type == TX_8X8) { DEQUANT_INVOKE(&pbi->dequant, block_2x2)(b); #ifdef DEC_DEBUG @@ -511,7 +549,11 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } } - if (tx_type == TX_8X8) + if (tx_type == TX_8X8 +#if CONFIG_TX16X16 + || tx_type == TX_16X16 +#endif + ) DEQUANT_INVOKE(&pbi->dequant, idct_add_uv_block_8x8) // (xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, @@ -904,7 +946,7 @@ static void read_coef_probs(VP8D_COMP *pbi) { } } } - } + } } if (pbi->common.txfm_mode == ALLOW_8X8 && vp8_read_bit(bc)) { @@ -925,6 +967,28 @@ static void read_coef_probs(VP8D_COMP *pbi) { } } } + +#if CONFIG_TX16X16 + // 16x16 + if (vp8_read_bit(bc)) { + // read coef probability tree + for (i = 0; i < BLOCK_TYPES_16X16; ++i) + for (j = !i; j < COEF_BANDS; ++j) + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || + (i > 0 && j == 0))) + continue; + for (l = 0; l < ENTROPY_NODES; ++l) { + + vp8_prob *const p = pc->fc.coef_probs_16x16[i][j][k] + l; + + if (vp8_read(bc, COEF_UPDATE_PROB_16X16)) { + *p = read_prob_diff_update(bc, *p); + } + } + } + } +#endif } int vp8_decode_frame(VP8D_COMP *pbi) { @@ -1287,6 +1351,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) { vp8_copy(pbi->common.fc.pre_coef_probs, pbi->common.fc.coef_probs); vp8_copy(pbi->common.fc.pre_coef_probs_8x8, pbi->common.fc.coef_probs_8x8); +#if CONFIG_TX16X16 + vp8_copy(pbi->common.fc.pre_coef_probs_16x16, pbi->common.fc.coef_probs_16x16); +#endif vp8_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob); vp8_copy(pbi->common.fc.pre_uv_mode_prob, pbi->common.fc.uv_mode_prob); vp8_copy(pbi->common.fc.pre_bmode_prob, pbi->common.fc.bmode_prob); @@ -1299,6 +1366,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) { #endif vp8_zero(pbi->common.fc.coef_counts); vp8_zero(pbi->common.fc.coef_counts_8x8); +#if CONFIG_TX16X16 + vp8_zero(pbi->common.fc.coef_counts_16x16); 
+#endif vp8_zero(pbi->common.fc.ymode_counts); vp8_zero(pbi->common.fc.uv_mode_counts); vp8_zero(pbi->common.fc.bmode_counts); diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c index 3669cc214..655409176 100644 --- a/vp8/decoder/dequantize.c +++ b/vp8/decoder/dequantize.c @@ -422,3 +422,39 @@ void vp8_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, #endif } +#if CONFIG_TX16X16 +void vp8_dequant_idct_add_16x16_c(short *input, short *dq, unsigned char *pred, + unsigned char *dest, int pitch, int stride) { + short output[256]; + short *diff_ptr = output; + int r, c, i; + + input[0]= input[0] * dq[0]; + + // recover quantizer for 4 4x4 blocks + for (i = 1; i < 256; i++) + input[i] = input[i] * dq[1]; + + // the idct halves ( >> 1) the pitch + vp8_short_idct16x16_c(input, output, 32); + + vpx_memset(input, 0, 512); + + for (r = 0; r < 16; r++) { + for (c = 0; c < 16; c++) { + int a = diff_ptr[c] + pred[c]; + + if (a < 0) + a = 0; + else if (a > 255) + a = 255; + + dest[c] = (unsigned char) a; + } + + dest += stride; + diff_ptr += 16; + pred += pitch; + } +} +#endif diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h index 76418fffa..b12ee6904 100644 --- a/vp8/decoder/dequantize.h +++ b/vp8/decoder/dequantize.h @@ -145,6 +145,12 @@ extern prototype_dequant_idct_add_y_block_8x8(vp8_dequant_idct_add_y_block_8x8); #endif extern prototype_dequant_idct_add_uv_block_8x8(vp8_dequant_idct_add_uv_block_8x8); +#if CONFIG_TX16X16 +#ifndef vp8_dequant_idct_add_16x16 +#define vp8_dequant_idct_add_16x16 vp8_dequant_idct_add_16x16_c +#endif +extern prototype_dequant_idct_add(vp8_dequant_idct_add_16x16); +#endif typedef prototype_dequant_block((*vp8_dequant_block_fn_t)); @@ -178,6 +184,9 @@ typedef struct { vp8_dequant_dc_idct_add_y_block_fn_t_8x8 dc_idct_add_y_block_8x8; vp8_dequant_idct_add_y_block_fn_t_8x8 idct_add_y_block_8x8; vp8_dequant_idct_add_uv_block_fn_t_8x8 idct_add_uv_block_8x8; +#if CONFIG_TX16X16 + 
vp8_dequant_idct_add_fn_t idct_add_16x16; +#endif } vp8_dequant_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c index 155877a4c..c93b8e9c5 100644 --- a/vp8/decoder/detokenize.c +++ b/vp8/decoder/detokenize.c @@ -22,13 +22,13 @@ #define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES -DECLARE_ALIGNED(16, int, coef_bands_x[16]) = { +DECLARE_ALIGNED(16, const int, coef_bands_x[16]) = { 0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X }; -DECLARE_ALIGNED(16, int, coef_bands_x_8x8[64]) = { +DECLARE_ALIGNED(16, const int, coef_bands_x_8x8[64]) = { 0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 4 * OCB_X, 5 * OCB_X, 5 * OCB_X, 3 * OCB_X, 6 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 5 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, @@ -39,6 +39,27 @@ DECLARE_ALIGNED(16, int, coef_bands_x_8x8[64]) = { 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, }; +#if CONFIG_TX16X16 +DECLARE_ALIGNED(16, const int, coef_bands_x_16x16[256]) = { + 0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 4 * OCB_X, 5 * OCB_X, 5 * OCB_X, 3 * OCB_X, 6 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 6 * OCB_X, 6 * OCB_X, + 6 * OCB_X, 5 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, + 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * 
OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 
* OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X +}; +#endif + #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 #define ONE_CONTEXT_NODE 2 @@ -81,9 +102,13 @@ static const unsigned char cat6_prob[14] = void vp8_reset_mb_tokens_context(MACROBLOCKD *x) { /* Clear entropy contexts for Y2 blocks */ - if (x->mode_info_context->mbmi.mode != B_PRED && + if ((x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != I8X8_PRED && - x->mode_info_context->mbmi.mode != SPLITMV) { + x->mode_info_context->mbmi.mode != SPLITMV) +#if CONFIG_TX16X16 + || x->mode_info_context->mbmi.txfm_size == TX_16X16 +#endif + ) { vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } else { @@ -200,6 +225,27 @@ void static count_tokens_8x8(INT16 *qcoeff_ptr, int block, int type, } } +#if CONFIG_TX16X16 +void static count_tokens_16x16(INT16 *qcoeff_ptr, int block, int type, + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, + int eob, int seg_eob, FRAME_CONTEXT *fc) { + int c, pt, token; + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + for (c = !type; c < eob; ++c) { + int rc = vp8_default_zig_zag1d_16x16[c]; + int v = qcoeff_ptr[rc]; + int band = vp8_coef_bands_16x16[c]; + token = get_token(v); + fc->coef_counts_16x16[type][band][pt][token]++; + pt = vp8_prev_token_class[token]; + } + if (eob < seg_eob) { + int band = vp8_coef_bands_16x16[c]; + fc->coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN]++; + } +} +#endif + static int vp8_get_signed(BOOL_DECODER *br, int value_to_sign) { const int split = (br->range + 1) >> 1; @@ -224,16 +270,16 @@ static int vp8_get_signed(BOOL_DECODER *br, int value_to_sign) { return v; } -#define WRITE_COEF_CONTINUE(val) \ - { \ - Prob = coef_probs + (ENTROPY_NODES*PREV_CONTEXT_INC(val));\ +#define WRITE_COEF_CONTINUE(val) \ + { \ + prob = coef_probs + (ENTROPY_NODES*PREV_CONTEXT_INC(val));\ qcoeff_ptr[scan[c]] = (INT16) vp8_get_signed(br, 
val); \ c++; \ continue; \ } -#define ADJUST_COEF(prob, bits_count) \ - do { \ +#define ADJUST_COEF(prob, bits_count) \ + do { \ if (vp8_read(br, prob)) \ val += (UINT16)(1 << bits_count);\ } while (0); @@ -246,48 +292,59 @@ static int vp8_decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd, FRAME_CONTEXT *const fc = &dx->common.fc; BOOL_DECODER *br = xd->current_bc; int tmp, c = (type == 0); - const vp8_prob *Prob; - const vp8_prob *coef_probs = - (block_type == TX_4X4) ? fc->coef_probs[type][0][0] - : fc->coef_probs_8x8[type][0][0]; + const vp8_prob *prob, *coef_probs; + + switch (block_type) { + case TX_4X4: + coef_probs = fc->coef_probs[type][0][0]; + break; + case TX_8X8: + coef_probs = fc->coef_probs_8x8[type][0][0]; + break; +#if CONFIG_TX16X16 + default: + coef_probs = fc->coef_probs_16x16[type][0][0]; + break; +#endif + } VP8_COMBINEENTROPYCONTEXTS(tmp, *a, *l); - Prob = coef_probs + tmp * ENTROPY_NODES; + prob = coef_probs + tmp * ENTROPY_NODES; while (1) { int val; const uint8_t *cat6 = cat6_prob; if (c == seg_eob) break; - Prob += coef_bands[c]; - if (!vp8_read(br, Prob[EOB_CONTEXT_NODE])) + prob += coef_bands[c]; + if (!vp8_read(br, prob[EOB_CONTEXT_NODE])) break; - SKIP_START: +SKIP_START: if (c == seg_eob) break; - if (!vp8_read(br, Prob[ZERO_CONTEXT_NODE])) { + if (!vp8_read(br, prob[ZERO_CONTEXT_NODE])) { ++c; - Prob = coef_probs + coef_bands[c]; + prob = coef_probs + coef_bands[c]; goto SKIP_START; } // ONE_CONTEXT_NODE_0_ - if (!vp8_read(br, Prob[ONE_CONTEXT_NODE])) { - Prob = coef_probs + ENTROPY_NODES; + if (!vp8_read(br, prob[ONE_CONTEXT_NODE])) { + prob = coef_probs + ENTROPY_NODES; qcoeff_ptr[scan[c]] = (INT16) vp8_get_signed(br, 1); ++c; continue; } // LOW_VAL_CONTEXT_NODE_0_ - if (!vp8_read(br, Prob[LOW_VAL_CONTEXT_NODE])) { - if (!vp8_read(br, Prob[TWO_CONTEXT_NODE])) { + if (!vp8_read(br, prob[LOW_VAL_CONTEXT_NODE])) { + if (!vp8_read(br, prob[TWO_CONTEXT_NODE])) { WRITE_COEF_CONTINUE(2); } - if (!vp8_read(br, Prob[THREE_CONTEXT_NODE])) { 
+ if (!vp8_read(br, prob[THREE_CONTEXT_NODE])) { WRITE_COEF_CONTINUE(3); } WRITE_COEF_CONTINUE(4); } // HIGH_LOW_CONTEXT_NODE_0_ - if (!vp8_read(br, Prob[HIGH_LOW_CONTEXT_NODE])) { - if (!vp8_read(br, Prob[CAT_ONE_CONTEXT_NODE])) { + if (!vp8_read(br, prob[HIGH_LOW_CONTEXT_NODE])) { + if (!vp8_read(br, prob[CAT_ONE_CONTEXT_NODE])) { val = CAT1_MIN_VAL; ADJUST_COEF(CAT1_PROB0, 0); WRITE_COEF_CONTINUE(val); @@ -298,8 +355,8 @@ static int vp8_decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd, WRITE_COEF_CONTINUE(val); } // CAT_THREEFOUR_CONTEXT_NODE_0_ - if (!vp8_read(br, Prob[CAT_THREEFOUR_CONTEXT_NODE])) { - if (!vp8_read(br, Prob[CAT_THREE_CONTEXT_NODE])) { + if (!vp8_read(br, prob[CAT_THREEFOUR_CONTEXT_NODE])) { + if (!vp8_read(br, prob[CAT_THREE_CONTEXT_NODE])) { val = CAT3_MIN_VAL; ADJUST_COEF(CAT3_PROB2, 2); ADJUST_COEF(CAT3_PROB1, 1); @@ -314,7 +371,7 @@ static int vp8_decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd, WRITE_COEF_CONTINUE(val); } // CAT_FIVE_CONTEXT_NODE_0_: - if (!vp8_read(br, Prob[CAT_FIVE_CONTEXT_NODE])) { + if (!vp8_read(br, prob[CAT_FIVE_CONTEXT_NODE])) { val = CAT5_MIN_VAL; ADJUST_COEF(CAT5_PROB4, 4); ADJUST_COEF(CAT5_PROB3, 3); @@ -331,18 +388,81 @@ static int vp8_decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd, WRITE_COEF_CONTINUE(val); } - if (block_type == TX_4X4) + if (block_type == TX_4X4) { #if CONFIG_HYBRIDTRANSFORM count_tokens_adaptive_scan(xd, qcoeff_ptr, i, type, a, l, c, seg_eob, fc); #else count_tokens(qcoeff_ptr, i, type, a, l, c, seg_eob, fc); #endif - - else + } + else if (block_type == TX_8X8) count_tokens_8x8(qcoeff_ptr, i, type, a, l, c, seg_eob, fc); +#if CONFIG_TX16X16 + else + count_tokens_16x16(qcoeff_ptr, i, type, a, l, c, seg_eob, fc); +#endif return c; } +#if CONFIG_TX16X16 +int vp8_decode_mb_tokens_16x16(VP8D_COMP *pbi, MACROBLOCKD *xd) { + ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context; + ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context; + + char* const eobs = xd->eobs; + int 
c, i, type, eobtotal = 0, seg_eob; + const int segment_id = xd->mode_info_context->mbmi.segment_id; + const int seg_active = segfeature_active(xd, segment_id, SEG_LVL_EOB); + INT16 *qcoeff_ptr = &xd->qcoeff[0]; + + type = PLANE_TYPE_Y_WITH_DC; + if (seg_active) + seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB); + else + seg_eob = 256; + + // Luma block + { + const int* const scan = vp8_default_zig_zag1d_16x16; + c = vp8_decode_coefs(pbi, xd, A, L, type, seg_eob, qcoeff_ptr, + 0, scan, TX_16X16, coef_bands_x_16x16); + eobs[0] = c; + *A = *L = (c != !type); + for (i = 1; i < 16; i++) { + *(A + vp8_block2above[i]) = *(A); + *(L + vp8_block2left[i]) = *(L); + } + eobtotal += c; + } + + // 8x8 chroma blocks + qcoeff_ptr += 256; + type = PLANE_TYPE_UV; + if (seg_active) + seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB); + else + seg_eob = 64; + for (i = 16; i < 24; i += 4) { + ENTROPY_CONTEXT* const a = A + vp8_block2above_8x8[i]; + ENTROPY_CONTEXT* const l = L + vp8_block2left_8x8[i]; + const int* const scan = vp8_default_zig_zag1d_8x8; + + c = vp8_decode_coefs(pbi, xd, a, l, type, seg_eob, qcoeff_ptr, + i, scan, TX_8X8, coef_bands_x_8x8); + a[0] = l[0] = ((eobs[i] = c) != !type); + a[1] = a[0]; + l[1] = l[0]; + + eobtotal += c; + qcoeff_ptr += 64; + } + vpx_memset(&A[8], 0, sizeof(A[8])); + vpx_memset(&L[8], 0, sizeof(L[8])); + return eobtotal; +} +#endif + + int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) { ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; @@ -464,7 +584,6 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd) { c = vp8_decode_coefs(dx, xd, a, l, type, seg_eob, qcoeff_ptr + 24 * 16, 24, scan, TX_4X4, coef_bands_x); a[0] = l[0] = ((eobs[24] = c) != !type); - eobtotal += c - 16; type = PLANE_TYPE_Y_NO_DC; diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h index caedf2f37..05550cec7 100644 --- a/vp8/decoder/detokenize.h +++ 
b/vp8/decoder/detokenize.h @@ -17,5 +17,8 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x); int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); int vp8_decode_mb_tokens_8x8(VP8D_COMP *, MACROBLOCKD *); +#if CONFIG_TX16X16 +int vp8_decode_mb_tokens_16x16(VP8D_COMP *, MACROBLOCKD *); +#endif #endif /* DETOKENIZE_H */ diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c index dd7f4e9f6..15b809e33 100644 --- a/vp8/decoder/generic/dsystemdependent.c +++ b/vp8/decoder/generic/dsystemdependent.c @@ -22,6 +22,9 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi) { pbi->mb.rtcd = &pbi->common.rtcd; pbi->dequant.block_2x2 = vp8_dequantize_b_2x2_c; pbi->dequant.idct_add_8x8 = vp8_dequant_idct_add_8x8_c; +#if CONFIG_TX16X16 + pbi->dequant.idct_add_16x16 = vp8_dequant_idct_add_16x16_c; +#endif pbi->dequant.dc_idct_add_8x8 = vp8_dequant_dc_idct_add_8x8_c; pbi->dequant.dc_idct_add_y_block_8x8 = vp8_dequant_dc_idct_add_y_block_8x8_c; pbi->dequant.idct_add_y_block_8x8 = vp8_dequant_idct_add_y_block_8x8_c; diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index 5ad0ac83f..f4147e119 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -54,6 +54,9 @@ typedef struct { vp8_prob const *coef_probs[BLOCK_TYPES]; vp8_prob const *coef_probs_8x8[BLOCK_TYPES_8X8]; +#if CONFIG_TX16X16 + vp8_prob const *coef_probs_16X16[BLOCK_TYPES_16X16]; +#endif UINT8 eob[25]; diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index 97e791bc6..c555c0300 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -42,6 +42,12 @@ unsigned int tree_update_hist_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; +#if CONFIG_TX16X16 +unsigned int tree_update_hist_16x16 [BLOCK_TYPES_16X16] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] [2]; +#endif extern unsigned int active_section; #endif @@ -1283,15 +1289,13 @@ static void print_prob_tree(vp8_prob void build_coeff_contexts(VP8_COMP 
*cpi) { - int i = 0; - do { - int j = 0; - do { - int k = 0; - do { + int i = 0, j, k; #ifdef ENTROPY_STATS - int t; + int t = 0; #endif + for (i = 0; i < BLOCK_TYPES; ++i) { + for (j = 0; j < COEF_BANDS; ++j) { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) continue; vp8_tree_probs_from_distribution( @@ -1302,33 +1306,23 @@ void build_coeff_contexts(VP8_COMP *cpi) { 256, 1 ); #ifdef ENTROPY_STATS - if (!cpi->dummy_packing) { - t = 0; - do { - context_counters [i][j][k][t] += - cpi->coef_counts [i][j][k][t]; - } while (++t < MAX_ENTROPY_TOKENS); - } + if (!cpi->dummy_packing) + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + context_counters[i][j][k][t] += cpi->coef_counts[i][j][k][t]; #endif - } while (++k < PREV_COEF_CONTEXTS); - } while (++j < COEF_BANDS); - } while (++i < BLOCK_TYPES); + } + } + } - i = 0; if (cpi->common.txfm_mode == ALLOW_8X8) { - do { - int j = 0; /* token/prob index */ - do { - int k = 0; - do { + for (i = 0; i < BLOCK_TYPES_8X8; ++i) { + for (j = 0; j < COEF_BANDS; ++j) { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { /* at every context */ /* calc probs and branch cts for this frame only */ // vp8_prob new_p [ENTROPY_NODES]; // unsigned int branch_ct [ENTROPY_NODES] [2]; -#ifdef ENTROPY_STATS - int t = 0; /* token/prob index */ -#endif if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) continue; vp8_tree_probs_from_distribution( @@ -1339,20 +1333,36 @@ void build_coeff_contexts(VP8_COMP *cpi) { 256, 1 ); #ifdef ENTROPY_STATS - if (!cpi->dummy_packing) { - t = 0; - do { - context_counters_8x8 [i][j][k][t] += - cpi->coef_counts_8x8 [i][j][k][t]; - } while (++t < MAX_ENTROPY_TOKENS); - } + if (!cpi->dummy_packing) + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + context_counters_8x8[i][j][k][t] += cpi->coef_counts_8x8[i][j][k][t]; #endif - - } while (++k < PREV_COEF_CONTEXTS); - } while (++j < COEF_BANDS); - } while (++i < BLOCK_TYPES_8X8); + } + } + } } +#if CONFIG_TX16X16 + //16x16 + for (i 
= 0; i < BLOCK_TYPES_16X16; ++i) { + for (j = 0; j < COEF_BANDS; ++j) { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + cpi->frame_coef_probs_16x16[i][j][k], + cpi->frame_branch_ct_16x16[i][j][k], + cpi->coef_counts_16x16[i][j][k], 256, 1); +#ifdef ENTROPY_STATS + if (!cpi->dummy_packing) + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + context_counters_16x16[i][j][k][t] += cpi->coef_counts_16x16[i][j][k][t]; +#endif + } + } + } +#endif } static void update_coef_probs3(VP8_COMP *cpi) { @@ -1696,7 +1706,7 @@ static void update_coef_probs2(VP8_COMP *cpi) { } static void update_coef_probs(VP8_COMP *cpi) { - int i = 0; + int i, j, k, t; vp8_writer *const w = & cpi->bc; int update[2] = {0, 0}; int savings; @@ -1704,21 +1714,17 @@ static void update_coef_probs(VP8_COMP *cpi) { vp8_clear_system_state(); // __asm emms; // Build the cofficient contexts based on counts collected in encode loop - build_coeff_contexts(cpi); // vp8_prob bestupd = find_coef_update_prob(cpi); /* dry run to see if there is any udpate at all needed */ savings = 0; - do { - int j = !i; - do { - int k = 0; + for (i = 0; i < BLOCK_TYPES; ++i) { + for (j = !i; j < COEF_BANDS; ++j) { int prev_coef_savings[ENTROPY_NODES] = {0}; - do { - int t = 0; /* token/prob index */ - do { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + for (t = 0; t < ENTROPY_NODES; ++t) { vp8_prob newp = cpi->frame_coef_probs [i][j][k][t]; vp8_prob *Pold = cpi->common.fc.coef_probs [i][j][k] + t; const vp8_prob upd = COEF_UPDATE_PROB; @@ -1747,29 +1753,23 @@ static void update_coef_probs(VP8_COMP *cpi) { #endif update[u]++; - } while (++t < ENTROPY_NODES); - } while (++k < PREV_COEF_CONTEXTS); - } while (++j < COEF_BANDS); - } while (++i < BLOCK_TYPES); + } + } + } + } // printf("Update %d %d, savings %d\n", update[0], update[1], savings); /* Is coef updated at all */ if 
(update[1] == 0 || savings < 0) - { vp8_write_bit(w, 0); - } else { + else { vp8_write_bit(w, 1); - i = 0; - do { - int j = !i; - do { - int k = 0; + for (i = 0; i < BLOCK_TYPES; ++i) { + for (j = !i; j < COEF_BANDS; ++j) { int prev_coef_savings[ENTROPY_NODES] = {0}; - - do { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { // calc probs and branch cts for this frame only - int t = 0; /* token/prob index */ - do { + for (t = 0; t < ENTROPY_NODES; ++t) { vp8_prob newp = cpi->frame_coef_probs [i][j][k][t]; vp8_prob *Pold = cpi->common.fc.coef_probs [i][j][k] + t; const vp8_prob upd = COEF_UPDATE_PROB; @@ -1791,8 +1791,6 @@ static void update_coef_probs(VP8_COMP *cpi) { if (s > 0) u = 1; #endif - - vp8_write(w, u, upd); #ifdef ENTROPY_STATS if (!cpi->dummy_packing) @@ -1803,28 +1801,23 @@ static void update_coef_probs(VP8_COMP *cpi) { write_prob_diff_update(w, newp, *Pold); *Pold = newp; } - } while (++t < ENTROPY_NODES); - - } while (++k < PREV_COEF_CONTEXTS); - } while (++j < COEF_BANDS); - } while (++i < BLOCK_TYPES); + } + } + } + } } - /* do not do this if not evena allowed */ + /* do not do this if not even allowed */ if (cpi->common.txfm_mode == ALLOW_8X8) { /* dry run to see if update is necessary */ update[0] = update[1] = 0; savings = 0; - i = 0; - do { - int j = !i; - do { - int k = 0; - do { + for (i = 0; i < BLOCK_TYPES_8X8; ++i) { + for (j = !i; j < COEF_BANDS; ++j) { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { // calc probs and branch cts for this frame only - int t = 0; /* token/prob index */ - do { + for (t = 0; t < ENTROPY_NODES; ++t) { const unsigned int *ct = cpi->frame_branch_ct_8x8 [i][j][k][t]; vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t]; vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t; @@ -1846,26 +1839,20 @@ static void update_coef_probs(VP8_COMP *cpi) { if (u) savings += s; #endif - update[u]++; - } while (++t < MAX_ENTROPY_TOKENS - 1); - } while (++k < PREV_COEF_CONTEXTS); - } while (++j < COEF_BANDS); - } while (++i < 
BLOCK_TYPES_8X8); + } + } + } + } if (update[1] == 0 || savings < 0) - { vp8_write_bit(w, 0); - } else { + else { vp8_write_bit(w, 1); - i = 0; - do { - int j = !i; - do { - int k = 0; - do { - int t = 0; /* token/prob index */ - do { + for (i = 0; i < BLOCK_TYPES_8X8; ++i) { + for (j = !i; j < COEF_BANDS; ++j) { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + for (t = 0; t < ENTROPY_NODES; ++t) { const unsigned int *ct = cpi->frame_branch_ct_8x8 [i][j][k][t]; vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t]; vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t; @@ -1892,12 +1879,90 @@ static void update_coef_probs(VP8_COMP *cpi) { write_prob_diff_update(w, newp, oldp); *Pold = newp; } - } while (++t < MAX_ENTROPY_TOKENS - 1); - } while (++k < PREV_COEF_CONTEXTS); - } while (++j < COEF_BANDS); - } while (++i < BLOCK_TYPES_8X8); + } + } + } + } } } + +#if CONFIG_TX16X16 + // 16x16 + /* dry run to see if update is necessary */ + update[0] = update[1] = 0; + savings = 0; + for (i = 0; i < BLOCK_TYPES_16X16; ++i) { + for (j = !i; j < COEF_BANDS; ++j) { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + // calc probs and branch cts for this frame only + for (t = 0; t < ENTROPY_NODES; ++t) { + const unsigned int *ct = cpi->frame_branch_ct_16x16[i][j][k][t]; + vp8_prob newp = cpi->frame_coef_probs_16x16[i][j][k][t]; + vp8_prob *Pold = cpi->common.fc.coef_probs_16x16[i][j][k] + t; + const vp8_prob oldp = *Pold; + int s, u; + const vp8_prob upd = COEF_UPDATE_PROB_16X16; + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; +#if defined(SEARCH_NEWP) + s = prob_diff_update_savings_search(ct, oldp, &newp, upd); + u = s > 0 && newp != oldp ? 1 : 0; + if (u) + savings += s - (int)(vp8_cost_zero(upd)); + else + savings -= (int)(vp8_cost_zero(upd)); +#else + s = prob_update_savings(ct, oldp, newp, upd); + u = s > 0 ? 
1 : 0; + if (u) + savings += s; +#endif + update[u]++; + } + } + } + } + + if (update[1] == 0 || savings < 0) + vp8_write_bit(w, 0); + else { + vp8_write_bit(w, 1); + for (i = 0; i < BLOCK_TYPES_16X16; ++i) { + for (j = !i; j < COEF_BANDS; ++j) { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + for (t = 0; t < ENTROPY_NODES; ++t) { + const unsigned int *ct = cpi->frame_branch_ct_16x16[i][j][k][t]; + vp8_prob newp = cpi->frame_coef_probs_16x16[i][j][k][t]; + vp8_prob *Pold = cpi->common.fc.coef_probs_16x16[i][j][k] + t; + const vp8_prob oldp = *Pold; + const vp8_prob upd = COEF_UPDATE_PROB_16X16; + int s, u; + if (k >= 3 && ((i == 0 && j == 1) || + (i > 0 && j == 0))) + continue; +#if defined(SEARCH_NEWP) + s = prob_diff_update_savings_search(ct, oldp, &newp, upd); + u = s > 0 && newp != oldp ? 1 : 0; +#else + s = prob_update_savings(ct, oldp, newp, upd); + u = s > 0 ? 1 : 0; +#endif + vp8_write(w, u, upd); +#ifdef ENTROPY_STATS + if (!cpi->dummy_packing) + ++tree_update_hist_16x16[i][j][k][t][u]; +#endif + if (u) { + /* send/use new probability */ + write_prob_diff_update(w, newp, oldp); + *Pold = newp; + } + } + } + } + } + } +#endif } #ifdef PACKET_TESTING @@ -2310,18 +2375,19 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) vp8_write_bit(bc, pc->refresh_last_frame); #ifdef ENTROPY_STATS - if (pc->frame_type == INTER_FRAME) active_section = 0; else active_section = 7; - #endif vp8_clear_system_state(); // __asm emms; vp8_copy(cpi->common.fc.pre_coef_probs, cpi->common.fc.coef_probs); vp8_copy(cpi->common.fc.pre_coef_probs_8x8, cpi->common.fc.coef_probs_8x8); +#if CONFIG_TX16X16 + vp8_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16); +#endif vp8_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob); vp8_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob); vp8_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob); @@ -2401,24 +2467,20 @@ void print_tree_update_probs() { FILE 
*f = fopen("coefupdprob.h", "w"); int Sum; fprintf(f, "\n/* Update probabilities for token entropy tree. */\n\n"); + fprintf(f, "const vp8_prob\n" "vp8_coef_update_probs[BLOCK_TYPES]\n" " [COEF_BANDS]\n" " [PREV_COEF_CONTEXTS]\n" " [ENTROPY_NODES] = {\n"); - for (i = 0; i < BLOCK_TYPES; i++) { fprintf(f, " { \n"); - for (j = 0; j < COEF_BANDS; j++) { fprintf(f, " {\n"); - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { fprintf(f, " {"); - for (l = 0; l < ENTROPY_NODES; l++) { Sum = tree_update_hist[i][j][k][l][0] + tree_update_hist[i][j][k][l][1]; - if (Sum > 0) { if (((tree_update_hist[i][j][k][l][0] * 255) / Sum) > 0) fprintf(f, "%3ld, ", (tree_update_hist[i][j][k][l][0] * 255) / Sum); @@ -2427,16 +2489,12 @@ void print_tree_update_probs() { } else fprintf(f, "%3ld, ", 128); } - fprintf(f, "},\n"); } - fprintf(f, " },\n"); } - fprintf(f, " },\n"); } - fprintf(f, "};\n"); fprintf(f, "const vp8_prob\n" @@ -2444,20 +2502,14 @@ void print_tree_update_probs() { " [COEF_BANDS]\n" " [PREV_COEF_CONTEXTS]\n" " [ENTROPY_NODES] = {\n"); - - for (i = 0; i < BLOCK_TYPES_8X8; i++) { fprintf(f, " { \n"); - for (j = 0; j < COEF_BANDS; j++) { fprintf(f, " {\n"); - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { fprintf(f, " {"); - for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) { Sum = tree_update_hist_8x8[i][j][k][l][0] + tree_update_hist_8x8[i][j][k][l][1]; - if (Sum > 0) { if (((tree_update_hist_8x8[i][j][k][l][0] * 255) / Sum) > 0) fprintf(f, "%3ld, ", (tree_update_hist_8x8[i][j][k][l][0] * 255) / Sum); @@ -2466,20 +2518,50 @@ void print_tree_update_probs() { } else fprintf(f, "%3ld, ", 128); } - fprintf(f, "},\n"); } - fprintf(f, " },\n"); } - fprintf(f, " },\n"); } + +#if CONFIG_TX16X16 + fprintf(f, "const vp8_prob\n" + "vp8_coef_update_probs_16x16[BLOCK_TYPES_16X16]\n" + " [COEF_BANDS]\n" + " [PREV_COEF_CONTEXTS]\n" + " [ENTROPY_NODES] = {\n"); + for (i = 0; i < BLOCK_TYPES_16X16; i++) { + fprintf(f, " { \n"); + for (j = 0; j < COEF_BANDS; j++) { + fprintf(f, " {\n"); + for (k = 0; k < 
PREV_COEF_CONTEXTS; k++) { + fprintf(f, " {"); + for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) { + Sum = tree_update_hist_16x16[i][j][k][l][0] + tree_update_hist_16x16[i][j][k][l][1]; + if (Sum > 0) { + if (((tree_update_hist_16x16[i][j][k][l][0] * 255) / Sum) > 0) + fprintf(f, "%3ld, ", (tree_update_hist_16x16[i][j][k][l][0] * 255) / Sum); + else + fprintf(f, "%3ld, ", 1); + } else + fprintf(f, "%3ld, ", 128); + } + fprintf(f, "},\n"); + } + fprintf(f, " },\n"); + } + fprintf(f, " },\n"); + } +#endif + fclose(f); f = fopen("treeupdate.bin", "wb"); fwrite(tree_update_hist, sizeof(tree_update_hist), 1, f); fwrite(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f); +#if CONFIG_TX16X16 + fwrite(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f); +#endif fclose(f); - } #endif diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index 106036a0d..0019d5e8c 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -35,8 +35,14 @@ typedef struct { unsigned char *quant_shift; short *zbin; short *zbin_8x8; +#if CONFIG_TX16X16 + short *zbin_16x16; +#endif short *zrun_zbin_boost; short *zrun_zbin_boost_8x8; +#if CONFIG_TX16X16 + short *zrun_zbin_boost_16x16; +#endif short *round; // Zbin Over Quant value @@ -49,7 +55,9 @@ typedef struct { int eob_max_offset; int eob_max_offset_8x8; - +#if CONFIG_TX16X16 + int eob_max_offset_16x16; +#endif } BLOCK; typedef struct { @@ -153,9 +161,13 @@ typedef struct { #endif unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] - [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; + [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; unsigned int token_costs_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; + [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; +#if CONFIG_TX16X16 + unsigned int token_costs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] + [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; +#endif int optimize; int q_index; @@ -176,7 +188,13 @@ typedef struct { void (*quantize_b)(BLOCK *b, BLOCKD *d); void 
/* 1-D 16-point forward DCT butterfly.
 *
 * Reads the 16 samples in input[] and writes 16 transform coefficients to
 * output[].  input and output must not overlap: output[] is used as
 * scratch space between the butterfly stages.
 *
 * With this scaling a constant input c yields output[0] == 16 * c and all
 * other coefficients zero.
 */
static void dct16x16_1d(double input[16], double output[16]) {
  /* cos(k * pi / 32) for k = 1..15, precomputed to double precision.
   * Previously these were recomputed with 15 cos() calls on every 1-D
   * pass (32 passes per 16x16 block) from a truncated value of pi. */
  static const double C1 = 0.99518472667219688624;
  static const double C2 = 0.98078528040323044913;
  static const double C3 = 0.95694033573220886494;
  static const double C4 = 0.92387953251128675613;
  static const double C5 = 0.88192126434835502971;
  static const double C6 = 0.83146961230254523708;
  static const double C7 = 0.77301045336273696081;
  static const double C8 = 0.70710678118654752440;
  static const double C9 = 0.63439328416364549822;
  static const double C10 = 0.55557023301960222474;
  static const double C11 = 0.47139673682599764856;
  static const double C12 = 0.38268343236508977173;
  static const double C13 = 0.29028467725446236764;
  static const double C14 = 0.19509032201612826785;
  static const double C15 = 0.09801714032956060199;

  double step[16];
  double intermediate[16];
  double temp1, temp2;
  int k;

  /* step 1: mirror sums in the lower half, mirror differences in the
   * upper half. */
  for (k = 0; k < 8; k++) {
    step[k] = input[k] + input[15 - k];
    step[15 - k] = input[k] - input[15 - k];
  }

  /* step 2 */
  for (k = 0; k < 4; k++) {
    output[k] = step[k] + step[7 - k];
    output[7 - k] = step[k] - step[7 - k];
  }

  temp1 = step[8] * C7;
  temp2 = step[15] * C9;
  output[8] = temp1 + temp2;

  temp1 = step[9] * C11;
  temp2 = step[14] * C5;
  output[9] = temp1 - temp2;

  temp1 = step[10] * C3;
  temp2 = step[13] * C13;
  output[10] = temp1 + temp2;

  temp1 = step[11] * C15;
  temp2 = step[12] * C1;
  output[11] = temp1 - temp2;

  temp1 = step[11] * C1;
  temp2 = step[12] * C15;
  output[12] = temp2 + temp1;

  temp1 = step[10] * C13;
  temp2 = step[13] * C3;
  output[13] = temp2 - temp1;

  temp1 = step[9] * C5;
  temp2 = step[14] * C11;
  output[14] = temp2 + temp1;

  temp1 = step[8] * C9;
  temp2 = step[15] * C7;
  output[15] = temp2 - temp1;

  /* step 3 */
  step[0] = output[0] + output[3];
  step[1] = output[1] + output[2];
  step[2] = output[1] - output[2];
  step[3] = output[0] - output[3];

  temp1 = output[4] * C14;
  temp2 = output[7] * C2;
  step[4] = temp1 + temp2;

  temp1 = output[5] * C10;
  temp2 = output[6] * C6;
  step[5] = temp1 + temp2;

  temp1 = output[5] * C6;
  temp2 = output[6] * C10;
  step[6] = temp2 - temp1;

  temp1 = output[4] * C2;
  temp2 = output[7] * C14;
  step[7] = temp2 - temp1;

  step[8] = output[8] + output[11];
  step[9] = output[9] + output[10];
  step[10] = output[9] - output[10];
  step[11] = output[8] - output[11];

  step[12] = output[12] + output[15];
  step[13] = output[13] + output[14];
  step[14] = output[13] - output[14];
  step[15] = output[12] - output[15];

  /* step 4: final coefficients, written in their output order. */
  output[0] = step[0] + step[1];
  output[8] = step[0] - step[1];

  temp1 = step[2] * C12;
  temp2 = step[3] * C4;
  temp1 = temp1 + temp2;
  output[4] = 2 * (temp1 * C8);

  temp1 = step[2] * C4;
  temp2 = step[3] * C12;
  temp1 = temp2 - temp1;
  output[12] = 2 * (temp1 * C8);

  output[2] = 2 * ((step[4] + step[5]) * C8);
  output[14] = 2 * ((step[7] - step[6]) * C8);

  temp1 = step[4] - step[5];
  temp2 = step[6] + step[7];
  output[6] = temp1 + temp2;
  output[10] = temp1 - temp2;

  intermediate[8] = step[8] + step[14];
  intermediate[9] = step[9] + step[15];

  temp1 = intermediate[8] * C12;
  temp2 = intermediate[9] * C4;
  temp1 = temp1 - temp2;
  output[3] = 2 * (temp1 * C8);

  temp1 = intermediate[8] * C4;
  temp2 = intermediate[9] * C12;
  temp1 = temp2 + temp1;
  output[13] = 2 * (temp1 * C8);

  output[9] = 2 * ((step[10] + step[11]) * C8);

  intermediate[11] = step[10] - step[11];
  intermediate[12] = step[12] + step[13];
  intermediate[13] = step[12] - step[13];
  intermediate[14] = step[8] - step[14];
  intermediate[15] = step[9] - step[15];

  output[15] = intermediate[11] + intermediate[12];
  output[1] = -(intermediate[11] - intermediate[12]);

  output[7] = 2 * (intermediate[13] * C8);

  temp1 = intermediate[14] * C12;
  temp2 = intermediate[15] * C4;
  temp1 = temp1 - temp2;
  output[11] = -2 * (temp1 * C8);

  temp1 = intermediate[14] * C4;
  temp2 = intermediate[15] * C12;
  temp1 = temp2 + temp1;
  output[5] = 2 * (temp1 * C8);
}

/* Round a finite double to the nearest short, halfway cases away from
 * zero (the same rule as round()), saturating at the limits of a 16-bit
 * coefficient instead of performing an out-of-range double-to-short
 * conversion, which is undefined behaviour in C. */
static short fdct16x16_round_to_short(double v) {
  enum { COEFF_MIN = -32768, COEFF_MAX = 32767 };
  double whole, frac;

  if (v >= (double)COEFF_MAX)
    return COEFF_MAX;
  if (v <= (double)COEFF_MIN)
    return COEFF_MIN;

  whole = (double)(int)v; /* truncates toward zero; v is in range here */
  frac = v - whole;       /* exact for values of this magnitude */
  if (frac >= 0.5)
    whole += 1.0;
  else if (frac <= -0.5)
    whole -= 1.0;
  return (short)whole;
}

/* 2-D 16x16 forward DCT.
 *
 * input  - top-left sample of a 16x16 block of residuals
 * out    - receives 256 coefficients in row-major order
 * pitch  - distance between input rows; it is halved to obtain the row
 *          stride in shorts
 *
 * The 1-D transform is applied to every column, then to every row, and
 * the result is halved and rounded to the nearest integer.  A constant
 * block of value c therefore produces out[0] == 128 * c and zeros
 * elsewhere.  Results outside the 16-bit range are clamped.
 */
void vp8_short_fdct16x16_c(short *input, short *out, int pitch) {
  const int stride = pitch >> 1;
  double coeffs[256];
  double line_in[16], line_out[16];
  int row, col, n;

  /* Pass 1: transform each column. */
  for (col = 0; col < 16; col++) {
    for (row = 0; row < 16; row++)
      line_in[row] = input[row * stride + col];
    dct16x16_1d(line_in, line_out);
    for (row = 0; row < 16; row++)
      coeffs[row * 16 + col] = line_out[row];
  }

  /* Pass 2: transform each row of the column-transformed data. */
  for (row = 0; row < 16; row++) {
    for (col = 0; col < 16; col++)
      line_in[col] = coeffs[row * 16 + col];
    dct16x16_1d(line_in, line_out);
    for (col = 0; col < 16; col++)
      coeffs[row * 16 + col] = line_out[col];
  }

  /* Final scaling by 1/2, then round and clamp. */
  for (n = 0; n < 256; n++)
    out[n] = fdct16x16_round_to_short(coeffs[n] / 2);
}
-) { +void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x, + TOKENEXTRA **t, int recon_yoffset, + int recon_uvoffset, int output_enabled) { VP8_COMMON *cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; unsigned char *segment_id = &xd->mode_info_context->mbmi.segment_id; @@ -1523,6 +1530,16 @@ void vp8cx_encode_inter_macroblock set_pred_flag(xd, PRED_REF, ref_pred_flag); /* test code: set transform size based on mode selection */ +#if CONFIG_TX16X16 + if (x->e_mbd.mode_info_context->mbmi.mode <= TM_PRED || + x->e_mbd.mode_info_context->mbmi.mode == NEWMV || + x->e_mbd.mode_info_context->mbmi.mode == ZEROMV || + x->e_mbd.mode_info_context->mbmi.mode == NEARMV || + x->e_mbd.mode_info_context->mbmi.mode == NEARESTMV) { + x->e_mbd.mode_info_context->mbmi.txfm_size = TX_16X16; + cpi->t16x16_count++; + } else +#endif if (cpi->common.txfm_mode == ALLOW_8X8 && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED && x->e_mbd.mode_info_context->mbmi.mode != B_PRED diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 01ae03a23..325efeb6b 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -160,23 +160,43 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride); +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + vp8_transform_intra_mby_16x16(x); + else +#endif if (tx_type == TX_8X8) vp8_transform_intra_mby_8x8(x); else vp8_transform_intra_mby(x); +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + vp8_quantize_mby_16x16(x); + else +#endif if (tx_type == TX_8X8) vp8_quantize_mby_8x8(x); else vp8_quantize_mby(x); if (x->optimize) { +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + vp8_optimize_mby_16x16(x, rtcd); + else +#endif if (tx_type == TX_8X8) vp8_optimize_mby_8x8(x, rtcd); else vp8_optimize_mby(x, rtcd); } +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + 
vp8_inverse_transform_mby_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif if (tx_type == TX_8X8) vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); else @@ -220,6 +240,9 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size; +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) tx_type = TX_8X8; // 16x16 for U and V should default to 8x8 behavior. +#endif #if CONFIG_COMP_INTRA_PRED if (x->e_mbd.mode_info_context->mbmi.second_uv_mode == (MB_PREDICTION_MODE)(DC_PRED - 1)) { #endif diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index 653a4cc70..bfab4c647 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -282,6 +282,42 @@ void vp8_transform_mby_8x8(MACROBLOCK *x) { } } +#if CONFIG_TX16X16 +void vp8_transform_mbuv_16x16(MACROBLOCK *x) { + int i; + + vp8_clear_system_state(); + // Default to the 8x8 + for (i = 16; i < 24; i += 4) + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 16); +} + + +void vp8_transform_intra_mby_16x16(MACROBLOCK *x) { + vp8_clear_system_state(); + x->vp8_short_fdct16x16(&x->block[0].src_diff[0], + &x->block[0].coeff[0], 32); +} + + +void vp8_transform_mb_16x16(MACROBLOCK *x) { + int i; + vp8_clear_system_state(); + x->vp8_short_fdct16x16(&x->block[0].src_diff[0], + &x->block[0].coeff[0], 32); + + for (i = 16; i < 24; i += 4) { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 16); + } +} + +void vp8_transform_mby_16x16(MACROBLOCK *x) { + vp8_clear_system_state(); + x->vp8_short_fdct16x16(&x->block[0].src_diff[0], &x->block[0].coeff[0], 32); +} +#endif #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) #define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) @@ -290,7 +326,7 @@ typedef struct vp8_token_state vp8_token_state; struct vp8_token_state { int rate; int error; - signed 
char next; + int next; signed char token; short qc; }; @@ -1017,29 +1053,280 @@ void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) { } + + +#if CONFIG_TX16X16 +#define UPDATE_RD_COST()\ +{\ + rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\ + rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\ + if (rd_cost0 == rd_cost1) {\ + rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\ + rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\ + }\ +} + +void optimize_b_16x16(MACROBLOCK *mb, int i, int type, + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, + const VP8_ENCODER_RTCD *rtcd) { + BLOCK *b = &mb->block[i]; + BLOCKD *d = &mb->e_mbd.block[i]; + vp8_token_state tokens[257][2]; + unsigned best_index[257][2]; + const short *dequant_ptr = d->dequant, *coeff_ptr = b->coeff; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + int eob = d->eob, final_eob, sz = 0; + int rc, x, next; + int64_t rdmult, rddiv, rd_cost0, rd_cost1; + int rate0, rate1, error0, error1, t0, t1; + int best, band, pt; + int err_mult = plane_rd_mult[type]; + + /* Now set up a Viterbi trellis to evaluate alternative roundings. */ + rdmult = mb->rdmult * err_mult; + if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) + rdmult = (rdmult * 9)>>4; + rddiv = mb->rddiv; + memset(best_index, 0, sizeof(best_index)); + /* Initialize the sentinel node of the trellis. */ + tokens[eob][0].rate = 0; + tokens[eob][0].error = 0; + tokens[eob][0].next = 256; + tokens[eob][0].token = DCT_EOB_TOKEN; + tokens[eob][0].qc = 0; + *(tokens[eob] + 1) = *(tokens[eob] + 0); + next = eob; + for (i = eob; i-- > 0;) { + int base_bits, d2, dx; + + rc = vp8_default_zig_zag1d_16x16[i]; + x = qcoeff_ptr[rc]; + /* Only add a trellis state for non-zero coefficients. */ + if (x) { + int shortcut = 0; + error0 = tokens[next][0].error; + error1 = tokens[next][1].error; + /* Evaluate the first possibility for this state. 
*/ + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + t0 = (vp8_dct_value_tokens_ptr + x)->Token; + /* Consider both possible successor states. */ + if (next < 256) { + band = vp8_coef_bands_16x16[i + 1]; + pt = vp8_prev_token_class[t0]; + rate0 += mb->token_costs_16x16[type][band][pt][tokens[next][0].token]; + rate1 += mb->token_costs_16x16[type][band][pt][tokens[next][1].token]; + } + UPDATE_RD_COST(); + /* And pick the best. */ + best = rd_cost1 < rd_cost0; + base_bits = *(vp8_dct_value_cost_ptr + x); + dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; + d2 = dx*dx; + tokens[i][0].rate = base_bits + (best ? rate1 : rate0); + tokens[i][0].error = d2 + (best ? error1 : error0); + tokens[i][0].next = next; + tokens[i][0].token = t0; + tokens[i][0].qc = x; + best_index[i][0] = best; + /* Evaluate the second possibility for this state. */ + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + + if((abs(x)*dequant_ptr[rc!=0]>abs(coeff_ptr[rc])) && + (abs(x)*dequant_ptr[rc!=0]<abs(coeff_ptr[rc])+dequant_ptr[rc!=0])) + shortcut = 1; + else + shortcut = 0; + + if (shortcut) { + sz = -(x < 0); + x -= 2*sz + 1; + } + + /* Consider both possible successor states. */ + if (!x) { + /* If we reduced this coefficient to zero, check to see if + * we need to move the EOB back here. + */ + t0 = tokens[next][0].token == DCT_EOB_TOKEN ? + DCT_EOB_TOKEN : ZERO_TOKEN; + t1 = tokens[next][1].token == DCT_EOB_TOKEN ? + DCT_EOB_TOKEN : ZERO_TOKEN; + } + else + t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token; + if (next < 256) { + band = vp8_coef_bands_16x16[i + 1]; + if (t0 != DCT_EOB_TOKEN) { + pt = vp8_prev_token_class[t0]; + rate0 += mb->token_costs_16x16[type][band][pt] + [tokens[next][0].token]; + } + if (t1!=DCT_EOB_TOKEN) { + pt = vp8_prev_token_class[t1]; + rate1 += mb->token_costs_16x16[type][band][pt] + [tokens[next][1].token]; + } + } + UPDATE_RD_COST(); + /* And pick the best. */ + best = rd_cost1 < rd_cost0; + base_bits = *(vp8_dct_value_cost_ptr + x); + + if(shortcut) { + dx -= (dequant_ptr[rc!=0] + sz) ^ sz; + d2 = dx*dx; + } + tokens[i][1].rate = base_bits + (best ? rate1 : rate0); + tokens[i][1].error = d2 + (best ? error1 : error0); + tokens[i][1].next = next; + tokens[i][1].token = best ? t1 : t0; + tokens[i][1].qc = x; + best_index[i][1] = best; + /* Finally, make this the new head of the trellis. */ + next = i; + } + /* There's no choice to make for a zero coefficient, so we don't + * add a new trellis node, but we do need to update the costs. 
+ */ + else { + band = vp8_coef_bands_16x16[i + 1]; + t0 = tokens[next][0].token; + t1 = tokens[next][1].token; + /* Update the cost of each path if we're past the EOB token. */ + if (t0 != DCT_EOB_TOKEN) { + tokens[next][0].rate += mb->token_costs_16x16[type][band][0][t0]; + tokens[next][0].token = ZERO_TOKEN; + } + if (t1 != DCT_EOB_TOKEN) { + tokens[next][1].rate += mb->token_costs_16x16[type][band][0][t1]; + tokens[next][1].token = ZERO_TOKEN; + } + /* Don't update next, because we didn't add a new node. */ + } + } + + /* Now pick the best path through the whole trellis. */ + band = vp8_coef_bands_16x16[i + 1]; + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + error0 = tokens[next][0].error; + error1 = tokens[next][1].error; + t0 = tokens[next][0].token; + t1 = tokens[next][1].token; + rate0 += mb->token_costs_16x16[type][band][pt][t0]; + rate1 += mb->token_costs_16x16[type][band][pt][t1]; + UPDATE_RD_COST(); + best = rd_cost1 < rd_cost0; + final_eob = -1; + + for (i = next; i < eob; i = next) { + x = tokens[i][best].qc; + if (x) + final_eob = i; + rc = vp8_default_zig_zag1d_16x16[i]; + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = (x * dequant_ptr[rc!=0]); + + next = tokens[i][best].next; + best = best_index[i][best]; + } + final_eob++; + + d->eob = final_eob; + *a = *l = (d->eob != !type); +} + +void vp8_optimize_mby_16x16(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) { + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta, *tl; + + if (!x->e_mbd.above_context) + return; + if (!x->e_mbd.left_context) + return; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + optimize_b_16x16(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, rtcd); + *(ta + 1) = *ta; + *(tl + 1) = *tl; +} + +void optimize_mb_16x16(MACROBLOCK *x, const 
VP8_ENCODER_RTCD *rtcd) { + int b; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta, *tl; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + optimize_b_16x16(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, rtcd); + *(ta + 1) = *ta; + *(tl + 1) = *tl; + + for (b = 16; b < 24; b += 4) { + optimize_b_8x8(x, b, PLANE_TYPE_UV, + ta + vp8_block2above_8x8[b], tl + vp8_block2left_8x8[b], + rtcd); + *(ta + vp8_block2above_8x8[b] + 1) = *(ta + vp8_block2above_8x8[b]); + *(tl + vp8_block2left_8x8[b] + 1) = *(tl + vp8_block2left_8x8[b]); + } +} +#endif + void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size; vp8_build_inter_predictors_mb(&x->e_mbd); vp8_subtract_mb(rtcd, x); +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + vp8_transform_mb_16x16(x); + else +#endif if (tx_type == TX_8X8) vp8_transform_mb_8x8(x); else transform_mb(x); +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + vp8_quantize_mb_16x16(x); + else +#endif if (tx_type == TX_8X8) vp8_quantize_mb_8x8(x); else vp8_quantize_mb(x); if (x->optimize) { +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + optimize_mb_16x16(x, rtcd); + else +#endif if (tx_type == TX_8X8) optimize_mb_8x8(x, rtcd); else optimize_mb(x, rtcd); } +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + vp8_inverse_transform_mb_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif if (tx_type == TX_8X8) vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); else @@ -1111,6 +1398,11 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride); +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + vp8_transform_mby_16x16(x); + else +#endif if (tx_type == 
TX_8X8) vp8_transform_mby_8x8(x); else @@ -1118,6 +1410,11 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { vp8_quantize_mby(x); +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + vp8_inverse_transform_mby_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif if (tx_type == TX_8X8) vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); else @@ -1126,3 +1423,4 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { RECON_INVOKE(&rtcd->common->recon, recon_mby) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); } + diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h index bfcd0f92c..228451936 100644 --- a/vp8/encoder/encodemb.h +++ b/vp8/encoder/encodemb.h @@ -121,6 +121,15 @@ void vp8_build_dcblock_8x8(MACROBLOCK *b); void vp8_optimize_mby_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); +#if CONFIG_TX16X16 +void vp8_transform_mb_16x16(MACROBLOCK *mb); +void vp8_transform_mby_16x16(MACROBLOCK *x); +void vp8_transform_mbuv_16x16(MACROBLOCK *x); +void vp8_transform_intra_mby_16x16(MACROBLOCK *x); +void vp8_build_dcblock_16x16(MACROBLOCK *b); +void vp8_optimize_mby_16x16(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); +#endif + void vp8_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch); #endif diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 695e9c69b..6390f3fe4 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -69,6 +69,9 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) { cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; cpi->rtcd.fdct.short8x8 = vp8_short_fdct8x8_c; +#if CONFIG_TX16X16 + cpi->rtcd.fdct.short16x16 = vp8_short_fdct16x16_c; +#endif cpi->rtcd.fdct.haar_short2x2 = vp8_short_fhaar2x2_c; cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; diff --git 
a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 5b6684ac8..e471cab89 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1161,10 +1161,16 @@ void vp8_set_speed_features(VP8_COMP *cpi) { } if (cpi->sf.improved_dct) { +#if CONFIG_TX16X16 + cpi->mb.vp8_short_fdct16x16 = FDCT_INVOKE(&cpi->rtcd.fdct, short16x16); +#endif cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8); cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4); cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4); } else { +#if CONFIG_TX16X16 + cpi->mb.vp8_short_fdct16x16 = FDCT_INVOKE(&cpi->rtcd.fdct, short16x16); +#endif cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8); cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4); cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4); @@ -1177,6 +1183,9 @@ void vp8_set_speed_features(VP8_COMP *cpi) { cpi->mb.quantize_b = vp8_regular_quantize_b; cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair; cpi->mb.quantize_b_8x8 = vp8_regular_quantize_b_8x8; +#if CONFIG_TX16X16 + cpi->mb.quantize_b_16x16= vp8_regular_quantize_b_16x16; +#endif cpi->mb.quantize_b_2x2 = vp8_regular_quantize_b_2x2; vp8cx_init_quantizer(cpi); @@ -3641,6 +3650,9 @@ static void encode_frame_to_data_rate update_reference_frames(cm); vp8_copy(cpi->common.fc.coef_counts, cpi->coef_counts); vp8_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8); +#if CONFIG_TX16X16 + vp8_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16); +#endif vp8_adapt_coef_probs(&cpi->common); if (cpi->common.frame_type != KEY_FRAME) { vp8_copy(cpi->common.fc.ymode_counts, cpi->ymode_count); diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 2821aadd0..a1159cc5c 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -91,9 +91,13 @@ typedef struct { signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; vp8_prob coef_probs[BLOCK_TYPES] - 
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; + [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; vp8_prob coef_probs_8x8[BLOCK_TYPES_8X8] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; + [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; +#if CONFIG_TX16X16 + vp8_prob coef_probs_16x16[BLOCK_TYPES_16X16] + [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; +#endif vp8_prob ymode_prob [VP8_YMODES - 1]; /* interframe intra mode probs */ vp8_prob uv_mode_prob [VP8_YMODES][VP8_UV_MODES - 1]; @@ -390,6 +394,15 @@ typedef struct VP8_COMP { DECLARE_ALIGNED(64, short, zrun_zbin_boost_y2_8x8[QINDEX_RANGE][64]); DECLARE_ALIGNED(64, short, zrun_zbin_boost_uv_8x8[QINDEX_RANGE][64]); +#if CONFIG_TX16X16 + DECLARE_ALIGNED(16, short, Y1zbin_16x16[QINDEX_RANGE][256]); + DECLARE_ALIGNED(16, short, Y2zbin_16x16[QINDEX_RANGE][256]); + DECLARE_ALIGNED(16, short, UVzbin_16x16[QINDEX_RANGE][256]); + DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1_16x16[QINDEX_RANGE][256]); + DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_16x16[QINDEX_RANGE][256]); + DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_16x16[QINDEX_RANGE][256]); +#endif + MACROBLOCK mb; VP8_COMMON common; vp8_writer bc, bc2; @@ -540,6 +553,11 @@ typedef struct VP8_COMP { unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ vp8_prob frame_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#if CONFIG_TX16X16 + unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ + vp8_prob frame_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + unsigned int frame_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#endif int gfu_boost; int last_boost; @@ -598,6 +616,9 @@ typedef struct 
VP8_COMP { int skip_false_count[3]; int t4x4_count; int t8x8_count; +#if CONFIG_TX16X16 + int t16x16_count; +#endif unsigned char *segmentation_map; diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 78892fc3f..81b4d12b3 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -302,15 +302,93 @@ void vp8_quantize_mb_8x8(MACROBLOCK *x) { void vp8_quantize_mbuv_8x8(MACROBLOCK *x) { int i; - for (i = 16; i < 24; i ++) { + for (i = 16; i < 24; i ++) x->e_mbd.block[i].eob = 0; - } for (i = 16; i < 24; i += 4) x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); } + +#if CONFIG_TX16X16 +void vp8_quantize_mby_16x16(MACROBLOCK *x) { + int i; + for (i = 0; i < 16; i++) + x->e_mbd.block[i].eob = 0; + x->e_mbd.block[24].eob = 0; + x->quantize_b_16x16(&x->block[0], &x->e_mbd.block[0]); +} + +void vp8_quantize_mb_16x16(MACROBLOCK *x) { + int i; + for(i = 0; i < 25; i++) + x->e_mbd.block[i].eob = 0; + x->quantize_b_16x16(&x->block[0], &x->e_mbd.block[0]); + for (i = 16; i < 24; i += 4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); +} + +// U and V should use 8x8 +void vp8_quantize_mbuv_16x16(MACROBLOCK *x) { + int i; + + for(i = 16; i < 24; i++) + x->e_mbd.block[i].eob = 0; + for (i = 16; i < 24; i += 4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); +} + +void vp8_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) { + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost_16x16; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin_16x16; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + unsigned char *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + + vpx_memset(qcoeff_ptr, 0, 256*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 256*sizeof(short)); + + eob = -1; + for (i = 0; i < b->eob_max_offset_16x16; i++) { + rc = vp8_default_zig_zag1d_16x16[i]; + z = 
coeff_ptr[rc]; + + zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value); + zbin_boost_ptr ++; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) { + x += (round_ptr[rc!=0]); + y = ((int)(((int)(x * quant_ptr[rc!=0]) >> 16) + x)) + >> quant_shift_ptr[rc!=0]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + + if (y) { + eob = i; // last nonzero coeffs + zbin_boost_ptr = b->zrun_zbin_boost_16x16; + } + } + } + + d->eob = eob + 1; +} +#endif + + + /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of * these two C functions if corresponding optimized routine is not available. * NEON optimized version implements currently the fast quantization for pair @@ -337,20 +415,39 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) { int i; int quant_val; int Q; - int zbin_boost[16] = { 0, 0, 8, 10, 12, 14, 16, 20, - 24, 28, 32, 36, 40, 44, 44, 44 - }; - - int zbin_boost_8x8[64] = { 0, 0, 0, 8, 8, 8, 10, 12, - 14, 16, 18, 20, 22, 24, 26, 28, - 30, 32, 34, 36, 38, 40, 42, 44, - 46, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48 - }; + static const int zbin_boost[16] = { 0, 0, 8, 10, 12, 14, 16, 20, + 24, 28, 32, 36, 40, 44, 44, 44 + }; + static const int zbin_boost_8x8[64] = { 0, 0, 0, 8, 8, 8, 10, 12, + 14, 16, 18, 20, 22, 24, 26, 28, + 30, 32, 34, 36, 38, 40, 42, 44, + 46, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48 + }; +#if CONFIG_TX16X16 + static const int zbin_boost_16x16[256] = { + 0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, + 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + }; +#endif int qrounding_factor = 48; @@ -372,33 +469,52 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) { cpi->Y1quant_shift[Q] + 0, quant_val); cpi->Y1zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; cpi->Y1zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; +#if CONFIG_TX16X16 + cpi->Y1zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; +#endif cpi->Y1round[Q][0] = (qrounding_factor * quant_val) >> 7; cpi->common.Y1dequant[Q][0] = quant_val; cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7; cpi->zrun_zbin_boost_y1_8x8[Q][0] = ((quant_val * zbin_boost_8x8[0]) + 64) >> 7; +#if CONFIG_TX16X16 + cpi->zrun_zbin_boost_y1_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; +#endif + quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q); invert_quant(cpi->Y2quant[Q] + 0, cpi->Y2quant_shift[Q] + 0, quant_val); cpi->Y2zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; cpi->Y2zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; +#if CONFIG_TX16X16 + cpi->Y2zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; +#endif cpi->Y2round[Q][0] = (qrounding_factor * quant_val) >> 7; 
cpi->common.Y2dequant[Q][0] = quant_val; cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7; cpi->zrun_zbin_boost_y2_8x8[Q][0] = ((quant_val * zbin_boost_8x8[0]) + 64) >> 7; +#if CONFIG_TX16X16 + cpi->zrun_zbin_boost_y2_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; +#endif quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q); invert_quant(cpi->UVquant[Q] + 0, cpi->UVquant_shift[Q] + 0, quant_val); - cpi->UVzbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;; - cpi->UVzbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;; + cpi->UVzbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; + cpi->UVzbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; +#if CONFIG_TX16X16 + cpi->UVzbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; +#endif cpi->UVround[Q][0] = (qrounding_factor * quant_val) >> 7; cpi->common.UVdequant[Q][0] = quant_val; cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; cpi->zrun_zbin_boost_uv_8x8[Q][0] = ((quant_val * zbin_boost_8x8[0]) + 64) >> 7; +#if CONFIG_TX16X16 + cpi->zrun_zbin_boost_uv_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; +#endif // all the 4x4 ac values =; for (i = 1; i < 16; i++) { @@ -453,6 +569,25 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) { cpi->zrun_zbin_boost_uv_8x8[Q][i] = ((quant_val * zbin_boost_8x8[i]) + 64) >> 7; } + +#if CONFIG_TX16X16 + // 16x16 structures. Same comment above applies. 
+ for (i = 1; i < 256; i++) { + int rc = vp8_default_zig_zag1d_16x16[i]; + + quant_val = vp8_ac_yquant(Q); + cpi->Y1zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; + cpi->zrun_zbin_boost_y1_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; + + quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q); + cpi->Y2zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; + cpi->zrun_zbin_boost_y2_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; + + quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q); + cpi->UVzbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; + cpi->zrun_zbin_boost_uv_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; + } +#endif } } @@ -491,10 +626,16 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) { x->block[i].quant_shift = cpi->Y1quant_shift[QIndex]; x->block[i].zbin = cpi->Y1zbin[QIndex]; x->block[i].zbin_8x8 = cpi->Y1zbin_8x8[QIndex]; +#if CONFIG_TX16X16 + x->block[i].zbin_16x16 = cpi->Y1zbin_16x16[QIndex]; +#endif x->block[i].round = cpi->Y1round[QIndex]; x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex]; x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex]; x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex]; +#if CONFIG_TX16X16 + x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex]; +#endif x->block[i].zbin_extra = (short)zbin_extra; // Segment max eob offset feature. 
@@ -503,9 +644,16 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) { get_segdata(xd, segment_id, SEG_LVL_EOB); x->block[i].eob_max_offset_8x8 = get_segdata(xd, segment_id, SEG_LVL_EOB); +#if CONFIG_TX16X16 + x->block[i].eob_max_offset_16x16 = + get_segdata(xd, segment_id, SEG_LVL_EOB); +#endif } else { x->block[i].eob_max_offset = 16; x->block[i].eob_max_offset_8x8 = 64; +#if CONFIG_TX16X16 + x->block[i].eob_max_offset_16x16 = 256; +#endif } } @@ -520,10 +668,16 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) { x->block[i].quant_shift = cpi->UVquant_shift[QIndex]; x->block[i].zbin = cpi->UVzbin[QIndex]; x->block[i].zbin_8x8 = cpi->UVzbin_8x8[QIndex]; +#if CONFIG_TX16X16 + x->block[i].zbin_16x16 = cpi->UVzbin_16x16[QIndex]; +#endif x->block[i].round = cpi->UVround[QIndex]; x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex]; x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex]; x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_uv_8x8[QIndex]; +#if CONFIG_TX16X16 + x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_uv_16x16[QIndex]; +#endif x->block[i].zbin_extra = (short)zbin_extra; @@ -549,10 +703,16 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) { x->block[24].quant_shift = cpi->Y2quant_shift[QIndex]; x->block[24].zbin = cpi->Y2zbin[QIndex]; x->block[24].zbin_8x8 = cpi->Y2zbin_8x8[QIndex]; +#if CONFIG_TX16X16 + x->block[24].zbin_16x16 = cpi->Y2zbin_16x16[QIndex]; +#endif x->block[24].round = cpi->Y2round[QIndex]; x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex]; x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex]; x->block[24].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y2_8x8[QIndex]; +#if CONFIG_TX16X16 + x->block[24].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y2_16x16[QIndex]; +#endif x->block[24].zbin_extra = (short)zbin_extra; // TBD perhaps not use for Y2 diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h index 4106064f5..98fed4c11 100644 --- 
a/vp8/encoder/quantize.h +++ b/vp8/encoder/quantize.h @@ -46,6 +46,13 @@ extern prototype_quantize_block_pair(vp8_quantize_quantb_pair); #endif extern prototype_quantize_block(vp8_quantize_quantb_8x8); +#if CONFIG_TX16X16 +#ifndef vp8_quantize_quantb_16x16 +#define vp8_quantize_quantb_16x16 vp8_regular_quantize_b_16x16 +#endif +extern prototype_quantize_block(vp8_quantize_quantb_16x16); +#endif + #ifndef vp8_quantize_quantb_2x2 #define vp8_quantize_quantb_2x2 vp8_regular_quantize_b_2x2 #endif @@ -70,6 +77,13 @@ extern prototype_quantize_mb(vp8_quantize_mby); extern prototype_quantize_mb(vp8_quantize_mby_8x8); extern prototype_quantize_mb(vp8_quantize_mbuv_8x8); +#if CONFIG_TX16X16 +void vp8_quantize_mb_16x16(MACROBLOCK *x); +extern prototype_quantize_block(vp8_quantize_quantb_16x16); +extern prototype_quantize_mb(vp8_quantize_mby_16x16); +extern prototype_quantize_mb(vp8_quantize_mbuv_16x16); +#endif + struct VP8_COMP; extern void vp8_set_quantizer(struct VP8_COMP *cpi, int Q); extern void vp8cx_frame_init_quantizer(struct VP8_COMP *cpi); diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 43f38568a..b0f92c942 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -177,6 +177,9 @@ void vp8_save_coding_context(VP8_COMP *cpi) { #if CONFIG_SWITCHABLE_INTERP vp8_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob); #endif +#if CONFIG_TX16X16 + vp8_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16); +#endif } void vp8_restore_coding_context(VP8_COMP *cpi) { @@ -233,6 +236,9 @@ void vp8_restore_coding_context(VP8_COMP *cpi) { #if CONFIG_SWITCHABLE_INTERP vp8_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob); #endif +#if CONFIG_TX16X16 + vp8_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16); +#endif } diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 66f342302..a2cd2651a 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -366,6 +366,13 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) 
{ (const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_8x8, BLOCK_TYPES_8X8); +#if CONFIG_TX16X16 + fill_token_costs( + cpi->mb.token_costs_16x16, + (const vp8_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_16x16, + BLOCK_TYPES_16X16); +#endif + /*rough estimate for costing*/ cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4; vp8_init_mode_costs(cpi); @@ -809,6 +816,72 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb, *Rate = vp8_rdcost_mby_8x8(mb); } +#if CONFIG_TX16X16 +static int cost_coeffs_16x16(MACROBLOCK *mb, BLOCKD *b, int type, + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { + const int eob = b->eob; + int c = !type; /* start at coef 0, unless Y with Y2 */ + int cost = 0, pt; /* surrounding block/prev coef predictor */ + short *qcoeff_ptr = b->qcoeff; + + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + +# define QC16X16(I) ( qcoeff_ptr [vp8_default_zig_zag1d_16x16[I]] ) + + for (; c < eob; c++) { + int v = QC16X16(c); + int t = vp8_dct_value_tokens_ptr[v].Token; + cost += mb->token_costs_16x16[type][vp8_coef_bands_16x16[c]][pt][t]; + cost += vp8_dct_value_cost_ptr[v]; + pt = vp8_prev_token_class[t]; + } + +# undef QC16X16 + if (c < 256) + cost += mb->token_costs_16x16[type][vp8_coef_bands_16x16[c]] + [pt][DCT_EOB_TOKEN]; + + pt = (c != !type); // is eob first coefficient; + *a = *l = pt; + return cost; +} + +static int vp8_rdcost_mby_16x16(MACROBLOCK *mb) { + int cost; + MACROBLOCKD *x = &mb->e_mbd; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta, *tl; + + vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + cost = cost_coeffs_16x16(mb, x->block, PLANE_TYPE_Y_WITH_DC, ta, tl); + return cost; +} + +static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, + const VP8_ENCODER_RTCD *rtcd) { + int d; + + 
ENCODEMB_INVOKE(&rtcd->encodemb, submby)( + mb->src_diff, + *(mb->block[0].base_src), + mb->e_mbd.predictor, + mb->block[0].src_stride); + + vp8_transform_mby_16x16(mb); + vp8_quantize_mby_16x16(mb); + d = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(mb, 0); + + *Distortion = (d >> 2); + // rate + *Rate = vp8_rdcost_mby_16x16(mb); +} +#endif + static void copy_predictor(unsigned char *dst, const unsigned char *predictor) { const unsigned int *p = (const unsigned int *)predictor; unsigned int *d = (unsigned int *)dst; @@ -1121,7 +1194,12 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi, } #endif - macro_block_yrd_8x8(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd)); +#if CONFIG_TX16X16 + if (mode <= TM_PRED) + macro_block_yrd_16x16(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd)); + else +#endif + macro_block_yrd_8x8(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd)); // FIXME add compoundmode cost // FIXME add rate for mode2 rate = ratey + x->mbmode_cost[x->e_mbd.frame_type] @@ -3081,16 +3159,33 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int vp8_cost_bit(get_pred_prob(cm, xd, PRED_COMP), 0); } break; + case DC_PRED: + case V_PRED: + case H_PRED: + case TM_PRED: case D45_PRED: case D135_PRED: case D117_PRED: case D153_PRED: case D27_PRED: case D63_PRED: - case DC_PRED: - case V_PRED: - case H_PRED: - case TM_PRED: +#if CONFIG_TX16X16 + // FIXME: breaks lossless since 4x4 isn't allowed + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; + // FIXME compound intra prediction + RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby) + (&x->e_mbd); + macro_block_yrd_16x16(x, &rate_y, &distortion, + IF_RTCD(&cpi->rtcd)); + rate2 += rate_y; + distortion2 += distortion; + rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; + rate2 += uv_intra_rate_8x8; + rate_uv = uv_intra_rate_tokenonly_8x8; + distortion2 += uv_intra_distortion_8x8; + distortion_uv = uv_intra_distortion_8x8; + break; +#else 
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; // FIXME compound intra prediction RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby) @@ -3116,6 +3211,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int distortion_uv = uv_intra_distortion; } break; +#endif case NEWMV: { int thissme; @@ -3269,7 +3365,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } case ZEROMV: - // Trap vectors that reach beyond the UMV borders // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point // because of the lack of break statements in the previous two cases. @@ -3348,12 +3443,23 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rate2 += vp8_cost_mv_ref(cpi, this_mode, mdcounts); // Y cost and distortion - if (cpi->common.txfm_mode == ALLOW_8X8) - macro_block_yrd_8x8(x, &rate_y, &distortion, - IF_RTCD(&cpi->rtcd)); - else - macro_block_yrd(x, &rate_y, &distortion, - IF_RTCD(&cpi->rtcd)); +#if CONFIG_TX16X16 + if (this_mode == ZEROMV || + this_mode == NEARESTMV || + this_mode == NEARMV || + this_mode == NEWMV) + macro_block_yrd_16x16(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd)); + else { +#endif + if (cpi->common.txfm_mode == ALLOW_8X8) + macro_block_yrd_8x8(x, &rate_y, &distortion, + IF_RTCD(&cpi->rtcd)); + else + macro_block_yrd(x, &rate_y, &distortion, + IF_RTCD(&cpi->rtcd)); +#if CONFIG_TX16X16 + } +#endif rate2 += rate_y; distortion2 += distortion; @@ -3361,7 +3467,14 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int // UV cost and distortion vp8_build_inter16x16_predictors_mbuv(&x->e_mbd); - if (cpi->common.txfm_mode == ALLOW_8X8) + if (cpi->common.txfm_mode == ALLOW_8X8 +#if CONFIG_TX16X16 + || this_mode == ZEROMV || + this_mode == NEARESTMV || + this_mode == NEARMV || + this_mode == NEWMV +#endif + ) rd_inter16x16_uv_8x8(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel); @@ 
-3487,9 +3600,21 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int &x->e_mbd.predictor[320], 16, 8); /* Y cost and distortion */ - if (cpi->common.txfm_mode == ALLOW_8X8) + if (cpi->common.txfm_mode == ALLOW_8X8 +#if CONFIG_TX16X16 + || this_mode == ZEROMV || + this_mode == NEARESTMV || + this_mode == NEARMV || + this_mode == NEWMV +#endif + ) +#if CONFIG_TX16X16 + macro_block_yrd_16x16(x, &rate_y, &distortion, + IF_RTCD(&cpi->rtcd)); +#else macro_block_yrd_8x8(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd)); +#endif else macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd)); @@ -3498,7 +3623,14 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int distortion2 += distortion; /* UV cost and distortion */ - if (cpi->common.txfm_mode == ALLOW_8X8) + if (cpi->common.txfm_mode == ALLOW_8X8 +#if CONFIG_TX16X16 + || this_mode == ZEROMV || + this_mode == NEARESTMV || + this_mode == NEARMV || + this_mode == NEWMV +#endif + ) rd_inter16x16_uv_8x8(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel); @@ -3541,6 +3673,15 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int && this_mode != B_PRED && this_mode != I8X8_PRED); +#if CONFIG_TX16X16 + if (this_mode <= TM_PRED || + this_mode == NEWMV || + this_mode == ZEROMV || + this_mode == NEARESTMV || + this_mode == NEARMV) + mb_skippable = mb_is_skippable_16x16(&x->e_mbd); + else +#endif if ((cpi->common.txfm_mode == ALLOW_8X8) && has_y2) { if (x->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME) mb_skippable = mb_is_skippable_8x8(&x->e_mbd); @@ -4002,10 +4143,25 @@ int vp8cx_pick_mode_inter_macroblock } /* test code: set transform size based on mode selection */ +#if CONFIG_TX16X16 + if (xd->mode_info_context->mbmi.mode <= TM_PRED || + xd->mode_info_context->mbmi.mode == NEWMV || + xd->mode_info_context->mbmi.mode == ZEROMV || + xd->mode_info_context->mbmi.mode == NEARMV || + xd->mode_info_context->mbmi.mode ==
NEARESTMV) { + xd->mode_info_context->mbmi.txfm_size = TX_16X16; + cpi->t16x16_count++; + } + else if (cpi->common.txfm_mode == ALLOW_8X8 + && xd->mode_info_context->mbmi.mode != I8X8_PRED + && xd->mode_info_context->mbmi.mode != B_PRED + && xd->mode_info_context->mbmi.mode != SPLITMV) { +#else if (cpi->common.txfm_mode == ALLOW_8X8 && xd->mode_info_context->mbmi.mode != I8X8_PRED && xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV) { +#endif xd->mode_info_context->mbmi.txfm_size = TX_8X8; cpi->t8x8_count++; } else { diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 81ba6f2be..dac18c6db 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -26,17 +26,23 @@ #ifdef ENTROPY_STATS INT64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; INT64 context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -extern unsigned int tree_update_hist [BLOCK_TYPES] -[COEF_BANDS] -[PREV_COEF_CONTEXTS] -[ENTROPY_NODES][2]; -extern unsigned int tree_update_hist_8x8 [BLOCK_TYPES_8X8] -[COEF_BANDS] -[PREV_COEF_CONTEXTS] -[ENTROPY_NODES] [2]; +#if CONFIG_TX16X16 +INT64 context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif +extern unsigned int tree_update_hist[BLOCK_TYPES][COEF_BANDS] + [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]; +extern unsigned int tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS] + [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; +#if CONFIG_TX16X16 +extern unsigned int tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS] + [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; +#endif #endif void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t); void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t); +#if CONFIG_TX16X16 +void vp8_stuff_mb_16x16(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t); +#endif void vp8_fix_contexts(MACROBLOCKD *x); static TOKENVALUE 
dct_value_tokens[DCT_MAX_VALUE * 2]; @@ -103,6 +109,54 @@ static void fill_value_tokens() { vp8_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE; } +#if CONFIG_TX16X16 +static void tokenize1st_order_b_16x16(MACROBLOCKD *xd, const BLOCKD *const b, TOKENEXTRA **tp, + const int type, const FRAME_TYPE frametype, ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, VP8_COMP *cpi) { + int pt; /* near block/prev token context index */ + int c = 0; /* start at DC unless type 0 */ + const int eob = b->eob; /* one beyond last nonzero coeff */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + int x; + const short *qcoeff_ptr = b->qcoeff; + + int seg_eob = 256; + int segment_id = xd->mode_info_context->mbmi.segment_id; + + if (segfeature_active(xd, segment_id, SEG_LVL_EOB)) + seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB); + + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + + do { + const int band = vp8_coef_bands_16x16[c]; + int v; + + x = DCT_EOB_TOKEN; + if (c < eob) { + int rc = vp8_default_zig_zag1d_16x16[c]; + v = qcoeff_ptr[rc]; + + assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); + + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; + x = vp8_dct_value_tokens_ptr[v].Token; + } + + t->Token = x; + t->context_tree = cpi->common.fc.coef_probs_16x16[type][band][pt]; + + t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); + + ++cpi->coef_counts_16x16[type][band][pt][x]; + } while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < seg_eob); + + *tp = t; + pt = (c != !type); /* 0 <-> all coeff data is zero */ + *a = *l = pt; +} +#endif + static void tokenize2nd_order_b_8x8 ( MACROBLOCKD *xd, @@ -170,12 +224,8 @@ static void tokenize2nd_order_b_8x8 } -static void tokenize2nd_order_b -( - MACROBLOCKD *xd, - TOKENEXTRA **tp, - VP8_COMP *cpi -) { +static void tokenize2nd_order_b(MACROBLOCKD *xd, TOKENEXTRA **tp, + VP8_COMP *cpi) { int pt; /* near block/prev token context index */ int c; /* start at DC */ TOKENEXTRA *t = *tp;/* store tokens starting 
here */ @@ -188,9 +238,8 @@ static void tokenize2nd_order_b int seg_eob = 16; int segment_id = xd->mode_info_context->mbmi.segment_id; - if (segfeature_active(xd, segment_id, SEG_LVL_EOB)) { + if (segfeature_active(xd, segment_id, SEG_LVL_EOB)) seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB); - } b = xd->block + 24; qcoeff_ptr = b->qcoeff; @@ -542,14 +591,10 @@ static void tokenize1st_order_b unsigned int block; const BLOCKD *b; int pt; /* near block/prev token context index */ - int c; - int token; + int band, rc, v, c, token; TOKENEXTRA *t = *tp;/* store tokens starting here */ const short *qcoeff_ptr; - ENTROPY_CONTEXT *a; - ENTROPY_CONTEXT *l; - int band, rc, v; - int tmp1, tmp2; + ENTROPY_CONTEXT *a, *l; int seg_eob = 16; int segment_id = xd->mode_info_context->mbmi.segment_id; @@ -561,11 +606,9 @@ static void tokenize1st_order_b b = xd->block; /* Luma */ for (block = 0; block < 16; block++, b++) { - tmp1 = vp8_block2above[block]; - tmp2 = vp8_block2left[block]; qcoeff_ptr = b->qcoeff; - a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; - l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; + a = (ENTROPY_CONTEXT *)xd->above_context + vp8_block2above[block]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp8_block2left[block]; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); c = type ? 
0 : 1; @@ -609,11 +652,9 @@ static void tokenize1st_order_b } /* Chroma */ for (block = 16; block < 24; block++, b++) { - tmp1 = vp8_block2above[block]; - tmp2 = vp8_block2left[block]; qcoeff_ptr = b->qcoeff; - a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; - l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; + a = (ENTROPY_CONTEXT *)xd->above_context + vp8_block2above[block]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp8_block2left[block]; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); @@ -701,6 +742,20 @@ int mb_is_skippable_8x8(MACROBLOCKD *x) { return (mby_is_skippable_8x8(x) & mbuv_is_skippable_8x8(x)); } +#if CONFIG_TX16X16 +int mby_is_skippable_16x16(MACROBLOCKD *x) { + int skip = 1; + //skip &= (x->block[0].eob < 2); // I think this should be commented? No second order == DC must be coded + //skip &= (x->block[0].eob < 1); + //skip &= (!x->block[24].eob); + skip &= !x->block[0].eob; + return skip; +} + +int mb_is_skippable_16x16(MACROBLOCKD *x) { + return (mby_is_skippable_16x16(x) & mbuv_is_skippable_8x8(x)); +} +#endif void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { int plane_type; @@ -730,16 +785,32 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != I8X8_PRED && x->mode_info_context->mbmi.mode != SPLITMV); +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) has_y2_block = 0; // Because of inter frames +#endif - x->mode_info_context->mbmi.mb_skip_coeff = - ((tx_type == TX_8X8) ? 
- mb_is_skippable_8x8(x) : - mb_is_skippable(x, has_y2_block)); + switch (tx_type) { +#if CONFIG_TX16X16 + case TX_16X16: + x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(x); + break; +#endif + case TX_8X8: + x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8(x); + break; + default: + x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block); + break; + } if (x->mode_info_context->mbmi.mb_skip_coeff) { cpi->skip_true_count[mb_skip_context] += skip_inc; - if (!cpi->common.mb_no_coeff_skip) { +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) + vp8_stuff_mb_16x16(cpi, x, t); + else +#endif if (tx_type == TX_8X8) vp8_stuff_mb_8x8(cpi, x, t); else @@ -766,9 +837,28 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { tokenize2nd_order_b(x, t, cpi); plane_type = 0; - } +#if CONFIG_TX16X16 + if (tx_type == TX_16X16) { + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + tokenize1st_order_b_16x16(x, x->block, t, 3, x->frame_type, A, L, cpi); + for (b = 1; b < 16; b++) { + *(A + vp8_block2above[b]) = *(A); + *(L + vp8_block2left[b] ) = *(L); + } + for (b = 16; b < 24; b += 4) { + tokenize1st_order_b_8x8(x, x->block + b, t, 2, x->frame_type, + A + vp8_block2above_8x8[b], L + vp8_block2left_8x8[b], cpi); + *(A + vp8_block2above_8x8[b]+1) = *(A + vp8_block2above_8x8[b]); + *(L + vp8_block2left_8x8[b]+1 ) = *(L + vp8_block2left_8x8[b]); + } + vpx_memset(&A[8], 0, sizeof(A[8])); + vpx_memset(&L[8], 0, sizeof(L[8])); + } + else +#endif if (tx_type == TX_8X8) { ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context; ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context; @@ -827,15 +917,20 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { #ifdef ENTROPY_STATS - void init_context_counters(void) { FILE *f = fopen("context.bin", "rb"); if (!f) { vpx_memset(context_counters, 0, sizeof(context_counters)); 
vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8)); +#if CONFIG_TX16X16 + vpx_memset(context_counters_16x16, 0, sizeof(context_counters_16x16)); +#endif } else { fread(context_counters, sizeof(context_counters), 1, f); fread(context_counters_8x8, sizeof(context_counters_8x8), 1, f); +#if CONFIG_TX16X16 + fread(context_counters_16x16, sizeof(context_counters_16x16), 1, f); +#endif fclose(f); } @@ -843,15 +938,20 @@ void init_context_counters(void) { if (!f) { vpx_memset(tree_update_hist, 0, sizeof(tree_update_hist)); vpx_memset(tree_update_hist_8x8, 0, sizeof(tree_update_hist_8x8)); +#if CONFIG_TX16X16 + vpx_memset(tree_update_hist_16x16, 0, sizeof(tree_update_hist_16x16)); +#endif } else { fread(tree_update_hist, sizeof(tree_update_hist), 1, f); fread(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f); +#if CONFIG_TX16X16 + fread(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f); +#endif fclose(f); } } void print_context_counters() { - int type, band, pt, t; FILE *f = fopen("context.c", "w"); @@ -892,7 +992,6 @@ void print_context_counters() { fprintf(f, "static const unsigned int\nvp8_default_coef_counts_8x8" "[BLOCK_TYPES_8X8] [COEF_BANDS]" "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); - type = 0; do { fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); @@ -921,26 +1020,54 @@ void print_context_counters() { fprintf(f, "\n }"); } while (++type < BLOCK_TYPES_8X8); - fprintf(f, "\n};\n"); +#if CONFIG_TX16X16 + fprintf(f, "static const unsigned int\nvp8_default_coef_counts_16x16" + "[BLOCK_TYPES_16X16] [COEF_BANDS]" + "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); + type = 0; + do { + fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); + band = 0; + do { + fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); + pt = 0; + do { + fprintf(f, "%s\n {", Comma(pt)); + t = 0; + do { + const INT64 x = context_counters_16x16 [type] [band] [pt] [t]; + const int y = (int) x; + + assert(x == (INT64) y); /* no 
overflow handling yet */ + fprintf(f, "%s %d", Comma(t), y); + + } while (++t < MAX_ENTROPY_TOKENS); + + fprintf(f, "}"); + } while (++pt < PREV_COEF_CONTEXTS); + + fprintf(f, "\n }"); + + } while (++band < COEF_BANDS); + + fprintf(f, "\n }"); + } while (++type < BLOCK_TYPES_16X16); + fprintf(f, "\n};\n"); +#endif + fprintf(f, "static const vp8_prob\n" "vp8_default_coef_probs[BLOCK_TYPES] [COEF_BANDS] \n" "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {"); type = 0; - do { fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - unsigned int branch_ct [ENTROPY_NODES] [2]; unsigned int coef_counts[MAX_ENTROPY_TOKENS]; vp8_prob coef_probs[ENTROPY_NODES]; @@ -952,7 +1079,6 @@ void print_context_counters() { fprintf(f, "%s\n {", Comma(pt)); t = 0; - do { fprintf(f, "%s %d", Comma(t), coef_probs[t]); @@ -960,11 +1086,8 @@ void print_context_counters() { fprintf(f, "}"); } while (++pt < PREV_COEF_CONTEXTS); - fprintf(f, "\n }"); - } while (++band < COEF_BANDS); - fprintf(f, "\n }"); } while (++type < BLOCK_TYPES); fprintf(f, "\n};\n"); @@ -973,19 +1096,13 @@ void print_context_counters() { "vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8] [COEF_BANDS]\n" "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {"); type = 0; - do { fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - unsigned int branch_ct [ENTROPY_NODES] [2]; unsigned int coef_counts[MAX_ENTROPY_TOKENS]; vp8_prob coef_probs[ENTROPY_NODES]; @@ -994,34 +1111,65 @@ void print_context_counters() { vp8_tree_probs_from_distribution( MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, coef_probs, branch_ct, coef_counts, 256, 1); - fprintf(f, "%s\n {", Comma(pt)); - t = 0; + t = 0; do { fprintf(f, "%s %d", Comma(t), coef_probs[t]); - } while (++t < ENTROPY_NODES); - fprintf(f, "}"); } while (++pt < PREV_COEF_CONTEXTS); - 
fprintf(f, "\n }"); - } while (++band < COEF_BANDS); - fprintf(f, "\n }"); } while (++type < BLOCK_TYPES_8X8); fprintf(f, "\n};\n"); +#if CONFIG_TX16X16 + fprintf(f, "static const vp8_prob\n" + "vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS]\n" + "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {"); + type = 0; + do { + fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); + band = 0; + do { + fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); + pt = 0; + do { + unsigned int branch_ct [ENTROPY_NODES] [2]; + unsigned int coef_counts[MAX_ENTROPY_TOKENS]; + vp8_prob coef_probs[ENTROPY_NODES]; + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + coef_counts[t] = context_counters_16x16[type] [band] [pt] [t]; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + coef_probs, branch_ct, coef_counts, 256, 1); + fprintf(f, "%s\n {", Comma(pt)); + + t = 0; + do { + fprintf(f, "%s %d", Comma(t), coef_probs[t]); + } while (++t < ENTROPY_NODES); + fprintf(f, "}"); + } while (++pt < PREV_COEF_CONTEXTS); + fprintf(f, "\n }"); + } while (++band < COEF_BANDS); + fprintf(f, "\n }"); + } while (++type < BLOCK_TYPES_16X16); + fprintf(f, "\n};\n"); +#endif + fclose(f); f = fopen("context.bin", "wb"); fwrite(context_counters, sizeof(context_counters), 1, f); fwrite(context_counters_8x8, sizeof(context_counters_8x8), 1, f); +#if CONFIG_TX16X16 + fwrite(context_counters_16x16, sizeof(context_counters_16x16), 1, f); +#endif fclose(f); } - #endif @@ -1151,6 +1299,50 @@ void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { } +#if CONFIG_TX16X16 +static __inline +void stuff1st_order_b_16x16(const BLOCKD *const b, TOKENEXTRA **tp, const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, VP8_COMP *cpi) +{ + int pt; /* near block/prev token context index */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + (void) frametype; + (void) b; + + t->Token = 
DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs_16x16[3][1][pt]; + t->skip_eob_node = 0; + ++t; + *tp = t; + ++cpi->coef_counts_16x16[3][1][pt][DCT_EOB_TOKEN]; + pt = 0; /* 0 <-> all coeff data is zero */ + *a = *l = pt; +} + +void vp8_stuff_mb_16x16(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + int b, i; + + stuff1st_order_b_16x16(x->block, t, x->frame_type, A, L, cpi); + for (i = 1; i < 16; i++) { + *(A + vp8_block2above[i]) = *(A); + *(L + vp8_block2left[i]) = *(L); + } + for (b = 16; b < 24; b += 4) { + stuff1st_order_buv_8x8(x->block + b, t, 2, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + cpi); + *(A + vp8_block2above_8x8[b]+1) = *(A + vp8_block2above_8x8[b]); + *(L + vp8_block2left_8x8[b]+1 ) = *(L + vp8_block2left_8x8[b]); + } + vpx_memset(&A[8], 0, sizeof(A[8])); + vpx_memset(&L[8], 0, sizeof(L[8])); +} +#endif + static __inline void stuff2nd_order_b ( TOKENEXTRA **tp, @@ -1215,7 +1407,6 @@ void stuff1st_order_buv ++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN]; pt = 0; /* 0 <-> all coeff data is zero */ *a = *l = pt; - } void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { @@ -1241,9 +1432,13 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { } void vp8_fix_contexts(MACROBLOCKD *x) { /* Clear entropy contexts for Y2 blocks */ - if (x->mode_info_context->mbmi.mode != B_PRED + if ((x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != I8X8_PRED - && x->mode_info_context->mbmi.mode != SPLITMV) { + && x->mode_info_context->mbmi.mode != SPLITMV) +#if CONFIG_TX16X16 + || x->mode_info_context->mbmi.txfm_size == TX_16X16 +#endif + ) { vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } else { diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h index 
4ee676e7f..4d2c74eb3 100644 --- a/vp8/encoder/tokenize.h +++ b/vp8/encoder/tokenize.h @@ -44,8 +44,11 @@ void print_context_counters(); extern INT64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; extern INT64 context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - +#if CONFIG_TX16X16 +extern INT64 context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; #endif +#endif + extern const int *vp8_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to * improve cache locality, since it's needed for costing when the rest of the