Extend superblock size to 128x128 pixels.

If --enable-ext-partition is used at build time, the superblock size
(sometimes also referred to as coding unit (CU) size) is extended to
128x128 pixels.

Change-Id: Ie09cec6b7e8d765b7555ff5d80974aab60803f3a
Author: Geza Lore
Date:   2016-03-07 13:46:39 +00:00
Parent: cd1d01b96a
Commit: 552d5cd715
52 changed files with 1448 additions and 824 deletions
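
The size-related constants below all derive from a single MAX_SB_SIZE_LOG2 macro that this change introduces, so the effect of the configure flag can be summarized in a few lines. A minimal standalone sketch (the names mirror the real macros; an undefined CONFIG_EXT_PARTITION is treated as 0 by the preprocessor, matching the default 64x64 build):

/* Sketch of the derived superblock constants added by this change. */
#include <stdio.h>

#if CONFIG_EXT_PARTITION
#define MAX_SB_SIZE_LOG2 7                         /* 128x128 superblocks */
#else
#define MAX_SB_SIZE_LOG2 6                         /* 64x64 superblocks */
#endif
#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2)        /* 128 or 64 pixels */
#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)  /* 16384 or 4096 */
#define MI_SIZE_LOG2 3                             /* one mi-unit = 8 pixels */
#define MI_BLOCK_SIZE (1 << (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2)) /* 16 or 8 */

int main(void) {
  printf("superblock %dx%d = %d mi-units per side\n",
         MAX_SB_SIZE, MAX_SB_SIZE, MI_BLOCK_SIZE);
  return 0;
}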


@@ -28,7 +28,7 @@
namespace {
static const unsigned int kMaxDimension = MAX_CU_SIZE;
static const unsigned int kMaxDimension = MAX_SB_SIZE;
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,


@@ -50,16 +50,16 @@ class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> {
TEST_P(MaskedSADTest, OperationCheck) {
unsigned int ref_ret, ret;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
int err_count = 0;
int first_failure = -1;
int src_stride = MAX_CU_SIZE;
int ref_stride = MAX_CU_SIZE;
int msk_stride = MAX_CU_SIZE;
int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64;
@@ -108,18 +108,18 @@ class HighbdMaskedSADTest : public ::testing::
TEST_P(HighbdMaskedSADTest, OperationCheck) {
unsigned int ref_ret, ret;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
int src_stride = MAX_CU_SIZE;
int ref_stride = MAX_CU_SIZE;
int msk_stride = MAX_CU_SIZE;
int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) {
src_ptr[j] = rnd.Rand16()&0xfff;
ref_ptr[j] = rnd.Rand16()&0xfff;
msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64;


@@ -58,17 +58,17 @@ TEST_P(MaskedVarianceTest, OperationCheck) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
int err_count = 0;
int first_failure = -1;
int src_stride = MAX_CU_SIZE;
int ref_stride = MAX_CU_SIZE;
int msk_stride = MAX_CU_SIZE;
int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = rnd(65);
@@ -100,19 +100,19 @@ TEST_P(MaskedVarianceTest, ExtremeValues) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
int err_count = 0;
int first_failure = -1;
int src_stride = MAX_CU_SIZE;
int ref_stride = MAX_CU_SIZE;
int msk_stride = MAX_CU_SIZE;
int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < 8; ++i) {
memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_SB_SIZE*MAX_SB_SIZE);
memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_SB_SIZE*MAX_SB_SIZE);
memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE*MAX_SB_SIZE);
ref_ret = ref_func_(src_ptr, src_stride,
ref_ptr, ref_stride,
@@ -166,21 +166,21 @@ TEST_P(MaskedSubPixelVarianceTest, OperationCheck) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
int err_count = 0;
int first_failure = -1;
int src_stride = (MAX_CU_SIZE+1);
int ref_stride = (MAX_CU_SIZE+1);
int msk_stride = (MAX_CU_SIZE+1);
int src_stride = (MAX_SB_SIZE+1);
int ref_stride = (MAX_SB_SIZE+1);
int msk_stride = (MAX_SB_SIZE+1);
int xoffset;
int yoffset;
for (int i = 0; i < number_of_iterations; ++i) {
int xoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)};
int yoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)};
for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) {
for (int j = 0; j < (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1); j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = rnd(65);
@@ -221,23 +221,23 @@ TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
int first_failure_x = -1;
int first_failure_y = -1;
int err_count = 0;
int first_failure = -1;
int src_stride = (MAX_CU_SIZE+1);
int ref_stride = (MAX_CU_SIZE+1);
int msk_stride = (MAX_CU_SIZE+1);
int src_stride = (MAX_SB_SIZE+1);
int ref_stride = (MAX_SB_SIZE+1);
int msk_stride = (MAX_SB_SIZE+1);
for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) {
for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) {
for (int i = 0; i < 8; ++i) {
memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
ref_ret = ref_func_(src_ptr, src_stride,
xoffset, yoffset,
@@ -297,19 +297,19 @@ TEST_P(HighbdMaskedVarianceTest, OperationCheck) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
int src_stride = MAX_CU_SIZE;
int ref_stride = MAX_CU_SIZE;
int msk_stride = MAX_CU_SIZE;
int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < number_of_iterations; ++i) {
for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) {
for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) {
src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
msk_ptr[j] = rnd(65);
@@ -341,23 +341,23 @@ TEST_P(HighbdMaskedVarianceTest, ExtremeValues) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
int src_stride = MAX_CU_SIZE;
int ref_stride = MAX_CU_SIZE;
int msk_stride = MAX_CU_SIZE;
int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < 8; ++i) {
vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
MAX_CU_SIZE*MAX_CU_SIZE);
MAX_SB_SIZE*MAX_SB_SIZE);
vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
MAX_CU_SIZE*MAX_CU_SIZE);
memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_CU_SIZE*MAX_CU_SIZE);
MAX_SB_SIZE*MAX_SB_SIZE);
memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE*MAX_SB_SIZE);
ref_ret = ref_func_(src8_ptr, src_stride,
ref8_ptr, ref_stride,
@@ -407,24 +407,24 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0;
int first_failure = -1;
int first_failure_x = -1;
int first_failure_y = -1;
int src_stride = (MAX_CU_SIZE+1);
int ref_stride = (MAX_CU_SIZE+1);
int msk_stride = (MAX_CU_SIZE+1);
int src_stride = (MAX_SB_SIZE+1);
int ref_stride = (MAX_SB_SIZE+1);
int msk_stride = (MAX_SB_SIZE+1);
int xoffset, yoffset;
for (int i = 0; i < number_of_iterations; ++i) {
for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) {
for (int j = 0; j < (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1); j++) {
src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
msk_ptr[j] = rnd(65);
@@ -465,27 +465,27 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]);
DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int first_failure_x = -1;
int first_failure_y = -1;
int err_count = 0;
int first_failure = -1;
int src_stride = (MAX_CU_SIZE+1);
int ref_stride = (MAX_CU_SIZE+1);
int msk_stride = (MAX_CU_SIZE+1);
int src_stride = (MAX_SB_SIZE+1);
int ref_stride = (MAX_SB_SIZE+1);
int msk_stride = (MAX_SB_SIZE+1);
for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) {
for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) {
for (int i = 0; i < 8; ++i) {
vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1));
(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
ref_ret = ref_func_(src8_ptr, src_stride,
xoffset, yoffset,
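
The sub-pel variance buffers above are sized (MAX_SB_SIZE+1) squared rather than MAX_SB_SIZE squared because bilinear sub-pel filtering of a block reads one extra row and column; a quick check of the worst case (a standalone sketch, not test code from the tree):

/* Why the sub-pel test buffers are (MAX_SB_SIZE + 1)^2: bilinear
 * sub-pel filtering of a w x h block reads one extra row and column,
 * so the largest block touches (128 + 1) * (128 + 1) samples. */
enum { MAX_SB_SIZE = 128 };
enum { SUBPEL_BUF_SAMPLES = (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) }; /* 16641 */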


@@ -10,13 +10,16 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#if CONFIG_VP10
#include "vp10/common/blockd.h"
#elif CONFIG_VP9
#include "vp9/common/vp9_blockd.h"
#endif
#include "vpx_mem/vpx_mem.h"
typedef void (*SubtractFunc)(int rows, int cols,
@@ -24,7 +27,7 @@ typedef void (*SubtractFunc)(int rows, int cols,
const uint8_t *src_ptr, ptrdiff_t src_stride,
const uint8_t *pred_ptr, ptrdiff_t pred_stride);
namespace vp9 {
namespace {
class VP9SubtractBlockTest : public ::testing::TestWithParam<SubtractFunc> {
public:
@@ -105,5 +108,4 @@ INSTANTIATE_TEST_CASE_P(NEON, VP9SubtractBlockTest,
INSTANTIATE_TEST_CASE_P(MSA, VP9SubtractBlockTest,
::testing::Values(vpx_subtract_block_msa));
#endif
} // namespace vp9
} // namespace


@@ -147,7 +147,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += subtract_test.cc
ifeq ($(CONFIG_VP9_ENCODER),yes)
LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
@@ -172,6 +172,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_fht16x16_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ANS) += vp10_ans_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += sum_squares_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += subtract_test.cc
ifeq ($(CONFIG_EXT_INTER),yes)
LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc


@@ -44,9 +44,6 @@ typedef enum {
#define IsInterpolatingFilter(filter) (1)
#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
#define MAXTXLEN 32
#define CU_SIZE 64
static INLINE int is_inter_mode(PREDICTION_MODE mode) {
#if CONFIG_EXT_INTER
return mode >= NEARESTMV && mode <= NEW_NEWMV;
@@ -167,8 +164,8 @@ typedef struct {
PREDICTION_MODE mode;
TX_SIZE tx_size;
#if CONFIG_VAR_TX
// TODO(jingning): This effectively assigned an entry for each 8x8 block.
// Apparently it takes much more space than needed.
// TODO(jingning): This effectively assigned a separate entry for each
// 8x8 block. Apparently it takes much more space than needed.
TX_SIZE inter_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
#endif
int8_t skip;
@@ -318,15 +315,15 @@ typedef struct macroblockd {
const YV12_BUFFER_CONFIG *cur_buf;
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
ENTROPY_CONTEXT left_context[MAX_MB_PLANE][2 * MI_BLOCK_SIZE];
PARTITION_CONTEXT *above_seg_context;
PARTITION_CONTEXT left_seg_context[8];
PARTITION_CONTEXT left_seg_context[MI_BLOCK_SIZE];
#if CONFIG_VAR_TX
TXFM_CONTEXT *above_txfm_context;
TXFM_CONTEXT *left_txfm_context;
TXFM_CONTEXT left_txfm_context_buffer[8];
TXFM_CONTEXT left_txfm_context_buffer[MI_BLOCK_SIZE];
TX_SIZE max_tx_size;
#if CONFIG_SUPERTX
@@ -686,6 +683,7 @@ void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
#if CONFIG_EXT_INTER
static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) {
// TODO(debargha): Should this be bsize < BLOCK_LARGEST?
return (bsize >= BLOCK_8X8) && (bsize < BLOCK_64X64);
}
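
Replacing the hard-coded 16 and 8 above with expressions in MI_BLOCK_SIZE is what lets these per-superblock context arrays grow with the build configuration. A sketch of the sizing logic, assuming (as in vp9) that entropy contexts are kept at 4x4 granularity, i.e. two per 8x8 mi-unit:

/* Sketch of the context-array sizing: two entropy contexts per 8x8
 * mi-unit, one segmentation/txfm context per mi-unit. */
enum { MI_BLOCK_SIZE_64 = 8, MI_BLOCK_SIZE_128 = 16 };

static int entropy_ctx_entries(int mi_block_size) { return 2 * mi_block_size; }
static int seg_ctx_entries(int mi_block_size) { return mi_block_size; }
/* entropy_ctx_entries(MI_BLOCK_SIZE_64)  == 16, the old literal;
 * entropy_ctx_entries(MI_BLOCK_SIZE_128) == 32 for 128x128 superblocks. */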


@@ -19,154 +19,282 @@
extern "C" {
#endif
#if CONFIG_EXT_PARTITION
# define IF_EXT_PARTITION(...) __VA_ARGS__
#else
# define IF_EXT_PARTITION(...)
#endif
// Log 2 conversion lookup tables for block width and height
static const uint8_t b_width_log2_lookup[BLOCK_SIZES] =
{0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4};
{0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5)};
static const uint8_t b_height_log2_lookup[BLOCK_SIZES] =
{0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4};
static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] =
{1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16};
static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] =
{1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16};
{0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5)};
// Log 2 conversion lookup tables for modeinfo width and height
static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] =
{0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3};
{0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4)};
static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] =
{0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3};
{0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4)};
// Width/height lookup tables in units of various block sizes
static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] =
{1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)};
static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] =
{1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32)};
static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8};
{1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16)};
static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8};
{1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16)};
static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8)};
static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)};
// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize)))
static const uint8_t size_group_lookup[BLOCK_SIZES] =
{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3};
{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3)};
static const uint8_t num_pels_log2_lookup[BLOCK_SIZES] =
{4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12};
{4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14)};
static const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = {
{ // 4X4
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID,
static const PARTITION_TYPE
partition_lookup[MAX_SB_SIZE_LOG2 - 1][BLOCK_SIZES] = {
{ // 4X4 ->
// 4X4
PARTITION_NONE,
// 4X8, 8X4, 8X8
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
// 8X16, 16X8, 16X16
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
// 16X32, 32X16, 32X32
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID
}, { // 8X8
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
// 32X64, 64X32, 64X64
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#if CONFIG_EXT_PARTITION
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID
}, { // 16X16
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID,
#endif // CONFIG_EXT_PARTITION
}, { // 8X8 ->
// 4X4
PARTITION_SPLIT,
// 4X8, 8X4, 8X8
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
// 8X16, 16X8, 16X16
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID
}, { // 32X32
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT,
PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID
}, { // 64X64
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ,
PARTITION_NONE
// 16X32, 32X16, 32X32
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
// 32X64, 64X32, 64X64
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#endif // CONFIG_EXT_PARTITION
}, { // 16X16 ->
// 4X4
PARTITION_SPLIT,
// 4X8, 8X4, 8X8
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 8X16, 16X8, 16X16
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
// 16X32, 32X16, 32X32
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
// 32X64, 64X32, 64X64
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#endif // CONFIG_EXT_PARTITION
}, { // 32X32 ->
// 4X4
PARTITION_SPLIT,
// 4X8, 8X4, 8X8
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 8X16, 16X8, 16X16
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 16X32, 32X16, 32X32
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
// 32X64, 64X32, 64X64
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#endif // CONFIG_EXT_PARTITION
}, { // 64X64 ->
// 4X4
PARTITION_SPLIT,
// 4X8, 8X4, 8X8
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 8X16, 16X8, 16X16
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 16X32, 32X16, 32X32
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 32X64, 64X32, 64X64
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
}, { // 128x128 ->
// 4X4
PARTITION_SPLIT,
// 4X8, 8X4, 8X8
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 8X16, 16X8, 16X16
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 16X32, 32X16, 32X32
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 32X64, 64X32, 64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 64x128, 128x64, 128x128
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
#endif // CONFIG_EXT_PARTITION
}
};
#if CONFIG_EXT_PARTITION_TYPES
static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] = {
{ // PARTITION_NONE
BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
BLOCK_64X64,
}, { // PARTITION_HORZ
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_64X32,
}, { // PARTITION_VERT
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X64,
}, { // PARTITION_SPLIT
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X32,
}, { // PARTITION_HORZ_A
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_64X32,
}, { // PARTITION_HORZ_B
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_64X32,
}, { // PARTITION_VERT_A
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X64,
}, { // PARTITION_VERT_B
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X64,
}
};
static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] =
#else
static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = {
static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] =
#endif // CONFIG_EXT_PARTITION_TYPES
{
{ // PARTITION_NONE
BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
BLOCK_64X64,
// 4X4
BLOCK_4X4,
// 4X8, 8X4, 8X8
BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
// 8X16, 16X8, 16X16
BLOCK_8X16, BLOCK_16X8, BLOCK_16X16,
// 16X32, 32X16, 32X32
BLOCK_16X32, BLOCK_32X16, BLOCK_32X32,
// 32X64, 64X32, 64X64
BLOCK_32X64, BLOCK_64X32, BLOCK_64X64,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_64X128, BLOCK_128X64, BLOCK_128X128,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_HORZ
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_64X32,
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_VERT
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X64,
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_SPLIT
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X32,
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64,
#endif // CONFIG_EXT_PARTITION
#if CONFIG_EXT_PARTITION_TYPES
}, { // PARTITION_HORZ_A
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_HORZ_B
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_VERT_A
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_VERT_B
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_EXT_PARTITION_TYPES
}
};
#endif // CONFIG_EXT_PARTITION_TYPES
static const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = {
TX_4X4, TX_4X4, TX_4X4,
TX_8X8, TX_8X8, TX_8X8,
TX_16X16, TX_16X16, TX_16X16,
TX_32X32, TX_32X32, TX_32X32, TX_32X32
// 4X4
TX_4X4,
// 4X8, 8X4, 8X8
TX_4X4, TX_4X4, TX_8X8,
// 8X16, 16X8, 16X16
TX_8X8, TX_8X8, TX_16X16,
// 16X32, 32X16, 32X32
TX_16X16, TX_16X16, TX_32X32,
// 32X64, 64X32, 64X64
TX_32X32, TX_32X32, TX_32X32,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
TX_32X32, TX_32X32, TX_32X32,
#endif // CONFIG_EXT_PARTITION
};
static const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = {
@@ -200,6 +328,11 @@ static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {
{{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}},
{{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}},
{{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}},
#if CONFIG_EXT_PARTITION
{{BLOCK_64X128, BLOCK_64X64}, {BLOCK_INVALID, BLOCK_32X64}},
{{BLOCK_128X64, BLOCK_INVALID}, {BLOCK_64X64, BLOCK_64X32}},
{{BLOCK_128X128, BLOCK_128X64}, {BLOCK_64X128, BLOCK_64X64}},
#endif // CONFIG_EXT_PARTITION
};
// Generates 4 bit field in which each bit set to 1 represents
@@ -209,6 +342,24 @@ static const struct {
PARTITION_CONTEXT above;
PARTITION_CONTEXT left;
} partition_context_lookup[BLOCK_SIZES]= {
#if CONFIG_EXT_PARTITION
{31, 31}, // 4X4 - {0b11111, 0b11111}
{31, 30}, // 4X8 - {0b11111, 0b11110}
{30, 31}, // 8X4 - {0b11110, 0b11111}
{30, 30}, // 8X8 - {0b11110, 0b11110}
{30, 28}, // 8X16 - {0b11110, 0b11100}
{28, 30}, // 16X8 - {0b11100, 0b11110}
{28, 28}, // 16X16 - {0b11100, 0b11100}
{28, 24}, // 16X32 - {0b11100, 0b11000}
{24, 28}, // 32X16 - {0b11000, 0b11100}
{24, 24}, // 32X32 - {0b11000, 0b11000}
{24, 16}, // 32X64 - {0b11000, 0b10000}
{16, 24}, // 64X32 - {0b10000, 0b11000}
{16, 16}, // 64X64 - {0b10000, 0b10000}
{16, 0 }, // 64X128- {0b10000, 0b00000}
{0, 16}, // 128X64- {0b00000, 0b10000}
{0, 0 }, // 128X128-{0b00000, 0b00000}
#else
{15, 15}, // 4X4 - {0b1111, 0b1111}
{15, 14}, // 4X8 - {0b1111, 0b1110}
{14, 15}, // 8X4 - {0b1110, 0b1111}
@@ -222,6 +373,7 @@ static const struct {
{8, 0 }, // 32X64 - {0b1000, 0b0000}
{0, 8 }, // 64X32 - {0b0000, 0b1000}
{0, 0 }, // 64X64 - {0b0000, 0b0000}
#endif // CONFIG_EXT_PARTITION
};
#if CONFIG_SUPERTX
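
The pattern behind partition_context_lookup is regular enough to compute: each entry is an (MI_BLOCK_SIZE_LOG2 + 1)-bit field equal to (1 << bits) minus the block's width (for above) or height (for left) in 4x4 units. A small self-checking sketch of my reading of the tables, not code from the tree:

#include <assert.h>

/* Reconstruct partition_context_lookup entries from block dimensions.
 * nbits is 4 on 64x64-superblock builds and 5 with CONFIG_EXT_PARTITION. */
static unsigned partition_ctx(unsigned num_4x4, unsigned nbits) {
  return (1u << nbits) - num_4x4;
}

int main(void) {
  assert(partition_ctx(1, 4) == 15);   /* 4 wide   -> 0b1111  (old table) */
  assert(partition_ctx(1, 5) == 31);   /* 4 wide   -> 0b11111 (new table) */
  assert(partition_ctx(4, 5) == 28);   /* 16 wide  -> 0b11100, cf. 16X32  */
  assert(partition_ctx(32, 5) == 0);   /* 128 wide -> 0b00000, cf. 128X128 */
  return 0;
}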


@@ -171,6 +171,13 @@ static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
{ 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split
{ 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split
{ 10, 7, 6, 128, 128, 128, 128 }, // a/l both split
#if CONFIG_EXT_PARTITION
// 128x128 -> 64x64
{ 222, 34, 30, 128, 128, 128, 128 }, // a/l both not split
{ 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split
{ 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split
{ 10, 7, 6, 128, 128, 128, 128 }, // a/l both split
#endif // CONFIG_EXT_PARTITION
};
#else
static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
@@ -195,6 +202,13 @@ static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
{ 72, 16, 44 }, // a split, l not split
{ 58, 32, 12 }, // l split, a not split
{ 10, 7, 6 }, // a/l both split
#if CONFIG_EXT_PARTITION
// 128x128 -> 64x64
{ 222, 34, 30 }, // a/l both not split
{ 72, 16, 44 }, // a split, l not split
{ 58, 32, 12 }, // l split, a not split
{ 10, 7, 6 }, // a/l both split
#endif // CONFIG_EXT_PARTITION
};
#endif // CONFIG_EXT_PARTITION_TYPES
@@ -256,20 +270,33 @@ static const vpx_prob default_inter_compound_mode_probs
static const vpx_prob default_interintra_prob[BLOCK_SIZES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
#if CONFIG_EXT_PARTITION
192, 192, 192
#endif // CONFIG_EXT_PARTITION
};
static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
#if CONFIG_EXT_PARTITION
192, 192, 192
#endif // CONFIG_EXT_PARTITION
};
static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
#if CONFIG_EXT_PARTITION
192, 192, 192
#endif // CONFIG_EXT_PARTITION
};
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC
static const vpx_prob default_obmc_prob[BLOCK_SIZES] = {
255, 255, 255, 151, 153, 144, 178, 165, 160, 207, 195, 168, 244,
#if CONFIG_EXT_PARTITION
// TODO(debargha) What are the correct values for these?
192, 192, 192
#endif // CONFIG_EXT_PARTITION
};
#endif // CONFIG_OBMC
@@ -389,6 +416,11 @@ vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = {
{ 180, 113, 136, 49, 45, 114},
{ 107, 70, 87, 49, 154, 156},
{ 98, 105, 142, 63, 64, 152},
#if CONFIG_EXT_PARTITION
{ 98, 105, 142, 63, 64, 152},
{ 98, 105, 142, 63, 64, 152},
{ 98, 105, 142, 63, 64, 152},
#endif // CONFIG_EXT_PARTITION
};
const vpx_prob
@@ -403,6 +435,11 @@ vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = {
{ 67, 53, 54, 55, 66, 93},
{ 120, 130, 83, 171, 75, 214},
{ 72, 55, 66, 68, 79, 107},
#if CONFIG_EXT_PARTITION
{ 72, 55, 66, 68, 79, 107},
{ 72, 55, 66, 68, 79, 107},
{ 72, 55, 66, 68, 79, 107},
#endif // CONFIG_EXT_PARTITION
};
const vpx_prob
@@ -418,6 +455,11 @@ vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS]
{ 240, 180, 100, },
{ 240, 180, 100, },
{ 240, 180, 100, },
#if CONFIG_EXT_PARTITION
{ 240, 180, 100, },
{ 240, 180, 100, },
{ 240, 180, 100, },
#endif // CONFIG_EXT_PARTITION
};


@@ -32,7 +32,7 @@ extern "C" {
#define PALETTE_COLOR_CONTEXTS 16
#define PALETTE_MAX_SIZE 8
#define PALETTE_BLOCK_SIZES (BLOCK_64X64 - BLOCK_8X8 + 1)
#define PALETTE_BLOCK_SIZES (BLOCK_LARGEST - BLOCK_8X8 + 1)
#define PALETTE_Y_MODE_CONTEXTS 3
struct VP10Common;


@@ -18,13 +18,25 @@
extern "C" {
#endif
#define MI_SIZE_LOG2 3
#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6
#undef MAX_SB_SIZE
#if CONFIG_EXT_PARTITION
# define MAX_SB_SIZE_LOG2 7
#else
# define MAX_SB_SIZE_LOG2 6
#endif // CONFIG_EXT_PARTITION
#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2)
#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
#define MI_SIZE_LOG2 3
#define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit
#define MI_BLOCK_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2)
#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block
#define MI_MASK (MI_BLOCK_SIZE - 1)
#define MI_MASK_2 (MI_BLOCK_SIZE * 2 - 1)
#if CONFIG_EXT_TILE
# define MAX_TILE_ROWS 1024
@@ -62,19 +74,16 @@ typedef enum BITSTREAM_PROFILE {
#define BLOCK_32X64 10
#define BLOCK_64X32 11
#define BLOCK_64X64 12
#if CONFIG_EXT_PARTITION
#if !CONFIG_EXT_PARTITION
# define BLOCK_SIZES 13
#else
# define BLOCK_64X128 13
# define BLOCK_128X64 14
# define BLOCK_128X128 15
# define BLOCK_SIZES 16
#else
#define BLOCK_SIZES 13
#endif // CONFIG_EXT_PARTITION
#define BLOCK_INVALID (BLOCK_SIZES)
#endif // !CONFIG_EXT_PARTITION
#define BLOCK_INVALID BLOCK_SIZES
#define BLOCK_LARGEST (BLOCK_SIZES - 1)
typedef uint8_t BLOCK_SIZE;
#if CONFIG_EXT_PARTITION_TYPES
@@ -104,7 +113,11 @@ typedef enum PARTITION_TYPE {
typedef char PARTITION_CONTEXT;
#define PARTITION_PLOFFSET 4 // number of probability models per block size
#if CONFIG_EXT_PARTITION
# define PARTITION_CONTEXTS (5 * PARTITION_PLOFFSET)
#else
# define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
#endif // CONFIG_EXT_PARTITION
// block transform size
typedef uint8_t TX_SIZE;
@@ -114,6 +127,15 @@ typedef uint8_t TX_SIZE;
#define TX_32X32 ((TX_SIZE)3) // 32x32 transform
#define TX_SIZES ((TX_SIZE)4)
#define MAX_TX_SIZE_LOG2 5
#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE)
// Number of maximum size transform blocks in the maximum size superblock
#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 \
((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2)
#define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2)
// frame transform mode
typedef enum {
ONLY_4X4 = 0, // only 4x4 transform used
@@ -286,10 +308,15 @@ typedef enum {
/* Segment Feature Masks */
#define MAX_MV_REF_CANDIDATES 2
#if CONFIG_REF_MV
#define MAX_REF_MV_STACK_SIZE 16
#if CONFIG_EXT_PARTITION
#define REF_CAT_LEVEL 640
#else
#define REF_CAT_LEVEL 160
#endif
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_REF_MV
#define INTRA_INTER_CONTEXTS 4
#define COMP_INTER_CONTEXTS 5
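
As a cross-check of the arithmetic in the block of macros above, the derived values under both configurations (a standalone C11 sketch; the names shadow the real macros):

/* C11 sanity checks of the macro arithmetic above. */
#define MI_SIZE_LOG2 3
#define MAX_TX_SIZE_LOG2 5

_Static_assert((1 << 7) == 128 && (1 << 6) == 64,
               "MAX_SB_SIZE: 128 with ext-partition, else 64");
_Static_assert((1 << (7 - MI_SIZE_LOG2)) == 16,
               "a 128x128 SB spans 16x16 mi-units, so MI_MASK == 15");
_Static_assert((1 << ((7 - MAX_TX_SIZE_LOG2) * 2)) == 16,
               "16 32x32 transform blocks fill a 128x128 SB");
_Static_assert((1 << ((6 - MAX_TX_SIZE_LOG2) * 2)) == 4,
               "only 4 fill a 64x64 SB");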


@@ -871,6 +871,9 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
cm->mi_rows - mi_row : MI_BLOCK_SIZE);
const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
cm->mi_cols - mi_col : MI_BLOCK_SIZE);
#if CONFIG_EXT_PARTITION
assert(0 && "Not yet updated");
#endif // CONFIG_EXT_PARTITION
vp10_zero(*lfm);
assert(mip[0] != NULL);
@@ -1045,8 +1048,10 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
const uint64_t rows = cm->mi_rows - mi_row;
// Each pixel inside the border gets a 1,
const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1);
const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1);
const uint64_t mask_y =
(((uint64_t) 1 << (rows << MI_BLOCK_SIZE_LOG2)) - 1);
const uint16_t mask_uv =
(((uint16_t) 1 << (((rows + 1) >> 1) << (MI_BLOCK_SIZE_LOG2 - 1))) - 1);
// Remove values completely outside our border.
for (i = 0; i < TX_32X32; i++) {
@@ -1262,7 +1267,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
int tx_size_mask = 0;
// Filter level can vary per MI
if (!(lfl[(r << 3) + (c >> ss_x)] =
if (!(lfl[(r << MI_BLOCK_SIZE_LOG2) + (c >> ss_x)] =
get_filter_level(&cm->lf_info, mbmi)))
continue;
@@ -1280,11 +1285,13 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
sb_type, ss_x, ss_y) :
mbmi->inter_tx_size[blk_row][blk_col];
tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]);
tx_size_c = VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & 0x07]);
tx_size_r = VPXMIN(tx_size,
cm->above_txfm_context[mi_col + c]);
tx_size_c = VPXMIN(tx_size,
cm->left_txfm_context[(mi_row + r) & MI_MASK]);
cm->above_txfm_context[mi_col + c] = tx_size;
cm->left_txfm_context[(mi_row + r) & 0x07] = tx_size;
cm->left_txfm_context[(mi_row + r) & MI_MASK] = tx_size;
#endif
// Build masks based on the transform size of each block
@@ -1351,13 +1358,14 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
border_mask = ~(mi_col == 0);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(dst->buf),
highbd_filter_selectively_vert(
CONVERT_TO_SHORTPTR(dst->buf),
dst->stride,
mask_16x16_c & border_mask,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3],
&cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2],
(int)cm->bit_depth);
} else {
filter_selectively_vert(dst->buf, dst->stride,
@@ -1365,7 +1373,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3]);
&cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]);
}
#else
filter_selectively_vert(dst->buf, dst->stride,
@@ -1373,7 +1381,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3]);
&cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
mi_8x8 += row_step_stride;
@@ -1400,13 +1408,14 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
}
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
highbd_filter_selectively_horiz(
CONVERT_TO_SHORTPTR(dst->buf),
dst->stride,
mask_16x16_r,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3],
&cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2],
(int)cm->bit_depth);
} else {
filter_selectively_horiz(dst->buf, dst->stride,
@@ -1414,7 +1423,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3]);
&cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]);
}
#else
filter_selectively_horiz(dst->buf, dst->stride,
@@ -1422,7 +1431,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3]);
&cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
}
@@ -1455,16 +1464,18 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm,
highbd_filter_selectively_vert_row2(
plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfm->lfl_y[r << 3], (int)cm->bit_depth);
&lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2], (int)cm->bit_depth);
} else {
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]);
}
#else
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
mask_16x16 >>= 16;
@@ -1499,17 +1510,18 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm,
if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz(
CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
&lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2],
(int)cm->bit_depth);
} else {
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
&lfm->lfl_y[r << 3]);
&lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]);
}
#else
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
&lfm->lfl_y[r << 3]);
&lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
@@ -1539,8 +1551,10 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm,
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) {
if (plane->plane_type == 1) {
for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) {
lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)];
lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
lfm->lfl_uv[(r << 1) + c] =
lfm->lfl_y[(r << MI_BLOCK_SIZE_LOG2) + (c << 1)];
lfm->lfl_uv[((r + 2) << 1) + c] =
lfm->lfl_y[((r + 2) << MI_BLOCK_SIZE_LOG2) + (c << 1)];
}
}
@@ -1632,9 +1646,31 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
VP10_COMMON *cm,
struct macroblockd_plane planes[MAX_MB_PLANE],
int start, int stop, int y_only) {
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
int mi_row, mi_col;
# if CONFIG_VAR_TX
memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
# endif // CONFIG_VAR_TX
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
# if CONFIG_VAR_TX
memset(cm->left_txfm_context, TX_SIZES, MI_BLOCK_SIZE);
# endif // CONFIG_VAR_TX
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
int plane;
vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
for (plane = 0; plane < num_planes; ++plane)
vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
}
}
#else
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
int mi_row, mi_col;
#if !CONFIG_VAR_TX && !CONFIG_EXT_PARTITION_TYPES
enum lf_path path;
LOOP_FILTER_MASK lfm;
@@ -1646,29 +1682,17 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
path = LF_PATH_444;
else
path = LF_PATH_SLOW;
#endif // !CONFIG_VAR_TX && !CONFIG_EXT_PARTITION_TYPES
#if CONFIG_VAR_TX
memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
#endif
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
#if CONFIG_VAR_TX
memset(cm->left_txfm_context, TX_SIZES, 8);
#endif
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
int plane;
vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION_TYPES
for (plane = 0; plane < num_planes; ++plane)
vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
#else
// TODO(JBB): Make setup_mask work for non 420.
vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
&lfm);
vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
@@ -1684,9 +1708,9 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
break;
}
}
#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION_TYPES
}
}
#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
}
void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame,


@@ -84,8 +84,8 @@ typedef struct {
uint16_t above_uv[TX_SIZES];
uint16_t left_int_4x4_uv;
uint16_t above_int_4x4_uv;
uint8_t lfl_y[64];
uint8_t lfl_uv[16];
uint8_t lfl_y[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
uint8_t lfl_uv[MI_BLOCK_SIZE / 2 * MI_BLOCK_SIZE / 2];
} LOOP_FILTER_MASK;
/* assorted loopfilter functions which get used elsewhere */


@@ -12,6 +12,7 @@
#include "vp10/common/mvref_common.h"
#if CONFIG_REF_MV
static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi,
const MB_MODE_INFO *const candidate,
const MV_REFERENCE_FRAME rf[2],
@@ -23,6 +24,8 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi,
int index = 0, ref;
int newmv_count = 0;
assert(2 * weight < REF_CAT_LEVEL);
if (rf[1] == NONE) {
// single reference frame
for (ref = 0; ref < 2; ++ref) {
@@ -246,32 +249,30 @@ static uint8_t scan_blk_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
return newmv_count;
}
// This function assumes MI blocks are 8x8 and coding units are 64x64
static int has_top_right(const MACROBLOCKD *xd,
int mi_row, int mi_col, int bs) {
// In a split partition all blocks apart from the bottom right have a top right
int has_tr = !((mi_row & bs) & (bs * 2 - 1)) ||
!((mi_col & bs) & (bs * 2 - 1));
int has_tr = !((mi_row & bs) && (mi_col & bs));
// bs > 0 and bs is a power of 2
assert(bs > 0 && !(bs & (bs - 1)));
// Filter out partial right-most boundaries
// For each 4x4 group of blocks, when the bottom right is decoded the blocks
// to the right have not been decoded therefore the second from bottom in the
// right-most column does not have a top right
if ((mi_col & bs) & (bs * 2 - 1)) {
if (((mi_col & (2 * bs)) & (bs * 4 - 1)) &&
((mi_row & (2 * bs)) & (bs * 4 - 1)))
// to the right have not been decoded therefore the bottom right does
// not have a top right
while (bs < MI_BLOCK_SIZE) {
if (mi_col & bs) {
if ((mi_col & (2 * bs)) && (mi_row & (2 * bs))) {
has_tr = 0;
break;
}
} else {
break;
}
bs <<= 1;
}
// If the right hand side of the block lines up with the right hand edge of
// a group of 8x8 MI blocks (i.e. edge of a coding unit) and is not on the top
// row of that coding unit, it does not have a top right
if (has_tr)
if (((mi_col + xd->n8_w) & 0x07) == 0)
if ((mi_row & 0x07) > 0)
has_tr = 0;
// The left had of two vertical rectangles always has a top right (as the
// The left hand of two vertical rectangles always has a top right (as the
// block above will have been decoded)
if (xd->n8_w < xd->n8_h)
if (!xd->is_sec_rect)
@@ -359,8 +360,11 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd,
nearest_refmv_count = *refmv_count;
for (idx = 0; idx < nearest_refmv_count; ++idx)
for (idx = 0; idx < nearest_refmv_count; ++idx) {
assert(ref_mv_stack[idx].weight > 0 &&
ref_mv_stack[idx].weight < REF_CAT_LEVEL);
ref_mv_stack[idx].weight += REF_CAT_LEVEL;
}
if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame
&& rf[1] == NONE) {
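
The rewritten has_top_right() above replaces the 64x64-specific bit tricks with a loop over partition levels, so the test also holds for 128x128 superblocks. A standalone copy of the core loop for experimentation (the coding-unit-edge and vertical-rectangle special cases of the full function are omitted here):

/* mi_row/mi_col and bs are in mi-units; bs is a power of two.
 * mi_block_size would be 16 with ext-partition, 8 otherwise. */
static int has_top_right(int mi_row, int mi_col, int bs, int mi_block_size) {
  /* In a split partition, every block except the bottom-right one has
   * a decoded top-right neighbour. */
  int has_tr = !((mi_row & bs) && (mi_col & bs));

  /* Walk up the partition tree: a block on the right edge of its
   * parent loses its top right once the parent's bottom half starts. */
  while (bs < mi_block_size) {
    if (mi_col & bs) {
      if ((mi_col & (2 * bs)) && (mi_row & (2 * bs))) {
        has_tr = 0;
        break;
      }
    } else {
      break;
    }
    bs <<= 1;
  }
  return has_tr;
}
/* e.g. has_top_right(2, 2, 2, 16) == 0: the bottom-right 16x16 of a
 * 32x32 split cannot see a decoded block above and to its right. */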


@@ -120,7 +120,16 @@ static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
// 64X32
{{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
// 64X64
{{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
{{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}},
#if CONFIG_EXT_PARTITION
// TODO(debargha/jingning) Making them twice the 32x64, .. ones above
// 64x128
{{0, -2}, {-2, 0}, {8, -2}, {-2, 4}, {-2, -2}, {0, -6}, {-6, 0}, {4, -2}},
// 128x64
{{-2, 0}, {0, -2}, {-2, 8}, {4, -2}, {-2, -2}, {-6, 0}, {0, -6}, {-2, 4}},
// 128x128
{{-2, 6}, {6, -2}, {-2, 8}, {8, -2}, {-2, -2}, {-2, 0}, {0, -2}, {-2, 12}},
#endif // CONFIG_EXT_PARTITION
};
static const int idx_n_column_to_subblock[4][2] = {
@@ -131,7 +140,11 @@ static const int idx_n_column_to_subblock[4][2] = {
};
// clamp_mv_ref
#if CONFIG_EXT_PARTITION
# define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
#else
# define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units
#endif // CONFIG_EXT_PARTITION
static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER,


@@ -332,7 +332,7 @@ typedef struct VP10Common {
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
#if CONFIG_VAR_TX
TXFM_CONTEXT *above_txfm_context;
TXFM_CONTEXT left_txfm_context[8];
TXFM_CONTEXT left_txfm_context[MI_BLOCK_SIZE];
#endif
int above_context_alloc_cols;
@@ -440,7 +440,7 @@ static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
const int above_idx = mi_col * 2;
const int left_idx = (mi_row * 2) & 15; // FIXME: Mask should be CU_SIZE*2-1
const int left_idx = (mi_row * 2) & MI_MASK_2;
int i;
for (i = 0; i < MAX_MB_PLANE; ++i) {
struct macroblockd_plane *const pd = &xd->plane[i];


@@ -454,52 +454,52 @@ void vp10_make_masked_inter_predictor(
const MACROBLOCKD *xd) {
const MODE_INFO *mi = xd->mi[0];
#if CONFIG_VP9_HIGHBITDEPTH
uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE];
uint8_t tmp_dst_[2 * MAX_SB_SQUARE];
uint8_t *tmp_dst =
(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE,
vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
subpel_x, subpel_y, sf, w, h, 0,
interp_filter, xs, ys, xd);
#if CONFIG_SUPERTX
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_extend_highbd(
dst, dst_stride, tmp_dst, CU_SIZE, plane,
dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
else
build_masked_compound_extend(
dst, dst_stride, tmp_dst, CU_SIZE, plane,
dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
#else
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_highbd(
dst, dst_stride, tmp_dst, CU_SIZE,
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, h, w);
else
build_masked_compound(
dst, dst_stride, tmp_dst, CU_SIZE,
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, h, w);
#endif // CONFIG_SUPERTX
#else // CONFIG_VP9_HIGHBITDEPTH
uint8_t tmp_dst[CU_SIZE * CU_SIZE];
vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE,
uint8_t tmp_dst[MAX_SB_SQUARE];
vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
subpel_x, subpel_y, sf, w, h, 0,
interp_filter, xs, ys, xd);
#if CONFIG_SUPERTX
build_masked_compound_extend(
dst, dst_stride, tmp_dst, CU_SIZE, plane,
dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
#else
build_masked_compound(
dst, dst_stride, tmp_dst, CU_SIZE,
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, h, w);
#endif // CONFIG_SUPERTX
@@ -877,12 +877,13 @@ void vp10_build_masked_inter_predictor_complex(
int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
PARTITION_TYPE partition, int plane) {
int i, j;
uint8_t mask[MAXTXLEN];
int top_w = 4 << b_width_log2_lookup[top_bsize],
top_h = 4 << b_height_log2_lookup[top_bsize];
int w = 4 << b_width_log2_lookup[bsize], h = 4 << b_height_log2_lookup[bsize];
int w_offset = (mi_col - mi_col_ori) << 3,
h_offset = (mi_row - mi_row_ori) << 3;
uint8_t mask[MAX_TX_SIZE];
int top_w = 4 << b_width_log2_lookup[top_bsize];
int top_h = 4 << b_height_log2_lookup[top_bsize];
int w = 4 << b_width_log2_lookup[bsize];
int h = 4 << b_height_log2_lookup[bsize];
int w_offset = (mi_col - mi_col_ori) * MI_SIZE;
int h_offset = (mi_row - mi_row_ori) * MI_SIZE;
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t *dst16= CONVERT_TO_SHORTPTR(dst);
@@ -890,6 +891,8 @@ void vp10_build_masked_inter_predictor_complex(
int b_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#endif // CONFIG_VP9_HIGHBITDEPTH
assert(bsize <= BLOCK_32X32);
top_w >>= pd->subsampling_x;
top_h >>= pd->subsampling_y;
w >>= pd->subsampling_x;
@@ -916,7 +919,8 @@ void vp10_build_masked_inter_predictor_complex(
if (m == 0)
dst_tmp[j] = dst2_tmp[j];
else
dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6;
dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m +
dst2_tmp[j] * (64 - m), 6);
}
dst_tmp += dst_stride;
dst2_tmp += dst2_stride;
@@ -943,7 +947,8 @@ void vp10_build_masked_inter_predictor_complex(
if (m == 0)
dst_tmp[j] = dst2_tmp[j];
else
dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6;
dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m +
dst2_tmp[j] * (64 - m), 6);
}
dst_tmp += dst_stride;
dst2_tmp += dst2_stride;
@@ -978,7 +983,8 @@ void vp10_build_masked_inter_predictor_complex(
if (m == 0)
dst_tmp[j] = dst2_tmp[j];
else
dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6;
dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m +
dst2_tmp[j] * (64 - m), 6);
}
memcpy(dst_tmp + j, dst2_tmp + j,
(top_w - w_offset - w) * sizeof(uint16_t));
@@ -1001,7 +1007,8 @@ void vp10_build_masked_inter_predictor_complex(
if (m == 0)
dst_tmp[j] = dst2_tmp[j];
else
dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6;
dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m +
dst2_tmp[j] * (64 - m), 6);
}
memcpy(dst_tmp + j, dst2_tmp + j,
(top_w - w_offset - w) * sizeof(uint8_t));
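
Several blends in this file move from the open-coded "(x * m + y * (64 - m) + 32) >> 6" to ROUND_POWER_OF_TWO. This is purely cosmetic: with the usual vpx definition the two forms are bit-identical for non-negative inputs, as a quick exhaustive check over the mask range shows:

#include <assert.h>
#include <stdint.h>

/* ROUND_POWER_OF_TWO as defined in the vpx headers. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

int main(void) {
  const uint8_t a = 200, b = 10;
  int m;
  for (m = 0; m <= 64; ++m) {  /* 64-weight blend of two pixels */
    int open_coded = (a * m + b * (64 - m) + 32) >> 6;
    int macro_form = ROUND_POWER_OF_TWO(a * m + b * (64 - m), 6);
    assert(open_coded == macro_form);
  }
  return 0;
}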
@@ -1158,12 +1165,39 @@ static const uint8_t obmc_mask_16[2][16] = {
};
static const uint8_t obmc_mask_32[2][32] = {
{ 33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55,
56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64},
{ 31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11, 9,
8, 7, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, 0, 0, 0}
{ 33, 35, 36, 38, 40, 41, 43, 44,
45, 47, 48, 50, 51, 52, 53, 55,
56, 57, 58, 59, 60, 60, 61, 62,
62, 63, 63, 64, 64, 64, 64, 64 },
{ 31, 29, 28, 26, 24, 23, 21, 20,
19, 17, 16, 14, 13, 12, 11, 9,
8, 7, 6, 5, 4, 4, 3, 2,
2, 1, 1, 0, 0, 0, 0, 0 }
};
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct values here?
static const uint8_t obmc_mask_64[2][64] = {
{ 33, 33, 35, 35, 36, 36, 38, 38,
40, 40, 41, 41, 43, 43, 44, 44,
45, 45, 47, 47, 48, 48, 50, 50,
51, 51, 52, 52, 53, 53, 55, 55,
56, 56, 57, 57, 58, 58, 59, 59,
60, 60, 60, 60, 61, 61, 62, 62,
62, 62, 63, 63, 63, 63, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64 },
{ 31, 31, 29, 29, 28, 28, 26, 26,
24, 24, 23, 23, 21, 21, 20, 20,
19, 19, 17, 17, 16, 16, 14, 14,
13, 13, 12, 12, 11, 11, 9, 9,
8, 8, 7, 7, 6, 6, 5, 5,
4, 4, 4, 4, 3, 3, 2, 2,
2, 2, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0 }
};
#endif // CONFIG_EXT_PARTITION
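Editorial sketch, not part of the source: whatever values eventually resolve the TODO above, each complementary pair must sum to 64 so the OBMC blends below remain unit-gain weighted averages. A hypothetical self-check, assuming <assert.h> is available:

static void check_obmc_mask_64(void) {
  int i;
  for (i = 0; i < 64; ++i)
    assert(obmc_mask_64[0][i] + obmc_mask_64[1][i] == 64);
}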
void setup_obmc_mask(int length, const uint8_t *mask[2]) {
switch (length) {
case 1:
@@ -1190,9 +1224,15 @@ void setup_obmc_mask(int length, const uint8_t *mask[2]) {
mask[0] = obmc_mask_32[0];
mask[1] = obmc_mask_32[1];
break;
#if CONFIG_EXT_PARTITION
case 64:
mask[0] = obmc_mask_64[0];
mask[1] = obmc_mask_64[1];
break;
#endif // CONFIG_EXT_PARTITION
default:
mask[0] = obmc_mask_32[0];
mask[1] = obmc_mask_32[1];
mask[0] = NULL;
mask[1] = NULL;
assert(0);
break;
}
@@ -1265,15 +1305,15 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *pd = &xd->plane[plane];
int bw = (mi_step * 8) >> pd->subsampling_x;
int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
int bh = overlap >> pd->subsampling_y;
int row, col;
int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride;
uint8_t *dst = use_tmp_dst_buf ?
&final_buf[plane][(i * 8) >> pd->subsampling_x] :
&pd->dst.buf[(i * 8) >> pd->subsampling_x];
&final_buf[plane][(i * MI_SIZE) >> pd->subsampling_x] :
&pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];
int tmp_stride = tmp_stride1[plane];
uint8_t *tmp = &tmp_buf1[plane][(i * 8) >> pd->subsampling_x];
uint8_t *tmp = &tmp_buf1[plane][(i * MI_SIZE) >> pd->subsampling_x];
const uint8_t *mask[2];
setup_obmc_mask(bh, mask);
@@ -1285,8 +1325,9 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col)
dst16[col] = (mask[0][row] * dst16[col] + mask[1][row] * tmp16[col]
+ 32) >> 6;
dst16[col] = ROUND_POWER_OF_TWO(mask[0][row] * dst16[col] +
mask[1][row] * tmp16[col], 6);
dst16 += dst_stride;
tmp16 += tmp_stride;
}
@@ -1294,8 +1335,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
#endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col)
dst[col] = (mask[0][row] * dst[col] + mask[1][row] * tmp[col] + 32)
>> 6;
dst[col] = ROUND_POWER_OF_TWO(mask[0][row] * dst[col] +
mask[1][row] * tmp[col], 6);
dst += dst_stride;
tmp += tmp_stride;
}
@@ -1332,15 +1373,15 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *pd = &xd->plane[plane];
int bw = overlap >> pd->subsampling_x;
int bh = (mi_step * 8) >> pd->subsampling_y;
int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
int row, col;
int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride;
uint8_t *dst = use_tmp_dst_buf ?
&final_buf[plane][(i * 8 * dst_stride) >> pd->subsampling_y] :
&pd->dst.buf[(i * 8 * dst_stride) >> pd->subsampling_y];
&final_buf[plane][(i * MI_SIZE * dst_stride) >> pd->subsampling_y] :
&pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];
int tmp_stride = tmp_stride2[plane];
uint8_t *tmp = &tmp_buf2[plane]
[(i * 8 * tmp_stride) >> pd->subsampling_y];
[(i * MI_SIZE * tmp_stride) >> pd->subsampling_y];
const uint8_t *mask[2];
setup_obmc_mask(bw, mask);
@@ -1352,8 +1393,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col)
dst16[col] = (mask[0][col] * dst16[col] + mask[1][col] * tmp16[col]
+ 32) >> 6;
dst16[col] = ROUND_POWER_OF_TWO(mask[0][col] * dst16[col] +
mask[1][col] * tmp16[col], 6);
dst16 += dst_stride;
tmp16 += tmp_stride;
}
@@ -1361,8 +1402,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
#endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col)
dst[col] = (mask[0][col] * dst[col] + mask[1][col] * tmp[col] + 32)
>> 6;
dst[col] = ROUND_POWER_OF_TWO(mask[0][col] * dst[col] +
mask[1][col] * tmp[col], 6);
dst += dst_stride;
tmp += tmp_stride;
}
@@ -1572,7 +1613,31 @@ static void combine_interintra(PREDICTION_MODE mode,
static const int scale_bits = 8;
static const int scale_max = 256;
static const int scale_round = 127;
static const int weights1d[64] = {
#if CONFIG_EXT_PARTITION
// TODO(debargha): Fill in the correct weights for 128 wide blocks.
static const int weights1d[MAX_SB_SIZE] = {
128, 128, 125, 125, 122, 122, 119, 119,
116, 116, 114, 114, 111, 111, 109, 109,
107, 107, 105, 105, 103, 103, 101, 101,
99, 99, 97, 97, 96, 96, 94, 94,
93, 93, 91, 91, 90, 90, 89, 89,
88, 88, 86, 86, 85, 85, 84, 84,
83, 83, 82, 82, 81, 81, 81, 81,
80, 80, 79, 79, 78, 78, 78, 78,
77, 77, 76, 76, 76, 76, 75, 75,
75, 75, 74, 74, 74, 74, 73, 73,
73, 73, 72, 72, 72, 72, 71, 71,
71, 71, 71, 71, 70, 70, 70, 70,
70, 70, 70, 70, 69, 69, 69, 69,
69, 69, 69, 69, 68, 68, 68, 68,
68, 68, 68, 68, 68, 68, 67, 67,
67, 67, 67, 67, 67, 67, 67, 67,
};
static const int size_scales[BLOCK_SIZES] = {
32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1
};
#else
static const int weights1d[MAX_SB_SIZE] = {
128, 125, 122, 119, 116, 114, 111, 109,
107, 105, 103, 101, 99, 97, 96, 94,
93, 91, 90, 89, 88, 86, 85, 84,
@@ -1582,14 +1647,14 @@ static void combine_interintra(PREDICTION_MODE mode,
70, 70, 69, 69, 69, 69, 68, 68,
68, 68, 68, 67, 67, 67, 67, 67,
};
const int bw = 4 << b_width_log2_lookup[plane_bsize];
const int bh = 4 << b_height_log2_lookup[plane_bsize];
static const int size_scales[BLOCK_SIZES] = {
16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1
};
#endif // CONFIG_EXT_PARTITION
int size = VPXMAX(bw, bh);
int size_scale = (size >= 64 ? 1 :
size == 32 ? 2 :
size == 16 ? 4 :
size == 8 ? 8 : 16);
const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
const int size_scale = size_scales[plane_bsize];
int i, j;
if (use_wedge_interintra && get_wedge_bits(bsize)) {
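Editorial check: the size_scales lookup reproduces the ternary chain it replaces. For the non-ext table the removed logic was equivalent to this hypothetical helper:

static INLINE int old_size_scale(int bw, int bh) {
  const int size = VPXMAX(bw, bh);
  return size >= 64 ? 1 : size == 32 ? 2 : size == 16 ? 4 : size == 8 ? 8 : 16;
}

For example old_size_scale(8, 8) == 8 == size_scales[BLOCK_8X8]; the ext-partition table rescales the entries relative to a 128-wide superblock.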
@@ -1712,7 +1777,31 @@ static void combine_interintra_highbd(PREDICTION_MODE mode,
static const int scale_bits = 8;
static const int scale_max = 256;
static const int scale_round = 127;
static const int weights1d[64] = {
#if CONFIG_EXT_PARTITION
// TODO(debargha): Fill in the correct weights for 128 wide blocks.
static const int weights1d[MAX_SB_SIZE] = {
128, 128, 125, 125, 122, 122, 119, 119,
116, 116, 114, 114, 111, 111, 109, 109,
107, 107, 105, 105, 103, 103, 101, 101,
99, 99, 97, 97, 96, 96, 94, 94,
93, 93, 91, 91, 90, 90, 89, 89,
88, 88, 86, 86, 85, 85, 84, 84,
83, 83, 82, 82, 81, 81, 81, 81,
80, 80, 79, 79, 78, 78, 78, 78,
77, 77, 76, 76, 76, 76, 75, 75,
75, 75, 74, 74, 74, 74, 73, 73,
73, 73, 72, 72, 72, 72, 71, 71,
71, 71, 71, 71, 70, 70, 70, 70,
70, 70, 70, 70, 69, 69, 69, 69,
69, 69, 69, 69, 68, 68, 68, 68,
68, 68, 68, 68, 68, 68, 67, 67,
67, 67, 67, 67, 67, 67, 67, 67,
};
static const int size_scales[BLOCK_SIZES] = {
32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1
};
#else
static const int weights1d[MAX_SB_SIZE] = {
128, 125, 122, 119, 116, 114, 111, 109,
107, 105, 103, 101, 99, 97, 96, 94,
93, 91, 90, 89, 88, 86, 85, 84,
@@ -1722,15 +1811,16 @@ static void combine_interintra_highbd(PREDICTION_MODE mode,
70, 70, 69, 69, 69, 69, 68, 68,
68, 68, 68, 67, 67, 67, 67, 67,
};
const int bw = 4 << b_width_log2_lookup[plane_bsize];
const int bh = 4 << b_height_log2_lookup[plane_bsize];
static const int size_scales[BLOCK_SIZES] = {
16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1
};
#endif // CONFIG_EXT_PARTITION
int size = VPXMAX(bw, bh);
int size_scale = (size >= 64 ? 1 :
size == 32 ? 2 :
size == 16 ? 4 :
size == 8 ? 8 : 16);
const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
const int size_scale = size_scales[plane_bsize];
int i, j;
uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8);
uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8);
uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
@@ -1889,8 +1979,7 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd,
const int bw = 4 << b_width_log2_lookup[bsize];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t,
intrapredictor[CU_SIZE * CU_SIZE]);
DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
build_intra_predictors_for_interintra(
xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride,
CONVERT_TO_BYTEPTR(intrapredictor), bw,
@@ -1907,7 +1996,7 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
{
uint8_t intrapredictor[CU_SIZE * CU_SIZE];
uint8_t intrapredictor[MAX_SB_SQUARE];
build_intra_predictors_for_interintra(
xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride,
intrapredictor, bw,
@@ -1931,8 +2020,7 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd,
const int bw = 4 << b_width_log2_lookup[uvbsize];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t,
uintrapredictor[CU_SIZE * CU_SIZE]);
DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]);
build_intra_predictors_for_interintra(
xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
CONVERT_TO_BYTEPTR(uintrapredictor), bw,
@@ -1950,7 +2038,7 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
{
uint8_t uintrapredictor[CU_SIZE * CU_SIZE];
uint8_t uintrapredictor[MAX_SB_SQUARE];
build_intra_predictors_for_interintra(
xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
uintrapredictor, bw,
@@ -2117,30 +2205,30 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
if (ref && get_wedge_bits(mi->mbmi.sb_type)
&& mi->mbmi.use_wedge_interinter) {
#if CONFIG_VP9_HIGHBITDEPTH
uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE];
uint8_t tmp_dst_[2 * MAX_SB_SQUARE];
uint8_t *tmp_dst =
(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
#else
uint8_t tmp_dst[CU_SIZE * CU_SIZE];
uint8_t tmp_dst[MAX_SB_SQUARE];
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int k;
for (k = 0; k < h; ++k)
memcpy(tmp_dst_ + 2 * CU_SIZE * k, ext_dst1 +
memcpy(tmp_dst_ + 2 * MAX_SB_SIZE * k, ext_dst1 +
ext_dst_stride1 * 2 * k, w * 2);
} else {
int k;
for (k = 0; k < h; ++k)
memcpy(tmp_dst_ + CU_SIZE * k, ext_dst1 +
memcpy(tmp_dst_ + MAX_SB_SIZE * k, ext_dst1 +
ext_dst_stride1 * k, w);
}
#else
{
int k;
for (k = 0; k < h; ++k)
memcpy(tmp_dst + CU_SIZE * k, ext_dst1 +
memcpy(tmp_dst + MAX_SB_SIZE * k, ext_dst1 +
ext_dst_stride1 * k, w);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -2149,20 +2237,20 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
build_masked_compound_extend_highbd(
dst, dst_buf->stride, tmp_dst, CU_SIZE, plane,
dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
} else {
build_masked_compound_extend(
dst, dst_buf->stride, tmp_dst, CU_SIZE, plane,
dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
}
#else
build_masked_compound_extend(dst, dst_buf->stride, tmp_dst,
CU_SIZE, plane,
MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
@@ -2171,12 +2259,12 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst,
CU_SIZE,
MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, h, w);
else
#endif // CONFIG_VP9_HIGHBITDEPTH
build_masked_compound(dst, dst_buf->stride, tmp_dst, CU_SIZE,
build_masked_compound(dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, h, w);
#endif // CONFIG_SUPERTX

View File

@@ -443,8 +443,8 @@ void vp10_build_prediction_by_left_preds(VP10_COMMON *cm,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
#define MASK_MASTER_SIZE (2 * CU_SIZE)
#define MASK_MASTER_STRIDE (2 * CU_SIZE)
#define MASK_MASTER_SIZE (2 * MAX_SB_SIZE)
#define MASK_MASTER_STRIDE (2 * MAX_SB_SIZE)
void vp10_init_wedge_masks();

View File

@@ -44,30 +44,30 @@ static const uint8_t extend_modes[INTRA_MODES] = {
NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // TM
};
static const uint8_t orders_64x64[1] = { 0 };
static const uint8_t orders_64x32[2] = { 0, 1 };
static const uint8_t orders_32x64[2] = { 0, 1 };
static const uint8_t orders_32x32[4] = {
static const uint8_t orders_128x128[1] = { 0 };
static const uint8_t orders_128x64[2] = { 0, 1 };
static const uint8_t orders_64x128[2] = { 0, 1 };
static const uint8_t orders_64x64[4] = {
0, 1,
2, 3,
};
static const uint8_t orders_32x16[8] = {
static const uint8_t orders_64x32[8] = {
0, 2,
1, 3,
4, 6,
5, 7,
};
static const uint8_t orders_16x32[8] = {
static const uint8_t orders_32x64[8] = {
0, 1, 2, 3,
4, 5, 6, 7,
};
static const uint8_t orders_16x16[16] = {
static const uint8_t orders_32x32[16] = {
0, 1, 4, 5,
2, 3, 6, 7,
8, 9, 12, 13,
10, 11, 14, 15,
};
static const uint8_t orders_16x8[32] = {
static const uint8_t orders_32x16[32] = {
0, 2, 8, 10,
1, 3, 9, 11,
4, 6, 12, 14,
@@ -77,13 +77,13 @@ static const uint8_t orders_16x8[32] = {
20, 22, 28, 30,
21, 23, 29, 31,
};
static const uint8_t orders_8x16[32] = {
static const uint8_t orders_16x32[32] = {
0, 1, 2, 3, 8, 9, 10, 11,
4, 5, 6, 7, 12, 13, 14, 15,
16, 17, 18, 19, 24, 25, 26, 27,
20, 21, 22, 23, 28, 29, 30, 31,
};
static const uint8_t orders_8x8[64] = {
static const uint8_t orders_16x16[64] = {
0, 1, 4, 5, 16, 17, 20, 21,
2, 3, 6, 7, 18, 19, 22, 23,
8, 9, 12, 13, 24, 25, 28, 29,
@@ -93,24 +93,96 @@ static const uint8_t orders_8x8[64] = {
40, 41, 44, 45, 56, 57, 60, 61,
42, 43, 46, 47, 58, 59, 62, 63,
};
static const uint8_t *const orders[BLOCK_SIZES] = {
orders_8x8, orders_8x8, orders_8x8, orders_8x8,
orders_8x16, orders_16x8, orders_16x16,
orders_16x32, orders_32x16, orders_32x32,
orders_32x64, orders_64x32, orders_64x64,
#if CONFIG_EXT_PARTITION
static const uint8_t orders_16x8[128] = {
0, 2, 8, 10, 32, 34, 40, 42,
1, 3, 9, 11, 33, 35, 41, 43,
4, 6, 12, 14, 36, 38, 44, 46,
5, 7, 13, 15, 37, 39, 45, 47,
16, 18, 24, 26, 48, 50, 56, 58,
17, 19, 25, 27, 49, 51, 57, 59,
20, 22, 28, 30, 52, 54, 60, 62,
21, 23, 29, 31, 53, 55, 61, 63,
64, 66, 72, 74, 96, 98, 104, 106,
65, 67, 73, 75, 97, 99, 105, 107,
68, 70, 76, 78, 100, 102, 108, 110,
69, 71, 77, 79, 101, 103, 109, 111,
80, 82, 88, 90, 112, 114, 120, 122,
81, 83, 89, 91, 113, 115, 121, 123,
84, 86, 92, 94, 116, 118, 124, 126,
85, 87, 93, 95, 117, 119, 125, 127,
};
static const uint8_t orders_8x16[128] = {
0, 1, 2, 3, 8, 9, 10, 11, 32, 33, 34, 35, 40, 41, 42, 43,
4, 5, 6, 7, 12, 13, 14, 15, 36, 37, 38, 39, 44, 45, 46, 47,
16, 17, 18, 19, 24, 25, 26, 27, 48, 49, 50, 51, 56, 57, 58, 59,
20, 21, 22, 23, 28, 29, 30, 31, 52, 53, 54, 55, 60, 61, 62, 63,
64, 65, 66, 67, 72, 73, 74, 75, 96, 97, 98, 99, 104, 105, 106, 107,
68, 69, 70, 71, 76, 77, 78, 79, 100, 101, 102, 103, 108, 109, 110, 111,
80, 81, 82, 83, 88, 89, 90, 91, 112, 113, 114, 115, 120, 121, 122, 123,
84, 85, 86, 87, 92, 93, 94, 95, 116, 117, 118, 119, 124, 125, 126, 127,
};
static const uint8_t orders_8x8[256] = {
0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85,
2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, 86, 87,
8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, 89, 92, 93,
10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, 90, 91, 94, 95,
32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, 101, 112, 113, 116, 117,
34, 35, 38, 39, 50, 51, 54, 55, 98, 99, 102, 103, 114, 115, 118, 119,
40, 41, 44, 45, 56, 57, 60, 61, 104, 105, 108, 109, 120, 121, 124, 125,
42, 43, 46, 47, 58, 59, 62, 63, 106, 107, 110, 111, 122, 123, 126, 127,
128, 129, 132, 133, 144, 145, 148, 149, 192, 193, 196, 197, 208, 209, 212, 213,
130, 131, 134, 135, 146, 147, 150, 151, 194, 195, 198, 199, 210, 211, 214, 215,
136, 137, 140, 141, 152, 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221,
138, 139, 142, 143, 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223,
160, 161, 164, 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245,
162, 163, 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247,
168, 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253,
170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, 255,
};
static const uint8_t *const orders[BLOCK_SIZES] = {
// 4X4
orders_8x8,
// 4X8, 8X4, 8X8
orders_8x8, orders_8x8, orders_8x8,
// 8X16, 16X8, 16X16
orders_8x16, orders_16x8, orders_16x16,
// 16X32, 32X16, 32X32
orders_16x32, orders_32x16, orders_32x32,
// 32X64, 64X32, 64X64
orders_32x64, orders_64x32, orders_64x64,
// 64x128, 128x64, 128x128
orders_64x128, orders_128x64, orders_128x128
};
#else
static const uint8_t *const orders[BLOCK_SIZES] = {
// 4X4
orders_16x16,
// 4X8, 8X4, 8X8
orders_16x16, orders_16x16, orders_16x16,
// 8X16, 16X8, 16X16
orders_16x32, orders_32x16, orders_32x32,
// 16X32, 32X16, 32X32
orders_32x64, orders_64x32, orders_64x64,
// 32X64, 64X32, 64X64
orders_64x128, orders_128x64, orders_128x128
};
#endif // CONFIG_EXT_PARTITION
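Editorial check: each orders_WxH table lists the coding order of every WxH block inside the largest superblock, so under CONFIG_EXT_PARTITION its length must be (128 / W) * (128 / H); for instance (128 / 16) * (128 / 8) == 128 entries for orders_16x8 and (128 / 8) * (128 / 8) == 256 for orders_8x8, matching the declarations above.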
#if CONFIG_EXT_PARTITION_TYPES
static const uint8_t orders_verta_32x32[4] = {
static const uint8_t orders_verta_64x64[4] = {
0, 2,
1, 2,
};
static const uint8_t orders_verta_16x16[16] = {
static const uint8_t orders_verta_32x32[16] = {
0, 2, 4, 6,
1, 2, 5, 6,
8, 10, 12, 14,
9, 10, 13, 14,
};
static const uint8_t orders_verta_8x8[64] = {
static const uint8_t orders_verta_16x16[64] = {
0, 2, 4, 6, 16, 18, 20, 22,
1, 2, 5, 6, 17, 18, 21, 22,
8, 10, 12, 14, 24, 26, 28, 30,
@@ -120,12 +192,53 @@ static const uint8_t orders_verta_8x8[64] = {
40, 42, 44, 46, 56, 58, 60, 62,
41, 42, 45, 46, 57, 58, 61, 62,
};
static const uint8_t *const orders_verta[BLOCK_SIZES] = {
orders_verta_8x8, orders_verta_8x8, orders_verta_8x8, orders_verta_8x8,
orders_8x16, orders_16x8, orders_verta_16x16,
orders_16x32, orders_32x16, orders_verta_32x32,
orders_32x64, orders_64x32, orders_64x64,
#if CONFIG_EXT_PARTITION
static const uint8_t orders_verta_8x8[256] = {
0, 2, 4, 6, 16, 18, 20, 22, 64, 66, 68, 70, 80, 82, 84, 86,
1, 2, 5, 6, 17, 18, 21, 22, 65, 66, 69, 70, 81, 82, 85, 86,
8, 10, 12, 14, 24, 26, 28, 30, 72, 74, 76, 78, 88, 90, 92, 94,
9, 10, 13, 14, 25, 26, 29, 30, 73, 74, 77, 78, 89, 90, 93, 94,
32, 34, 36, 38, 48, 50, 52, 54, 96, 98, 100, 102, 112, 114, 116, 118,
33, 34, 37, 38, 49, 50, 53, 54, 97, 98, 101, 102, 113, 114, 117, 118,
40, 42, 44, 46, 56, 58, 60, 62, 104, 106, 108, 110, 120, 122, 124, 126,
41, 42, 45, 46, 57, 58, 61, 62, 105, 106, 109, 110, 121, 122, 125, 126,
128, 130, 132, 134, 144, 146, 148, 150, 192, 194, 196, 198, 208, 210, 212, 214,
129, 130, 133, 134, 145, 146, 149, 150, 193, 194, 197, 198, 209, 210, 213, 214,
136, 138, 140, 142, 152, 154, 156, 158, 200, 202, 204, 206, 216, 218, 220, 222,
137, 138, 141, 142, 153, 154, 157, 158, 201, 202, 205, 206, 217, 218, 221, 222,
160, 162, 164, 166, 176, 178, 180, 182, 224, 226, 228, 230, 240, 242, 244, 246,
161, 162, 165, 166, 177, 178, 181, 182, 225, 226, 229, 230, 241, 242, 245, 246,
168, 170, 172, 174, 184, 186, 188, 190, 232, 234, 236, 238, 248, 250, 252, 254,
169, 170, 173, 174, 185, 186, 189, 190, 233, 234, 237, 238, 249, 250, 253, 254,
};
static const uint8_t *const orders_verta[BLOCK_SIZES] = {
// 4X4
orders_verta_8x8,
// 4X8, 8X4, 8X8
orders_verta_8x8, orders_verta_8x8, orders_verta_8x8,
// 8X16, 16X8, 16X16
orders_8x16, orders_16x8, orders_verta_16x16,
// 16X32, 32X16, 32X32
orders_16x32, orders_32x16, orders_verta_32x32,
// 32X64, 64X32, 64X64
orders_32x64, orders_64x32, orders_verta_64x64,
// 64x128, 128x64, 128x128
orders_64x128, orders_128x64, orders_128x128
};
#else
static const uint8_t *const orders_verta[BLOCK_SIZES] = {
// 4X4
orders_verta_16x16,
// 4X8, 8X4, 8X8
orders_verta_16x16, orders_verta_16x16, orders_verta_16x16,
// 8X16, 16X8, 16X16
orders_16x32, orders_32x16, orders_verta_32x32,
// 16X32, 32X16, 32X32
orders_32x64, orders_64x32, orders_verta_64x64,
// 32X64, 64X32, 64X64
orders_64x128, orders_128x64, orders_128x128
};
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_EXT_PARTITION_TYPES
static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
@@ -159,19 +272,21 @@ static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
if (x + step < w)
return 1;
mi_row = (mi_row & 7) >> hl;
mi_col = (mi_col & 7) >> wl;
mi_row = (mi_row & MI_MASK) >> hl;
mi_col = (mi_col & MI_MASK) >> wl;
// If top row of coding unit
if (mi_row == 0)
return 1;
// If rightmost column of coding unit
if (((mi_col + 1) << wl) >= 8)
if (((mi_col + 1) << wl) >= MI_BLOCK_SIZE)
return 0;
my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0];
tr_order = order[((mi_row - 1) << (3 - wl)) + mi_col + 1];
my_order =
order[((mi_row + 0) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 0];
tr_order =
order[((mi_row - 1) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 1];
return my_order > tr_order;
} else {
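Editorial note: the MI_MASK and MI_BLOCK_SIZE rewrites assume the mi-grid constants scale with the superblock, along these lines (a sketch; the defining header is not in this excerpt):

#define MI_BLOCK_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2)  /* 4 or 3 */
#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2)  /* mi units per superblock edge */
#define MI_MASK (MI_BLOCK_SIZE - 1)

With these, (mi_row & MI_MASK) is the row within the current superblock; the old hard-coded "& 7" and ">= 8" held only for 64-pixel superblocks.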
@@ -200,17 +315,17 @@ static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col,
if (y + step < h)
return 1;
mi_row = (mi_row & 7) >> hl;
mi_col = (mi_col & 7) >> wl;
mi_row = (mi_row & MI_MASK) >> hl;
mi_col = (mi_col & MI_MASK) >> wl;
if (mi_col == 0)
return (mi_row << (hl + !ss_y)) + y + step < (8 << !ss_y);
return (mi_row << (hl + !ss_y)) + y + step < (MI_BLOCK_SIZE << !ss_y);
if (((mi_row + 1) << hl) >= 8)
if (((mi_row + 1) << hl) >= MI_BLOCK_SIZE)
return 0;
my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0];
bl_order = order[((mi_row + 1) << (3 - wl)) + mi_col - 1];
my_order = order[((mi_row + 0) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 0];
bl_order = order[((mi_row + 1) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col - 1];
return bl_order < my_order;
}
@@ -336,8 +451,8 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs,
if (filter_type != INTRA_FILTER_LINEAR) {
const int pad_size = SUBPEL_TAPS >> 1;
int len;
DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][64]);
DECLARE_ALIGNED(16, uint8_t, src[64 + SUBPEL_TAPS]);
DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, src[MAX_SB_SIZE + SUBPEL_TAPS]);
uint8_t flags[SUBPEL_SHIFTS];
memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0]));
@@ -467,8 +582,8 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs,
if (filter_type != INTRA_FILTER_LINEAR) {
const int pad_size = SUBPEL_TAPS >> 1;
int len, i;
DECLARE_ALIGNED(16, uint8_t, buf[64][4 * SUBPEL_SHIFTS]);
DECLARE_ALIGNED(16, uint8_t, src[(64 + SUBPEL_TAPS) * 4]);
DECLARE_ALIGNED(16, uint8_t, buf[MAX_SB_SIZE][4 * SUBPEL_SHIFTS]);
DECLARE_ALIGNED(16, uint8_t, src[(MAX_SB_SIZE + SUBPEL_TAPS) * 4]);
uint8_t flags[SUBPEL_SHIFTS];
memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0]));
@@ -1063,8 +1178,8 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
int i;
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
DECLARE_ALIGNED(16, uint16_t, left_col[64]);
DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]);
DECLARE_ALIGNED(16, uint16_t, left_col[MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint16_t, above_data[MAX_SB_SIZE + 16]);
uint16_t *above_row = above_data + 16;
const uint16_t *const_above_row = above_row;
const int bs = 4 << tx_size;
@@ -1220,9 +1335,9 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
int n_left_px, int n_bottomleft_px,
int plane) {
int i;
DECLARE_ALIGNED(16, uint8_t, left_col[64]);
DECLARE_ALIGNED(16, uint8_t, left_col[MAX_SB_SIZE]);
const uint8_t *above_ref = ref - ref_stride;
DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]);
DECLARE_ALIGNED(16, uint8_t, above_data[MAX_SB_SIZE + 16]);
uint8_t *above_row = above_data + 16;
const uint8_t *const_above_row = above_row;
const int bs = 4 << tx_size;

View File

@@ -109,6 +109,12 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,
path = LF_PATH_SLOW;
#endif // !CONFIG_EXT_PARTITION_TYPES
#if CONFIG_EXT_PARTITION
printf("STOPPING: This code has not been modified to work with the "
"extended coding unit size experiment");
exit(EXIT_FAILURE);
#endif // CONFIG_EXT_PARTITION
for (mi_row = start; mi_row < stop;
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
@@ -176,6 +182,12 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
const int num_workers = VPXMIN(nworkers, tile_cols);
int i;
#if CONFIG_EXT_PARTITION
printf("STOPPING: This code has not been modified to work with the "
"extended coding unit size experiment");
exit(EXIT_FAILURE);
#endif // CONFIG_EXT_PARTITION
if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
num_workers > lf_sync->num_workers) {
vp10_loop_filter_dealloc(lf_sync);

View File

@@ -5,8 +5,8 @@
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#define MAX_BLOCK_WIDTH (64)
#define MAX_BLOCK_HEIGHT (64)
#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
#define MAX_STEP (32)
#define MAX_FILTER_TAP (12)

View File

@@ -489,7 +489,7 @@ static void extend_and_predict_highbd(const uint8_t *buf_ptr1,
MACROBLOCKD *xd,
int w, int h, int ref, int xs, int ys) {
DECLARE_ALIGNED(16, uint16_t,
mc_buf_high[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]);
mc_buf_high[(MAX_SB_SIZE + 16) * 2 * (MAX_SB_SIZE + 16) * 2]);
const uint8_t *buf_ptr;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -535,7 +535,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX
MACROBLOCKD *xd,
int w, int h, int ref, int xs, int ys) {
DECLARE_ALIGNED(16, uint8_t, mc_buf[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]);
DECLARE_ALIGNED(16, uint8_t,
mc_buf[(MAX_SB_SIZE + 16) * 2 * (MAX_SB_SIZE + 16) * 2]);
const uint8_t *buf_ptr;
build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w,
@@ -1093,7 +1094,7 @@ static void set_param_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd,
}
#if CONFIG_VAR_TX
xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
set_txfm_ctx(xd->left_txfm_context, xd->mi[0]->mbmi.tx_size, bh);
set_txfm_ctx(xd->above_txfm_context, xd->mi[0]->mbmi.tx_size, bw);
#endif
@@ -1304,38 +1305,38 @@ static void dec_predict_sb_complex(VP10Decoder *const pbi,
uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3];
DECLARE_ALIGNED(16, uint8_t,
tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
DECLARE_ALIGNED(16, uint8_t,
tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
tmp_buf3[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
int dst_stride1[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
int dst_stride2[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
int dst_stride3[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_TX_SQUARE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_TX_SQUARE * len);
dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_TX_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_TX_SQUARE * len);
dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3);
dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len);
dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len);
dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAX_TX_SQUARE * len);
dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAX_TX_SQUARE * len);
} else {
#endif
dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN;
dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN;
dst_buf1[1] = tmp_buf1 + MAX_TX_SQUARE;
dst_buf1[2] = tmp_buf1 + 2 * MAX_TX_SQUARE;
dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN;
dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN;
dst_buf2[1] = tmp_buf2 + MAX_TX_SQUARE;
dst_buf2[2] = tmp_buf2 + 2 * MAX_TX_SQUARE;
dst_buf3[0] = tmp_buf3;
dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN;
dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN;
dst_buf3[1] = tmp_buf3 + MAX_TX_SQUARE;
dst_buf3[2] = tmp_buf3 + 2 * MAX_TX_SQUARE;
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif
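Editorial note: MAXTXLEN is renamed rather than resized; the largest transform presumably stays 32x32, i.e. MAX_TX_SIZE == 32 and MAX_TX_SQUARE == MAX_TX_SIZE * MAX_TX_SIZE == 1024, so each plane's slice of tmp_buf1..3 keeps its former extent.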
@@ -1900,39 +1901,37 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
if (mbmi->obmc) {
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t,
tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
#else
DECLARE_ALIGNED(16, uint8_t,
tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE};
int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE};
int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
assert(mbmi->sb_type >= BLOCK_8X8);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 +
CU_SIZE * CU_SIZE * 2 * len);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 +
CU_SIZE * CU_SIZE * 2 * len);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE;
dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2;
dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE;
dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2;
dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -3281,7 +3280,7 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi,
#if CONFIG_ANS
&td->token_ans,
#endif // CONFIG_ANS
BLOCK_64X64, 4);
BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2);
}
pbi->mb.corrupted |= td->xd.corrupted;
if (pbi->mb.corrupted)
@@ -3396,7 +3395,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data,
#if CONFIG_ANS
&tile_data->token_ans,
#endif // CONFIG_ANS
BLOCK_64X64, 4);
BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2);
}
}
return !tile_data->xd.corrupted;

View File

@@ -39,8 +39,8 @@ typedef struct TileData {
#endif // CONFIG_ANS
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]);
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
} TileData;
typedef struct TileWorkerData {
@@ -52,8 +52,8 @@ typedef struct TileWorkerData {
FRAME_COUNTS counts;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]);
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
struct vpx_internal_error_info error_info;
} TileWorkerData;

View File

@@ -62,7 +62,7 @@ static int decode_coefs(const MACROBLOCKD *xd,
const vpx_prob *prob;
unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
unsigned int (*eob_branch_count)[COEFF_CONTEXTS];
uint8_t token_cache[32 * 32];
uint8_t token_cache[MAX_TX_SQUARE];
const uint8_t *band_translate = get_band_translate(tx_size);
int dq_shift;
int v, token;
@@ -245,7 +245,7 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd,
const vpx_prob *prob;
unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
unsigned int (*eob_branch_count)[COEFF_CONTEXTS];
uint8_t token_cache[32 * 32];
uint8_t token_cache[MAX_TX_SQUARE];
const uint8_t *band_translate = get_band_translate(tx_size);
int dq_shift;
int v, token;

View File

@@ -116,8 +116,8 @@ void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
VP10_COMMON *const cm = &cpi->common;
const int mi_offset = mi_row * cm->mi_cols + mi_col;
const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
const int bw = num_8x8_blocks_wide_lookup[BLOCK_LARGEST];
const int bh = num_8x8_blocks_high_lookup[BLOCK_LARGEST];
const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
int x, y;

View File

@@ -415,9 +415,9 @@ static void cyclic_refresh_update_map(VP10_COMP *const cpi) {
bl_index = mi_row * cm->mi_cols + mi_col;
// Loop through all 8x8 blocks in superblock and update map.
xmis =
VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_64X64]);
VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_LARGEST]);
ymis =
VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_64X64]);
VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_LARGEST]);
for (y = 0; y < ymis; y++) {
for (x = 0; x < xmis; x++) {
const int bl_index2 = bl_index + y * cm->mi_cols + x;

View File

@@ -32,9 +32,11 @@ static const int segment_id[ENERGY_SPAN] = {0, 1, 1, 2, 3, 4};
#define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN]
DECLARE_ALIGNED(16, static const uint8_t, vp10_64_zeros[64]) = {0};
DECLARE_ALIGNED(16, static const uint8_t,
vp10_all_zeros[MAX_SB_SIZE]) = {0};
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, static const uint16_t, vp10_highbd_64_zeros[64]) = {0};
DECLARE_ALIGNED(16, static const uint16_t,
vp10_highbd_all_zeros[MAX_SB_SIZE]) = {0};
#endif
unsigned int vp10_vaq_segment_id(int energy) {
@@ -153,17 +155,17 @@ static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(vp10_highbd_64_zeros), 0, bw, bh,
CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros), 0, bw, bh,
&sse, &avg);
sse >>= 2 * (xd->bd - 8);
avg >>= (xd->bd - 8);
} else {
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
vp10_64_zeros, 0, bw, bh, &sse, &avg);
vp10_all_zeros, 0, bw, bh, &sse, &avg);
}
#else
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
vp10_64_zeros, 0, bw, bh, &sse, &avg);
vp10_all_zeros, 0, bw, bh, &sse, &avg);
#endif // CONFIG_VP9_HIGHBITDEPTH
var = sse - (((int64_t)avg * avg) / (bw * bh));
return (256 * var) / (bw * bh);
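Editorial note on why a zero reference with stride 0 works: the variance kernel then returns sse = sum(src^2) and avg = sum(src), so with N = bw * bh the line above computes var = sum(src^2) - (sum(src))^2 / N, which is N times the per-pixel variance, and (256 * var) / N is that variance in Q8; the buffer rename changes none of this.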
@@ -172,17 +174,17 @@ static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(vp10_highbd_64_zeros),
CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros),
0, &sse);
} else {
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
vp10_64_zeros, 0, &sse);
vp10_all_zeros, 0, &sse);
}
#else
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
vp10_64_zeros, 0, &sse);
vp10_all_zeros, 0, &sse);
#endif // CONFIG_VP9_HIGHBITDEPTH
return (256 * var) >> num_pels_log2_lookup[bs];
}

View File

@@ -1893,7 +1893,7 @@ static void write_modes(VP10_COMP *const cpi,
for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, 0,
mi_row, mi_col, BLOCK_64X64);
mi_row, mi_col, BLOCK_LARGEST);
}
}
}

View File

@@ -28,7 +28,7 @@ typedef struct {
} diff;
typedef struct macroblock_plane {
DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
tran_low_t *qcoeff;
tran_low_t *coeff;
uint16_t *eobs;
@@ -63,10 +63,10 @@ typedef struct {
} MB_MODE_INFO_EXT;
typedef struct {
uint8_t best_palette_color_map[4096];
double kmeans_data_buf[2 * 4096];
uint8_t kmeans_indices_buf[4096];
uint8_t kmeans_pre_indices_buf[4096];
uint8_t best_palette_color_map[MAX_SB_SQUARE];
double kmeans_data_buf[2 * MAX_SB_SQUARE];
uint8_t kmeans_indices_buf[MAX_SB_SQUARE];
uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE];
} PALETTE_BUFFER;
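Editorial note: the hard-coded 4096 was 64 * 64; with 128x128 superblocks MAX_SB_SQUARE grows to 16384, keeping the palette buffers at one entry per superblock pixel.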
typedef struct macroblock MACROBLOCK;
@@ -140,11 +140,11 @@ struct macroblock {
// Notes transform blocks where no coefficents are coded.
// Set during mode selection. Read during block encoding.
uint8_t zcoeff_blk[TX_SIZES][256];
uint8_t zcoeff_blk[TX_SIZES][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4];
#if CONFIG_VAR_TX
uint8_t blk_skip[MAX_MB_PLANE][256];
uint8_t blk_skip[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4];
#if CONFIG_REF_MV
uint8_t blk_skip_drl[MAX_MB_PLANE][256];
uint8_t blk_skip_drl[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4];
#endif
#endif
@@ -164,12 +164,12 @@ struct macroblock {
int quant_fp;
// skip forward transform and quantization
uint8_t skip_txfm[MAX_MB_PLANE][4];
uint8_t skip_txfm[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB];
#define SKIP_TXFM_NONE 0
#define SKIP_TXFM_AC_DC 1
#define SKIP_TXFM_AC_ONLY 2
int64_t bsse[MAX_MB_PLANE][4];
int64_t bsse[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB];
// Used to store sub partition's choices.
MV pred_mv[MAX_REF_FRAMES];

View File

@@ -11,11 +11,14 @@
#include "vp10/encoder/context_tree.h"
#include "vp10/encoder/encoder.h"
static const BLOCK_SIZE square[] = {
static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 2] = {
BLOCK_8X8,
BLOCK_16X16,
BLOCK_32X32,
BLOCK_64X64,
#if CONFIG_EXT_PARTITION
BLOCK_128X128,
#endif // CONFIG_EXT_PARTITION
};
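Editorial check: MAX_SB_SIZE_LOG2 - 2 counts the square sizes from 8x8 upward, giving 5 entries (8, 16, 32, 64, 128) under CONFIG_EXT_PARTITION and the old 4 otherwise, which matches the initializer above.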
static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
@@ -53,6 +56,14 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
ctx->eobs_pbuf[i][k] = ctx->eobs[i][k];
}
}
if (cm->allow_screen_content_tools) {
for (i = 0; i < 2; ++i) {
CHECK_MEM_ERROR(cm, ctx->color_index_map[i],
vpx_memalign(32,
num_pix * sizeof(*ctx->color_index_map[i])));
}
}
}
static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
@@ -177,8 +188,13 @@ static void free_tree_contexts(PC_TREE *tree) {
// represents the state of our search.
void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
int i, j;
#if CONFIG_EXT_PARTITION
const int leaf_nodes = 256;
const int tree_nodes = 256 + 64 + 16 + 4 + 1;
#else
const int leaf_nodes = 64;
const int tree_nodes = 64 + 16 + 4 + 1;
#endif // CONFIG_EXT_PARTITION
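Editorial arithmetic: a 128x128 superblock holds 256 8x8 leaves, and a complete square-partition tree over it has 256 + 64 + 16 + 4 + 1 = 341 nodes; the 64x64 figures are the old 64 and 64 + 16 + 4 + 1 = 85.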
int pc_tree_index = 0;
PC_TREE *this_pc;
PICK_MODE_CONTEXT *this_leaf;
@@ -217,7 +233,7 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
// Each node has 4 leaf nodes, fill each block_size level of the tree
// from leafs to the root.
for (nodes = 16; nodes > 0; nodes >>= 2) {
for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
for (i = 0; i < nodes; ++i) {
PC_TREE *const tree = &td->pc_tree[pc_tree_index];
alloc_tree_contexts(cm, tree, 4 << (2 * square_index));
@@ -233,11 +249,17 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
}
void vp10_free_pc_tree(ThreadData *td) {
#if CONFIG_EXT_PARTITION
const int leaf_nodes = 256;
const int tree_nodes = 256 + 64 + 16 + 4 + 1;
#else
const int leaf_nodes = 64;
const int tree_nodes = 64 + 16 + 4 + 1;
#endif // CONFIG_EXT_PARTITION
int i;
// Set up all 4x4 mode contexts
for (i = 0; i < 64; ++i)
for (i = 0; i < leaf_nodes; ++i)
free_mode_context(&td->leaf_tree[i]);
// Sets up all the leaf nodes in the tree.

View File

@@ -49,7 +49,6 @@ typedef struct {
// For current partition, only if all Y, U, and V transform blocks'
// coefficients are quantized to 0, skippable is set to 1.
int skippable;
uint8_t skip_txfm[MAX_MB_PLANE << 2];
int best_mode_index;
int hybrid_pred_diff;
int comp_pred_diff;

View File

@@ -189,7 +189,7 @@ int vp10_denoiser_filter_c(const uint8_t *sig, int sig_stride,
static uint8_t *block_start(uint8_t *framebuf, int stride,
int mi_row, int mi_col) {
return framebuf + (stride * mi_row * 8) + (mi_col * 8);
return framebuf + (stride * mi_row * MI_SIZE) + (mi_col * MI_SIZE);
}
static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,

View File

@@ -93,7 +93,16 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
// purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
static const uint8_t VP9_VAR_OFFS[64] = {
static const uint8_t VP10_VAR_OFFS[MAX_SB_SIZE] = {
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
#if CONFIG_EXT_PARTITION
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
@@ -102,10 +111,20 @@ static const uint8_t VP9_VAR_OFFS[64] = {
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128
#endif // CONFIG_EXT_PARTITION
};
#if CONFIG_VP9_HIGHBITDEPTH
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
static const uint16_t VP10_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
#if CONFIG_EXT_PARTITION
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
@@ -114,9 +133,19 @@ static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128
#endif // CONFIG_EXT_PARTITION
};
static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
static const uint16_t VP10_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
#if CONFIG_EXT_PARTITION
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
@@ -125,9 +154,19 @@ static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4
#endif // CONFIG_EXT_PARTITION
};
static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
static const uint16_t VP10_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
#if CONFIG_EXT_PARTITION
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
@@ -136,6 +175,7 @@ static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16
#endif // CONFIG_EXT_PARTITION
};
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -144,7 +184,7 @@ unsigned int vp10_get_sby_perpixel_variance(VP10_COMP *cpi,
BLOCK_SIZE bs) {
unsigned int sse;
const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
VP9_VAR_OFFS, 0, &sse);
VP10_VAR_OFFS, 0, &sse);
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
@@ -155,18 +195,18 @@ unsigned int vp10_high_get_sby_perpixel_variance(
switch (bd) {
case 10:
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10),
CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10),
0, &sse);
break;
case 12:
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12),
CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12),
0, &sse);
break;
case 8:
default:
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8),
CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8),
0, &sse);
break;
}
@@ -406,6 +446,13 @@ typedef struct {
v32x32 split[4];
} v64x64;
#if CONFIG_EXT_PARTITION
typedef struct {
partition_variance part_variances;
v64x64 split[4];
} v128x128;
#endif // CONFIG_EXT_PARTITION
typedef struct {
partition_variance *part_variances;
var *split[4];
@@ -415,12 +462,24 @@ typedef enum {
V16X16,
V32X32,
V64X64,
#if CONFIG_EXT_PARTITION
V128X128,
#endif // CONFIG_EXT_PARTITION
} TREE_LEVEL;
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
int i;
node->part_variances = NULL;
switch (bsize) {
#if CONFIG_EXT_PARTITION
case BLOCK_128X128: {
v128x128 *vt = (v128x128 *) data;
node->part_variances = &vt->part_variances;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].part_variances.none;
break;
}
#endif // CONFIG_EXT_PARTITION
case BLOCK_64X64: {
v64x64 *vt = (v64x64 *) data;
node->part_variances = &vt->part_variances;
@@ -770,7 +829,8 @@ static int choose_partitioning(VP10_COMP *cpi,
const uint8_t *d;
int sp;
int dp;
int pixels_wide = 64, pixels_high = 64;
int pixels_wide = 8 * num_8x8_blocks_wide_lookup[BLOCK_LARGEST];
int pixels_high = 8 * num_8x8_blocks_high_lookup[BLOCK_LARGEST];
int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]};
@@ -781,10 +841,11 @@ static int choose_partitioning(VP10_COMP *cpi,
int variance4x4downsample[16];
int segment_id = CR_SEGMENT_ID_BASE;
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map :
cm->last_frame_seg_map;
segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col);
if (cyclic_refresh_segment_id_boosted(segment_id)) {
int q = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex);
@@ -792,11 +853,12 @@ static int choose_partitioning(VP10_COMP *cpi,
}
}
#if CONFIG_EXT_PARTITION_TYPES
assert(0);
#endif
#if CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
printf("Not yet implemented: choose_partitioning\n");
exit(-1);
#endif  // CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_LARGEST);
if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3);
@@ -813,8 +875,20 @@ static int choose_partitioning(VP10_COMP *cpi,
const YV12_BUFFER_CONFIG *yv12_g = NULL;
unsigned int y_sad, y_sad_g;
const BLOCK_SIZE bsize = BLOCK_32X32
+ (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows);
const int max_mi_block_size = num_8x8_blocks_wide_lookup[BLOCK_LARGEST];
const int is_right_edge = mi_col + max_mi_block_size / 2 > cm->mi_cols;
const int is_bottom_edge = mi_row + max_mi_block_size / 2 > cm->mi_rows;
BLOCK_SIZE bsize;
if (is_right_edge && is_bottom_edge)
bsize = get_subsize(BLOCK_LARGEST, PARTITION_SPLIT);
else if (is_right_edge)
bsize = get_subsize(BLOCK_LARGEST, PARTITION_VERT);
else if (is_bottom_edge)
bsize = get_subsize(BLOCK_LARGEST, PARTITION_HORZ);
else
bsize = BLOCK_LARGEST;
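/* Editorial example: with BLOCK_LARGEST == BLOCK_128X128, a superblock
 * hanging off the right frame edge gets the PARTITION_VERT subsize
 * BLOCK_64X128, one off the bottom edge gets BLOCK_128X64, and one off
 * both gets BLOCK_64X64; this is the same halving the removed
 * BLOCK_32X32-based arithmetic applied to 64x64 superblocks. */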
assert(yv12 != NULL);
yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
@@ -834,7 +908,7 @@ static int choose_partitioning(VP10_COMP *cpi,
&cm->frame_refs[LAST_FRAME - 1].sf);
mbmi->ref_frame[0] = LAST_FRAME;
mbmi->ref_frame[1] = NONE;
mbmi->sb_type = BLOCK_64X64;
mbmi->sb_type = BLOCK_LARGEST;
mbmi->mv[0].as_int = 0;
mbmi->interp_filter = BILINEAR;
@@ -849,7 +923,7 @@ static int choose_partitioning(VP10_COMP *cpi,
x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv;
}
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_LARGEST);
for (i = 1; i <= 2; ++i) {
struct macroblock_plane *p = &x->plane[i];
@@ -868,33 +942,29 @@ static int choose_partitioning(VP10_COMP *cpi,
d = xd->plane[0].dst.buf;
dp = xd->plane[0].dst.stride;
// If the y_sad is very small, take 64x64 as partition and exit.
// Don't check on boosted segment for now, as 64x64 is suppressed there.
if (segment_id == CR_SEGMENT_ID_BASE &&
y_sad < cpi->vbp_threshold_sad) {
const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
if (mi_col + block_width / 2 < cm->mi_cols &&
mi_row + block_height / 2 < cm->mi_rows) {
set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
// If the y_sad is very small, take the largest partition and exit.
// Don't check on boosted segment for now, as largest is suppressed there.
if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
if (!is_right_edge && !is_bottom_edge) {
set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_LARGEST);
return 0;
}
}
} else {
d = VP9_VAR_OFFS;
d = VP10_VAR_OFFS;
dp = 0;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (xd->bd) {
case 10:
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10);
d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10);
break;
case 12:
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12);
d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12);
break;
case 8:
default:
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8);
d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8);
break;
}
}
@@ -1699,15 +1769,6 @@ static void rd_pick_sb_modes(VP10_COMP *cpi,
p[i].eobs = ctx->eobs_pbuf[i][0];
}
if (cm->current_video_frame == 0 && cm->allow_screen_content_tools) {
for (i = 0; i < 2; ++i) {
if (ctx->color_index_map[i] == 0) {
CHECK_MEM_ERROR(cm, ctx->color_index_map[i],
vpx_memalign(16, (ctx->num_4x4_blk << 4) *
sizeof(*ctx->color_index_map[i])));
}
}
}
for (i = 0; i < 2; ++i)
pd[i].color_index_map = ctx->color_index_map[i];
@@ -2084,17 +2145,16 @@ static void update_stats(VP10_COMMON *cm, ThreadData *td
}
}
typedef struct {
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE];
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sa[8];
PARTITION_CONTEXT sl[8];
ENTROPY_CONTEXT a[2 * MI_BLOCK_SIZE * MAX_MB_PLANE];
ENTROPY_CONTEXT l[2 * MI_BLOCK_SIZE * MAX_MB_PLANE];
PARTITION_CONTEXT sa[MI_BLOCK_SIZE];
PARTITION_CONTEXT sl[MI_BLOCK_SIZE];
#if CONFIG_VAR_TX
TXFM_CONTEXT *p_ta;
TXFM_CONTEXT *p_tl;
TXFM_CONTEXT ta[8];
TXFM_CONTEXT tl[8];
TXFM_CONTEXT ta[MI_BLOCK_SIZE];
TXFM_CONTEXT tl[MI_BLOCK_SIZE];
#endif
} RD_SEARCH_MACROBLOCK_CONTEXT;
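Editorial note: entropy contexts are tracked per 4x4 column and each mi unit spans two 4x4 columns, so 2 * MI_BLOCK_SIZE entries per plane cover a full superblock edge (32 for 128-pixel superblocks, 16 for 64-pixel ones); the old literal 16 encoded only the latter.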
@@ -2892,11 +2952,11 @@ static void rd_use_partition(VP10_COMP *cpi,
// We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success.
if (bsize == BLOCK_64X64)
if (bsize == BLOCK_LARGEST)
assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
if (do_recon) {
int output_enabled = (bsize == BLOCK_64X64);
int output_enabled = (bsize == BLOCK_LARGEST);
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
pc_tree);
}
@@ -2909,21 +2969,38 @@ static void rd_use_partition(VP10_COMP *cpi,
}
static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
BLOCK_8X8, BLOCK_8X8, BLOCK_8X8,
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
BLOCK_16X16
BLOCK_4X4, // 4x4
BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 4x8, 8x4, 8x8
BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 8x16, 16x8, 16x16
BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 16x32, 32x16, 32x32
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
};
static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
BLOCK_8X8, BLOCK_16X16, BLOCK_16X16,
BLOCK_16X16, BLOCK_32X32, BLOCK_32X32,
BLOCK_32X32, BLOCK_64X64, BLOCK_64X64,
BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
BLOCK_64X64
BLOCK_8X8, // 4x4
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 4x8, 8x4, 8x8
BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, // 8x16, 16x8, 16x16
BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32
BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
};
// Next square block size less or equal than current block size.
static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
BLOCK_4X4, // 4x4
BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x8, 8x4, 8x8
BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 8x16, 16x8, 16x16
BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 16x32, 32x16, 32x32
BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
};
// Look at all the mode_info entries for blocks that are part of this
// partition and find the min and max values for sb_type.
@@ -2954,15 +3031,6 @@ static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
}
}
// Next square block size less or equal than current block size.
static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
BLOCK_8X8, BLOCK_8X8, BLOCK_8X8,
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
BLOCK_32X32, BLOCK_32X32, BLOCK_32X32,
BLOCK_64X64
};
// Look at neighboring blocks and set a min and max partition size based on
// what they chose.
static void rd_auto_partition_range(VP10_COMP *cpi, const TileInfo *const tile,
@@ -2978,13 +3046,13 @@ static void rd_auto_partition_range(VP10_COMP *cpi, const TileInfo *const tile,
const int col8x8_remaining = tile->mi_col_end - mi_col;
int bh, bw;
BLOCK_SIZE min_size = BLOCK_4X4;
BLOCK_SIZE max_size = BLOCK_64X64;
BLOCK_SIZE max_size = BLOCK_LARGEST;
int bs_hist[BLOCK_SIZES] = {0};
// Trap case where we do not have a prediction.
if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
// Default "min to max" and "max to min"
min_size = BLOCK_64X64;
min_size = BLOCK_LARGEST;
max_size = BLOCK_4X4;
// NOTE: each call to get_sb_partition_size_range() uses the previous
@@ -3054,7 +3122,7 @@ static void set_partition_range(VP10_COMMON *cm, MACROBLOCKD *xd,
MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
BLOCK_SIZE bs, min_size, max_size;
min_size = BLOCK_64X64;
min_size = BLOCK_LARGEST;
max_size = BLOCK_4X4;
if (prev_mi) {
@@ -3104,16 +3172,27 @@ static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
}
#if CONFIG_FP_MB_STATS
const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4};
const int num_16x16_blocks_high_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4};
const int qindex_skip_threshold_lookup[BLOCK_SIZES] =
{0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120};
{0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120,
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct numbers here?
130, 130, 150
#endif // CONFIG_EXT_PARTITION
};
const int qindex_split_threshold_lookup[BLOCK_SIZES] =
{0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120};
{0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120,
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct numbers here?
160, 160, 240
#endif // CONFIG_EXT_PARTITION
};
const int complexity_16x16_blocks_threshold[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6};
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6,
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct numbers here?
8, 8, 10
#endif // CONFIG_EXT_PARTITION
};
typedef enum {
MV_ZERO = 0,
@@ -3526,8 +3605,8 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td,
pc_tree->partitioning = PARTITION_NONE;
// Adjust dist breakout threshold according to the partition size.
dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
b_height_log2_lookup[bsize]);
dist_breakout_thr >>= (2 * (MAX_SB_SIZE_LOG2 - 2))
- (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
rate_breakout_thr *= num_pels_log2_lookup[bsize];
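Worked example for the new shift: with 128x128 superblocks MAX_SB_SIZE_LOG2 is 7, so 2 * (MAX_SB_SIZE_LOG2 - 2) = 10; a 64x64 block (width and height log2 lookups of 4 each) gets dist_breakout_thr >>= 2, while a 128x128 block gets >>= 0. With 64x64 superblocks the expression reduces to the old constant 8, so behaviour there is unchanged.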
@@ -4124,12 +4203,12 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td,
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
pc_tree->index != 3) {
int output_enabled = (bsize == BLOCK_64X64);
int output_enabled = (bsize == BLOCK_LARGEST);
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
bsize, pc_tree);
}
if (bsize == BLOCK_64X64) {
if (bsize == BLOCK_LARGEST) {
assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip));
assert(best_rdc.rate < INT_MAX);
assert(best_rdc.dist < INT64_MAX);
@@ -4149,6 +4228,11 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
MACROBLOCKD *const xd = &x->e_mbd;
SPEED_FEATURES *const sf = &cpi->sf;
int mi_col;
#if CONFIG_EXT_PARTITION
const int leaf_nodes = 256;
#else
const int leaf_nodes = 64;
#endif // CONFIG_EXT_PARTITION
// Initialize the left context for the new SB row
vp10_zero_left_context(xd);
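The two leaf_nodes counts are just the number of 8x8 leaf blocks in one superblock: (128 / 8)^2 = 256 with CONFIG_EXT_PARTITION, (64 / 8)^2 = 64 without.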
@@ -4170,10 +4254,10 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
MODE_INFO **mi = cm->mi_grid_visible + idx_str;
if (sf->adaptive_pred_interp_filter) {
for (i = 0; i < 64; ++i)
for (i = 0; i < leaf_nodes; ++i)
td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
for (i = 0; i < 64; ++i) {
for (i = 0; i < leaf_nodes; ++i) {
td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
@@ -4187,29 +4271,29 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
if (seg->enabled) {
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
int segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col);
seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
}
x->source_variance = UINT_MAX;
if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
const BLOCK_SIZE bsize =
seg_skip ? BLOCK_64X64 : sf->always_this_block_size;
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
seg_skip ? BLOCK_LARGEST : sf->always_this_block_size;
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST);
set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, &dummy_rate, &dummy_dist,
BLOCK_LARGEST, &dummy_rate, &dummy_dist,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
1, td->pc_root);
} else if (cpi->partition_search_skippable_frame) {
BLOCK_SIZE bsize;
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST);
bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, &dummy_rate, &dummy_dist,
BLOCK_LARGEST, &dummy_rate, &dummy_dist,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
@@ -4218,7 +4302,7 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
cm->frame_type != KEY_FRAME) {
choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, &dummy_rate, &dummy_dist,
BLOCK_LARGEST, &dummy_rate, &dummy_dist,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
@@ -4226,12 +4310,12 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
} else {
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST);
rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
&x->min_partition_size,
&x->max_partition_size);
}
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_LARGEST,
&dummy_rdc,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
@@ -4930,19 +5014,15 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
#if CONFIG_OBMC
if (mbmi->obmc) {
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t,
tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
#else
DECLARE_ALIGNED(16, uint8_t,
tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE};
int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE};
int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
assert(mbmi->sb_type >= BLOCK_8X8);
@@ -4950,21 +5030,19 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(
tmp_buf1 + CU_SIZE * CU_SIZE * 2 * len);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(
tmp_buf2 + CU_SIZE * CU_SIZE * 2 * len);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE;
dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2;
dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE;
dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2;
dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
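The offset pattern above, isolated into a standalone sketch (plane count and size hard-coded for illustration; MAX_SB_SQUARE is MAX_SB_SIZE * MAX_SB_SIZE):

#include <stdint.h>
#include <stdio.h>

#define SB 128            /* MAX_SB_SIZE under CONFIG_EXT_PARTITION */
#define SB_SQ (SB * SB)   /* MAX_SB_SQUARE */

int main(void) {
  static uint8_t tmp_buf[3 * SB_SQ];  /* 3 == MAX_MB_PLANE */
  uint8_t *dst_buf[3];
  int plane;
  /* One MAX_SB_SQUARE slot per plane, each addressed with stride MAX_SB_SIZE.
   * High-bitdepth builds store uint16_t samples, hence the extra
   * sizeof(uint16_t) factor in the CONVERT_TO_BYTEPTR() offsets above. */
  for (plane = 0; plane < 3; ++plane)
    dst_buf[plane] = tmp_buf + plane * SB_SQ;
  printf("plane offsets: %td %td %td\n", dst_buf[0] - tmp_buf,
         dst_buf[1] - tmp_buf, dst_buf[2] - tmp_buf);
  return 0;
}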
@@ -5447,38 +5525,35 @@ static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td,
int i, ctx;
uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3];
DECLARE_ALIGNED(16, uint8_t,
tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
DECLARE_ALIGNED(16, uint8_t,
tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]);
int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf3[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
int dst_stride1[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
int dst_stride2[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
int dst_stride3[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_TX_SQUARE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_TX_SQUARE * len);
dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_TX_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_TX_SQUARE * len);
dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3);
dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len);
dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len);
dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAX_TX_SQUARE * len);
dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAX_TX_SQUARE * len);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN;
dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN;
dst_buf1[1] = tmp_buf1 + MAX_TX_SQUARE;
dst_buf1[2] = tmp_buf1 + 2 * MAX_TX_SQUARE;
dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN;
dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN;
dst_buf2[1] = tmp_buf2 + MAX_TX_SQUARE;
dst_buf2[2] = tmp_buf2 + 2 * MAX_TX_SQUARE;
dst_buf3[0] = tmp_buf3;
dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN;
dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN;
dst_buf3[1] = tmp_buf3 + MAX_TX_SQUARE;
dst_buf3[2] = tmp_buf3 + 2 * MAX_TX_SQUARE;
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -6037,7 +6112,8 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
sse_uv = 0;
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
#if CONFIG_VAR_TX
ENTROPY_CONTEXT ctxa[16], ctxl[16];
ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE];
const struct macroblockd_plane *const pd = &xd->plane[plane];
int coeff_ctx = 1;
@@ -6081,7 +6157,8 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
#endif // CONFIG_EXT_TX
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
#if CONFIG_VAR_TX
ENTROPY_CONTEXT ctxa[16], ctxl[16];
ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE];
const struct macroblockd_plane *const pd = &xd->plane[0];
int coeff_ctx = 1;
#endif // CONFIG_VAR_TX

View File

@@ -29,8 +29,8 @@
#include "vp10/encoder/tokenize.h"
struct optimize_ctx {
ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
ENTROPY_CONTEXT ta[MAX_MB_PLANE][2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT tl[MAX_MB_PLANE][2 * MI_BLOCK_SIZE];
};
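The sizing rule here: MI_BLOCK_SIZE counts 8x8 mode-info units across a superblock, but entropy contexts are tracked per 4x4 column/row, hence the factor of two. The old literal 16 was 2 * 8 for 64x64 superblocks; with 128x128 superblocks MI_BLOCK_SIZE is presumably 16, giving arrays of 32 entries.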
void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
@@ -96,9 +96,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
const int ref = is_inter_block(&xd->mi[0]->mbmi);
vp10_token_state tokens[1025][2];
unsigned best_index[1025][2];
uint8_t token_cache[1024];
vp10_token_state tokens[MAX_TX_SQUARE+1][2];
unsigned best_index[MAX_TX_SQUARE+1][2];
uint8_t token_cache[MAX_TX_SQUARE];
const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
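Note the +1: the trellis keeps one vp10_token_state per coefficient position plus one for the end-of-block node, while token_cache needs exactly one entry per coefficient. Since the maximum transform stays 32x32, MAX_TX_SQUARE + 1 = 1025 and MAX_TX_SQUARE = 1024 match the old literals; the change only removes the magic numbers.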

View File

@@ -1955,6 +1955,8 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
CHECK_MEM_ERROR(cm, x->palette_buffer,
vpx_memalign(16, sizeof(*x->palette_buffer)));
}
vp10_free_pc_tree(&cpi->td);
vp10_setup_pc_tree(&cpi->common, &cpi->td);
}
vp10_reset_segment_features(cm);
@@ -3147,7 +3149,7 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) {
}
if (lf->filter_level > 0) {
#if CONFIG_VAR_TX
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION
vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
#else
if (cpi->num_workers > 1)

View File

@@ -312,8 +312,8 @@ typedef struct VP10_COMP {
QUANTS quants;
ThreadData td;
MB_MODE_INFO_EXT *mbmi_ext_base;
DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); // 8: SIMD width
DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); // 8: SIMD width
VP10_COMMON common;
VP10EncoderConfig oxcf;
struct lookahead_ctx *lookahead;

View File

@@ -366,13 +366,13 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd,
#if CONFIG_VP9_HIGHBITDEPTH
if (second_pred != NULL) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
y_stride);
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride,
sse1);
} else {
DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
}
@@ -384,7 +384,7 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd,
#else
(void) xd;
if (second_pred != NULL) {
DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
} else {
@@ -694,7 +694,7 @@ static int upsampled_pref_error(const MACROBLOCKD *xd,
unsigned int besterr;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
if (second_pred != NULL)
vpx_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
y_stride);
@@ -704,9 +704,9 @@ static int upsampled_pref_error(const MACROBLOCKD *xd,
besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride,
sse);
} else {
DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
#else
DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
(void) xd;
#endif // CONFIG_VP9_HIGHBITDEPTH
if (second_pred != NULL)
@@ -1961,10 +1961,10 @@ unsigned int vp10_int_pro_motion_estimation(const VP10_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
DECLARE_ALIGNED(16, int16_t, hbuf[128]);
DECLARE_ALIGNED(16, int16_t, vbuf[128]);
DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
DECLARE_ALIGNED(16, int16_t, hbuf[2 * MAX_SB_SIZE]);
DECLARE_ALIGNED(16, int16_t, vbuf[2 * MAX_SB_SIZE]);
DECLARE_ALIGNED(16, int16_t, src_hbuf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int16_t, src_vbuf[MAX_SB_SQUARE]);
int idx;
const int bw = 4 << b_width_log2_lookup[bsize];
const int bh = 4 << b_height_log2_lookup[bsize];
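Sanity check on the widened buffers: b_width_log2_lookup[] is in 4-pel units, so bw = 4 << b_width_log2_lookup[bsize] gives 4 << 4 = 64 for BLOCK_64X64 and 4 << 5 = 128 for BLOCK_128X128, which is what pushes the projection buffers up to multiples of MAX_SB_SIZE.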

View File

@@ -41,7 +41,7 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
VP10_COMMON *const cm = &cpi->common;
int64_t filt_err;
#if CONFIG_VAR_TX
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION
vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
1, partial_frame);
#else

View File

@@ -461,7 +461,7 @@ void vp10_init_quantizer(VP10_COMP *cpi) {
cpi->uv_dequant[q][i] = quant;
}
for (i = 2; i < 8; i++) {
for (i = 2; i < 8; i++) { // 8: SIMD width
quants->y_quant[q][i] = quants->y_quant[q][1];
quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
quants->y_round_fp[q][i] = quants->y_round_fp[q][1];
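The lane layout in isolation, as a small sketch (hypothetical helper, not code from this commit; the real tables are the QUANTS members):

#include <stdint.h>

/* Lane 0 holds the DC value, lane 1 the AC value; SIMD kernels load all
 * 8 lanes at once, so lanes 2..7 simply repeat the AC value. */
static void fill_simd_lanes(int16_t q[8], int16_t dc, int16_t ac) {
  int i;
  q[0] = dc;
  q[1] = ac;
  for (i = 2; i < 8; i++)  /* 8: SIMD width */
    q[i] = ac;
}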

View File

@@ -27,6 +27,7 @@ typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr,
const scan_order *sc);
typedef struct {
// 0: dc, 1: ac, 2-7: ac repeated to SIMD width
DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]);

View File

@@ -62,7 +62,10 @@ void vp10_rd_cost_init(RD_COST *rd_cost) {
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
#if CONFIG_EXT_PARTITION
48, 48, 64
#endif // CONFIG_EXT_PARTITION
};
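Decoding the new entries with the << 2 convention above: 48 / 4 = x12 for the 64x128 and 128x64 shapes and 64 / 4 = x16 for 128x128, continuing the pattern where the factor doubles with each doubling of block dimension (x8 at 64x64).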
static void fill_mode_costs(VP10_COMP *cpi) {
@@ -560,8 +563,8 @@ void vp10_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
const struct macroblockd_plane *pd,
ENTROPY_CONTEXT t_above[16],
ENTROPY_CONTEXT t_left[16]) {
ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE],
ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]) {
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
@@ -935,7 +938,7 @@ void vp10_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
int mode;
for (mode = 0; mode < top_mode; ++mode) {
const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_LARGEST);
BLOCK_SIZE bs;
for (bs = min_size; bs <= max_size; ++bs) {
int *const fact = &factor_buf[bs][mode];

View File

@@ -330,8 +330,8 @@ void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame);
void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
const struct macroblockd_plane *pd,
ENTROPY_CONTEXT t_above[16],
ENTROPY_CONTEXT t_left[16]);
ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE],
ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]);
void vp10_set_rd_speed_thresholds(struct VP10_COMP *cpi);

View File

@@ -102,8 +102,8 @@ typedef struct {
struct rdcost_block_args {
const VP10_COMP *cpi;
MACROBLOCK *x;
ENTROPY_CONTEXT t_above[16];
ENTROPY_CONTEXT t_left[16];
ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE];
int this_rate;
int64_t this_dist;
int64_t this_sse;
@@ -376,8 +376,8 @@ static void get_energy_distribution_fine(const VP10_COMP *cpi,
unsigned int esq[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int var[16];
double total = 0;
const int f_index = bsize - 6;
const int f_index = bsize - BLOCK_16X16;
if (f_index < 0) {
int i, j, index;
int w_shift = bw == 8 ? 1 : 2;
@@ -890,7 +890,7 @@ static int cost_coeffs(MACROBLOCK *x,
const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
x->token_costs[tx_size][type][is_inter_block(mbmi)];
uint8_t token_cache[32 * 32];
uint8_t token_cache[MAX_TX_SQUARE];
#if CONFIG_VAR_TX
int pt = coeff_ctx;
#else
@@ -1045,10 +1045,10 @@ static void dist_block(const VP10_COMP *cpi, MACROBLOCK *x, int plane,
if (*eob) {
const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, recon16[32 * 32]); // MAX TX_SIZE**2
DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
uint8_t *recon = (uint8_t*)recon16;
#else
DECLARE_ALIGNED(16, uint8_t, recon[32 * 32]); // MAX TX_SIZE**2
DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
const PLANE_TYPE plane_type = plane == 0 ? PLANE_TYPE_Y : PLANE_TYPE_UV;
@@ -1064,18 +1064,18 @@ static void dist_block(const VP10_COMP *cpi, MACROBLOCK *x, int plane,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
recon = CONVERT_TO_BYTEPTR(recon);
inv_txfm_param.bd = xd->bd;
vpx_highbd_convolve_copy(dst, dst_stride, recon, 32,
vpx_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE,
NULL, 0, NULL, 0, bs, bs, xd->bd);
highbd_inv_txfm_add(dqcoeff, recon, 32, &inv_txfm_param);
highbd_inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
} else
#endif // CONFIG_VP9_HIGHBITDEPTH
{
vpx_convolve_copy(dst, dst_stride, recon, 32,
vpx_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE,
NULL, 0, NULL, 0, bs, bs);
inv_txfm_add(dqcoeff, recon, 32, &inv_txfm_param);
inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
}
cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, 32, &tmp);
cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, MAX_TX_SIZE, &tmp);
}
*out_dist = (int64_t)tmp * 16;
@@ -2838,10 +2838,10 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, rec_buffer_alloc_16[32 * 32]);
DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
uint8_t *rec_buffer;
#else
DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]);
DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
@@ -2860,16 +2860,16 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
// TODO(any): Use dist_block to compute distortion
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer_alloc_16);
vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE,
NULL, 0, NULL, 0, bh, bh, xd->bd);
} else {
rec_buffer = (uint8_t *)rec_buffer_alloc_16;
vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
rec_buffer = (uint8_t *)rec_buffer16;
vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE,
NULL, 0, NULL, 0, bh, bh);
}
#else
vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE,
NULL, 0, NULL, 0, bh, bh);
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -2904,12 +2904,12 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
inv_txfm_param.bd = xd->bd;
highbd_inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param);
highbd_inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param);
} else {
inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param);
inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param);
}
#else // CONFIG_VP9_HIGHBITDEPTH
inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param);
inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param);
#endif // CONFIG_VP9_HIGHBITDEPTH
if ((bh >> 2) + blk_col > max_blocks_wide ||
@@ -2921,16 +2921,16 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
tmp = 0;
for (idy = 0; idy < blocks_height; idy += 2) {
for (idx = 0; idx < blocks_width; idx += 2) {
cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
src_stride,
rec_buffer + 4 * idy * 32 + 4 * idx,
32, &this_dist);
uint8_t *const s = src + 4 * idy * src_stride + 4 * idx;
uint8_t *const r = rec_buffer + 4 * idy * MAX_TX_SIZE + 4 * idx;
cpi->fn_ptr[BLOCK_8X8].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
tmp += this_dist;
}
}
} else {
uint32_t this_dist;
cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &this_dist);
cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE,
&this_dist);
tmp = this_dist;
}
}
@@ -3125,8 +3125,10 @@ static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
int idx, idy;
int block = 0;
int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
ENTROPY_CONTEXT ctxa[16], ctxl[16];
TXFM_CONTEXT tx_above[8], tx_left[8];
ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE];
TXFM_CONTEXT tx_above[MI_BLOCK_SIZE];
TXFM_CONTEXT tx_left[MI_BLOCK_SIZE];
int pnrate = 0, pnskip = 1;
int64_t pndist = 0, pnsse = 0;
@@ -3240,7 +3242,7 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
const int is_inter = is_inter_block(mbmi);
TX_SIZE best_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
TX_SIZE best_tx = TX_SIZES;
uint8_t best_blk_skip[256];
uint8_t best_blk_skip[MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4];
const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
int idx, idy;
int prune = 0;
@@ -3423,7 +3425,8 @@ static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
int pnrate = 0, pnskip = 1;
int64_t pndist = 0, pnsse = 0;
ENTROPY_CONTEXT ta[16], tl[16];
ENTROPY_CONTEXT ta[2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT tl[2 * MI_BLOCK_SIZE];
vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
@@ -4560,10 +4563,10 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
// Prediction buffer from second frame.
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
uint8_t *second_pred;
#else
DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
for (ref = 0; ref < 2; ++ref) {
@@ -5733,9 +5736,9 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
step_param = cpi->mv_step_param;
}
if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST) {
int boffset =
2 * (b_width_log2_lookup[BLOCK_64X64] -
2 * (b_width_log2_lookup[BLOCK_LARGEST] -
VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
step_param = VPXMAX(step_param, boffset);
}
@@ -6202,16 +6205,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
const int * const intra_mode_cost =
cpi->mbmode_cost[size_group_lookup[bsize]];
const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
const int tmp_buf_sz = CU_SIZE * CU_SIZE;
#if CONFIG_REF_MV
uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * MAX_SB_SQUARE]);
uint8_t *tmp_buf;
#else
DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_OBMC
@@ -6226,7 +6228,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
int best_rate_y, best_rate_uv;
#endif // CONFIG_SUPERTX
#if CONFIG_VAR_TX
uint8_t best_blk_skip[3][256];
uint8_t best_blk_skip[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4];
#endif // CONFIG_VAR_TX
int64_t best_distortion = INT64_MAX;
unsigned int best_pred_var = UINT_MAX;
@@ -6241,8 +6243,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
int orig_dst_stride[MAX_MB_PLANE];
int rs = 0;
INTERP_FILTER best_filter = SWITCHABLE;
uint8_t skip_txfm[MAX_MB_PLANE][4] = {{0}};
int64_t bsse[MAX_MB_PLANE][4] = {{0}};
uint8_t skip_txfm[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB] = {{0}};
int64_t bsse[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB] = {{0}};
int skip_txfm_sb = 0;
int64_t skip_sse_sb = INT64_MAX;
@@ -6569,8 +6571,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
restore_dst_buf(xd, orig_dst, orig_dst_stride);
} else {
for (j = 0; j < MAX_MB_PLANE; j++) {
xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
xd->plane[j].dst.stride = 64;
xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
xd->plane[j].dst.stride = MAX_SB_SIZE;
}
}
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
@@ -6648,15 +6650,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
if (have_newmv_in_inter_mode(this_mode)) {
int_mv tmp_mv[2];
int rate_mvs[2], tmp_rate_mv = 0;
uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3];
uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3];
uint8_t pred0[2 * MAX_SB_SQUARE * 3];
uint8_t pred1[2 * MAX_SB_SQUARE * 3];
uint8_t *preds0[3] = {pred0,
pred0 + 2 * CU_SIZE * CU_SIZE,
pred0 + 4 * CU_SIZE * CU_SIZE};
pred0 + 2 * MAX_SB_SQUARE,
pred0 + 4 * MAX_SB_SQUARE};
uint8_t *preds1[3] = {pred1,
pred1 + 2 * CU_SIZE * CU_SIZE,
pred1 + 4 * CU_SIZE * CU_SIZE};
int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE};
pred1 + 2 * MAX_SB_SQUARE,
pred1 + 4 * MAX_SB_SQUARE};
int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
vp10_build_inter_predictors_for_planes_single_buf(
xd, bsize, mi_row, mi_col, 0, preds0, strides);
vp10_build_inter_predictors_for_planes_single_buf(
@@ -6723,15 +6725,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
mbmi->mv[1].as_int = cur_mv[1].as_int;
}
} else {
uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3];
uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3];
uint8_t pred0[2 * MAX_SB_SQUARE * 3];
uint8_t pred1[2 * MAX_SB_SQUARE * 3];
uint8_t *preds0[3] = {pred0,
pred0 + 2 * CU_SIZE * CU_SIZE,
pred0 + 4 * CU_SIZE * CU_SIZE};
pred0 + 2 * MAX_SB_SQUARE,
pred0 + 4 * MAX_SB_SQUARE};
uint8_t *preds1[3] = {pred1,
pred1 + 2 * CU_SIZE * CU_SIZE,
pred1 + 4 * CU_SIZE * CU_SIZE};
int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE};
pred1 + 2 * MAX_SB_SQUARE,
pred1 + 4 * MAX_SB_SQUARE};
int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
vp10_build_inter_predictors_for_planes_single_buf(
xd, bsize, mi_row, mi_col, 0, preds0, strides);
vp10_build_inter_predictors_for_planes_single_buf(
@@ -6791,8 +6793,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
int tmp_rate_mv = 0;
mbmi->ref_frame[1] = NONE;
for (j = 0; j < MAX_MB_PLANE; j++) {
xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz;
xd->plane[j].dst.stride = CU_SIZE;
xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
xd->plane[j].dst.stride = MAX_SB_SIZE;
}
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
restore_dst_buf(xd, orig_dst, orig_dst_stride);
@@ -6805,11 +6807,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
rmode = intra_mode_cost[mbmi->interintra_mode];
vp10_build_interintra_predictors(xd,
tmp_buf,
tmp_buf + tmp_buf_sz,
tmp_buf + 2 * tmp_buf_sz,
CU_SIZE,
CU_SIZE,
CU_SIZE,
tmp_buf + MAX_SB_SQUARE,
tmp_buf + 2 * MAX_SB_SQUARE,
MAX_SB_SIZE,
MAX_SB_SIZE,
MAX_SB_SIZE,
bsize);
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
&skip_txfm_sb, &skip_sse_sb);
@@ -6830,11 +6832,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
if (wedge_bits) {
vp10_build_interintra_predictors(xd,
tmp_buf,
tmp_buf + tmp_buf_sz,
tmp_buf + 2 * tmp_buf_sz,
CU_SIZE,
CU_SIZE,
CU_SIZE,
tmp_buf + MAX_SB_SQUARE,
tmp_buf + 2 * MAX_SB_SQUARE,
MAX_SB_SIZE,
MAX_SB_SIZE,
MAX_SB_SIZE,
bsize);
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
&skip_txfm_sb, &skip_sse_sb);
@@ -6852,11 +6854,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
mbmi->interintra_uv_wedge_index = wedge_index;
vp10_build_interintra_predictors(xd,
tmp_buf,
tmp_buf + tmp_buf_sz,
tmp_buf + 2 * tmp_buf_sz,
CU_SIZE,
CU_SIZE,
CU_SIZE,
tmp_buf + MAX_SB_SQUARE,
tmp_buf + 2 * MAX_SB_SQUARE,
MAX_SB_SIZE,
MAX_SB_SIZE,
MAX_SB_SIZE,
bsize);
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
&skip_txfm_sb, &skip_sse_sb);
@@ -6937,8 +6939,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
if (best_needs_copy) {
// again temporarily set the buffers to local memory to prevent a memcpy
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
xd->plane[i].dst.stride = 64;
xd->plane[i].dst.buf = tmp_buf + i * MAX_SB_SQUARE;
xd->plane[i].dst.stride = MAX_SB_SIZE;
}
}
rd = tmp_rd;
@@ -7572,33 +7574,33 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
const MODE_INFO *left_mi = xd->left_mi;
#if CONFIG_OBMC
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
#else
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
uint8_t *dst_buf1[3], *dst_buf2[3];
int dst_stride1[3] = {64, 64, 64};
int dst_stride2[3] = {64, 64, 64};
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_SB_SQUARE * len);
dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_SB_SQUARE * len);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + 4096;
dst_buf1[2] = tmp_buf1 + 8192;
dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
dst_buf1[2] = tmp_buf1 + 2 * MAX_SB_SQUARE;
dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + 4096;
dst_buf2[2] = tmp_buf2 + 8192;
dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
dst_buf2[2] = tmp_buf2 + 2 * MAX_SB_SQUARE;
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -9386,7 +9388,7 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
int switchable_filter_index;
int_mv *second_ref = comp_pred ?
&x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
b_mode_info tmp_best_bmodes[16];
b_mode_info tmp_best_bmodes[16]; // Should this be 4 ?
MB_MODE_INFO tmp_best_mbmode;
BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
int pred_exists = 0;

View File

@@ -328,13 +328,13 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride +
tile_info.mi_col_start;
for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
mi_row += 8, mi_ptr += 8 * cm->mi_stride) {
mi_row += MI_BLOCK_SIZE, mi_ptr += MI_BLOCK_SIZE * cm->mi_stride) {
MODE_INFO **mi = mi_ptr;
for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
mi_col += 8, mi += 8) {
mi_col += MI_BLOCK_SIZE, mi += MI_BLOCK_SIZE) {
count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts,
mi_row, mi_col, BLOCK_64X64);
mi_row, mi_col, BLOCK_LARGEST);
}
}
}

View File

@@ -353,6 +353,11 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST;
#if CONFIG_EXT_PARTITION
sf->inter_mode_mask[BLOCK_64X128] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_128X64] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_128X128] = INTER_NEAREST;
#endif // CONFIG_EXT_PARTITION
sf->max_intra_bsize = BLOCK_32X32;
sf->allow_skip_recode = 1;
}
@@ -372,6 +377,11 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
#if CONFIG_EXT_PARTITION
sf->inter_mode_mask[BLOCK_64X128] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_128X64] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_128X128] = INTER_NEAREST_NEW_ZERO;
#endif // CONFIG_EXT_PARTITION
sf->adaptive_rd_thresh = 2;
// This feature is only enabled when partition search is disabled.
sf->reuse_inter_pred_sby = 1;
@@ -483,7 +493,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->use_square_partition_only = 0;
sf->auto_min_max_partition_size = NOT_IN_USE;
sf->rd_auto_partition_min_limit = BLOCK_4X4;
sf->default_max_partition_size = BLOCK_64X64;
sf->default_max_partition_size = BLOCK_LARGEST;
sf->default_min_partition_size = BLOCK_4X4;
sf->adjust_partitioning_from_last_frame = 0;
sf->last_partitioning_redo_frequency = 4;
@@ -514,7 +524,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->schedule_mode_search = 0;
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_64X64;
sf->max_intra_bsize = BLOCK_LARGEST;
sf->reuse_inter_pred_sby = 0;
// This setting only takes effect when partition_search_type is set
// to FIXED_PARTITION.
@@ -541,6 +551,12 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
else if (oxcf->mode == GOOD)
set_good_speed_feature(cpi, cm, sf, oxcf->speed);
// sf->partition_search_breakout_dist_thr is set assuming max 64x64
// blocks. Normalise this if the blocks are bigger.
if (MAX_SB_SIZE_LOG2 > 6) {
sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6);
}
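Concretely, with 128x128 superblocks MAX_SB_SIZE_LOG2 is 7, so the threshold is shifted left by 2 * (7 - 6) = 2, i.e. multiplied by 4, the same factor by which the pixel count of the largest block grew (128^2 / 64^2 = 4). In 64x64 builds the condition is false and the value is untouched.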
cpi->full_search_sad = vp10_full_search_sad;
cpi->diamond_search_sad = vp10_diamond_search_sad;

View File

@@ -438,7 +438,7 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
TOKENEXTRA **tp = args->tp;
uint8_t token_cache[32 * 32];
uint8_t token_cache[MAX_TX_SQUARE];
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

View File

@@ -361,9 +361,7 @@ int vp10_denoiser_filter_sse2(const uint8_t *sig, int sig_stride,
avg, avg_stride,
increase_denoising,
bs, motion_magnitude, 8);
} else if (bs == BLOCK_16X8 || bs == BLOCK_16X16 || bs == BLOCK_16X32 ||
bs == BLOCK_32X16|| bs == BLOCK_32X32 || bs == BLOCK_32X64 ||
bs == BLOCK_64X32 || bs == BLOCK_64X64) {
} else if (bs < BLOCK_SIZES) {
return vp10_denoiser_NxM_sse2_big(sig, sig_stride,
mc_avg, mc_avg_stride,
avg, avg_stride,

View File

@@ -130,20 +130,20 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride,
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint8_t temp[MAX_EXT_SIZE * MAX_CU_SIZE];
uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
assert(w <= MAX_CU_SIZE);
assert(h <= MAX_CU_SIZE);
assert(w <= MAX_SB_SIZE);
assert(h <= MAX_SB_SIZE);
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
temp, MAX_CU_SIZE,
temp, MAX_SB_SIZE,
x_filters, x0_q4, x_step_q4, w, intermediate_height);
convolve_vert(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_CU_SIZE,
convolve_vert(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE,
dst, dst_stride,
y_filters, y0_q4, y_step_q4, w, h);
}
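Applying the bound in the comment to the larger blocks: ((128 - 1) * 32 + 15) >> 4 + 8 = 263 after the same round-up, so MAX_EXT_SIZE presumably grows from 135 to 263 under CONFIG_EXT_PARTITION.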
@@ -240,13 +240,13 @@ void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
/* Fixed size intermediate buffer places limits on parameters. */
DECLARE_ALIGNED(16, uint8_t, temp[MAX_CU_SIZE * MAX_CU_SIZE]);
assert(w <= MAX_CU_SIZE);
assert(h <= MAX_CU_SIZE);
DECLARE_ALIGNED(16, uint8_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);
assert(w <= MAX_SB_SIZE);
assert(h <= MAX_SB_SIZE);
vpx_convolve8_c(src, src_stride, temp, MAX_CU_SIZE,
vpx_convolve8_c(src, src_stride, temp, MAX_SB_SIZE,
filter_x, x_step_q4, filter_y, y_step_q4, w, h);
vpx_convolve_avg_c(temp, MAX_CU_SIZE, dst, dst_stride,
vpx_convolve_avg_c(temp, MAX_SB_SIZE, dst, dst_stride,
NULL, 0, NULL, 0, w, h);
}
@@ -463,21 +463,21 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint16_t temp[MAX_EXT_SIZE * MAX_CU_SIZE];
uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
assert(w <= MAX_CU_SIZE);
assert(h <= MAX_CU_SIZE);
assert(w <= MAX_SB_SIZE);
assert(h <= MAX_SB_SIZE);
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE,
CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE,
x_filters, x0_q4, x_step_q4, w,
intermediate_height, bd);
highbd_convolve_vert(
CONVERT_TO_BYTEPTR(temp) + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_CU_SIZE,
CONVERT_TO_BYTEPTR(temp) + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE,
dst, dst_stride,
y_filters, y0_q4, y_step_q4, w, h, bd);
}
@@ -561,14 +561,14 @@ void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_y, int y_step_q4,
int w, int h, int bd) {
// Fixed size intermediate buffer places limits on parameters.
DECLARE_ALIGNED(16, uint16_t, temp[MAX_CU_SIZE * MAX_CU_SIZE]);
assert(w <= MAX_CU_SIZE);
assert(h <= MAX_CU_SIZE);
DECLARE_ALIGNED(16, uint16_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);
assert(w <= MAX_SB_SIZE);
assert(h <= MAX_SB_SIZE);
vpx_highbd_convolve8_c(src, src_stride,
CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE,
CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE,
filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE,
vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE,
dst, dst_stride,
NULL, 0, NULL, 0, w, h, bd);
}

View File

@@ -13,18 +13,19 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifndef MAX_SB_SIZE
# if CONFIG_VP10 && CONFIG_EXT_PARTITION
# define MAX_CU_SIZE 128
# define MAX_SB_SIZE 128
# else
# define MAX_CU_SIZE 64
# define MAX_SB_SIZE 64
# endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
#endif // ndef MAX_SB_SIZE
#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y))
#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y))
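The derived constants used throughout this commit presumably hang off this definition; a sketch of the expected relationships (illustrative only, not the actual headers):

#define MAX_SB_SIZE_LOG2 7                    /* 6 when MAX_SB_SIZE is 64 */
#define MAX_SB_SQUARE  (MAX_SB_SIZE * MAX_SB_SIZE)
#define MI_BLOCK_SIZE  (MAX_SB_SIZE >> 3)     /* superblock width in 8x8 mode-info units */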

View File

@@ -99,27 +99,27 @@ void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
int w, int h) { \
assert(filter_x[3] != 128); \
assert(filter_y[3] != 128); \
assert(w <= MAX_CU_SIZE); \
assert(h <= MAX_CU_SIZE); \
assert(w <= MAX_SB_SIZE); \
assert(h <= MAX_SB_SIZE); \
assert(x_step_q4 == 16); \
assert(y_step_q4 == 16); \
if (filter_x[0] || filter_x[1] || filter_x[2]|| \
filter_y[0] || filter_y[1] || filter_y[2]) { \
DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \
DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+7)]); \
vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
fdata2, MAX_CU_SIZE, \
fdata2, MAX_SB_SIZE, \
filter_x, x_step_q4, filter_y, y_step_q4, \
w, h + 7); \
vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_CU_SIZE, MAX_CU_SIZE, \
vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_SB_SIZE, MAX_SB_SIZE, \
dst, dst_stride, \
filter_x, x_step_q4, filter_y, \
y_step_q4, w, h); \
} else { \
DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+1)]); \
vpx_convolve8_horiz_##opt(src, src_stride, fdata2, MAX_CU_SIZE, \
DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+1)]); \
vpx_convolve8_horiz_##opt(src, src_stride, fdata2, MAX_SB_SIZE, \
filter_x, x_step_q4, filter_y, y_step_q4, \
w, h + 1); \
vpx_convolve8_##avg##vert_##opt(fdata2, MAX_CU_SIZE, dst, dst_stride, \
vpx_convolve8_##avg##vert_##opt(fdata2, MAX_SB_SIZE, dst, dst_stride, \
filter_x, x_step_q4, filter_y, \
y_step_q4, w, h); \
} \
@@ -239,38 +239,38 @@ void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, \
int w, int h, int bd) { \
assert(w <= MAX_CU_SIZE); \
assert(h <= MAX_CU_SIZE); \
assert(w <= MAX_SB_SIZE); \
assert(h <= MAX_SB_SIZE); \
if (x_step_q4 == 16 && y_step_q4 == 16) { \
if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \
DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+7)]); \
vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, \
src_stride, \
CONVERT_TO_BYTEPTR(fdata2), \
MAX_CU_SIZE, \
MAX_SB_SIZE, \
filter_x, x_step_q4, \
filter_y, y_step_q4, \
w, h + 7, bd); \
vpx_highbd_convolve8_##avg##vert_##opt( \
CONVERT_TO_BYTEPTR(fdata2) + 3 * MAX_CU_SIZE, \
MAX_CU_SIZE, \
CONVERT_TO_BYTEPTR(fdata2) + 3 * MAX_SB_SIZE, \
MAX_SB_SIZE, \
dst, \
dst_stride, \
filter_x, x_step_q4, \
filter_y, y_step_q4, \
w, h, bd); \
} else { \
DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+1)]); \
DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+1)]); \
vpx_highbd_convolve8_horiz_##opt(src, \
src_stride, \
CONVERT_TO_BYTEPTR(fdata2), \
MAX_CU_SIZE, \
MAX_SB_SIZE, \
filter_x, x_step_q4, \
filter_y, y_step_q4, \
w, h + 1, bd); \
vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), \
MAX_CU_SIZE, \
MAX_SB_SIZE, \
dst, \
dst_stride, \
filter_x, x_step_q4, \

View File

@@ -31,6 +31,10 @@ cglobal subtract_block, 7, 7, 8, \
je .case_16
cmp colsd, 32
je .case_32
%if CONFIG_EXT_PARTITION
cmp colsd, 64
je .case_64
%endif
%macro loop16 6
mova m0, [srcq+%1]
@@ -55,6 +59,22 @@ cglobal subtract_block, 7, 7, 8, \
mova [diffq+mmsize*1+%6], m1
%endmacro
%if CONFIG_EXT_PARTITION
mov pred_str, pred_stridemp
.loop_128:
loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize
loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize
loop16 4*mmsize, 5*mmsize, 4*mmsize, 5*mmsize, 8*mmsize, 10*mmsize
loop16 6*mmsize, 7*mmsize, 6*mmsize, 7*mmsize, 12*mmsize, 14*mmsize
lea diffq, [diffq+diff_strideq*2]
add predq, pred_str
add srcq, src_strideq
sub rowsd, 1
jnz .loop_128
RET
.case_64:
%endif
mov pred_str, pred_stridemp
.loop_64:
loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize

View File

@@ -844,12 +844,12 @@ static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride,
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
// --Require an additional 8 rows for the horiz_w8 transpose tail.
DECLARE_ALIGNED(16, uint8_t, temp[(MAX_EXT_SIZE + 8) * MAX_CU_SIZE]);
DECLARE_ALIGNED(16, uint8_t, temp[(MAX_EXT_SIZE + 8) * MAX_SB_SIZE]);
const int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
assert(w <= MAX_CU_SIZE);
assert(h <= MAX_CU_SIZE);
assert(w <= MAX_SB_SIZE);
assert(h <= MAX_SB_SIZE);
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
@@ -857,33 +857,33 @@ static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride,
scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1),
src_stride,
temp,
MAX_CU_SIZE,
MAX_SB_SIZE,
x_filters, x0_q4, x_step_q4,
w, intermediate_height);
} else {
scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1),
src_stride,
temp,
MAX_CU_SIZE,
MAX_SB_SIZE,
x_filters, x0_q4, x_step_q4,
w, intermediate_height);
}
if (w >= 16) {
scaledconvolve_vert_w16(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1),
MAX_CU_SIZE,
scaledconvolve_vert_w16(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
MAX_SB_SIZE,
dst,
dst_stride,
y_filters, y0_q4, y_step_q4, w, h);
} else if (w == 8) {
scaledconvolve_vert_w8(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1),
MAX_CU_SIZE,
scaledconvolve_vert_w8(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
MAX_SB_SIZE,
dst,
dst_stride,
y_filters, y0_q4, y_step_q4, w, h);
} else {
scaledconvolve_vert_w4(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1),
MAX_CU_SIZE,
scaledconvolve_vert_w4(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
MAX_SB_SIZE,
dst,
dst_stride,
y_filters, y0_q4, y_step_q4, w, h);

View File

@@ -21,7 +21,11 @@ extern "C" {
#include "vpx/vpx_integer.h"
#define VP8BORDERINPIXELS 32
#if CONFIG_EXT_PARTITION
# define VP9INNERBORDERINPIXELS 160
#else
# define VP9INNERBORDERINPIXELS 96
#endif // CONFIG_EXT_PARTITION
#define VP9_INTERP_EXTEND 4
#define VP9_ENC_BORDER_IN_PIXELS 160
#define VP9_DEC_BORDER_IN_PIXELS 160
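The arithmetic behind the new value: the inner border tracks the superblock size plus the original 32-pixel margin, 64 + 32 = 96 and 128 + 32 = 160.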