diff --git a/test/convolve_test.cc b/test/convolve_test.cc index 0e54c4013..fdea61f29 100644 --- a/test/convolve_test.cc +++ b/test/convolve_test.cc @@ -28,7 +28,7 @@ namespace { -static const unsigned int kMaxDimension = MAX_CU_SIZE; +static const unsigned int kMaxDimension = MAX_SB_SIZE; typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, diff --git a/test/masked_sad_test.cc b/test/masked_sad_test.cc index 34223eac8..13fff0f0d 100644 --- a/test/masked_sad_test.cc +++ b/test/masked_sad_test.cc @@ -50,16 +50,16 @@ class MaskedSADTest : public ::testing::TestWithParam { TEST_P(MaskedSADTest, OperationCheck) { unsigned int ref_ret, ret; ACMRandom rnd(ACMRandom::DeterministicSeed()); - DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); int err_count = 0; int first_failure = -1; - int src_stride = MAX_CU_SIZE; - int ref_stride = MAX_CU_SIZE; - int msk_stride = MAX_CU_SIZE; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; for (int i = 0; i < number_of_iterations; ++i) { - for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { + for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) { src_ptr[j] = rnd.Rand8(); ref_ptr[j] = rnd.Rand8(); msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? 
rnd.Rand8()&0x3f : 64; @@ -108,18 +108,18 @@ class HighbdMaskedSADTest : public ::testing:: TEST_P(HighbdMaskedSADTest, OperationCheck) { unsigned int ref_ret, ret; ACMRandom rnd(ACMRandom::DeterministicSeed()); - DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); + DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); int err_count = 0; int first_failure = -1; - int src_stride = MAX_CU_SIZE; - int ref_stride = MAX_CU_SIZE; - int msk_stride = MAX_CU_SIZE; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; for (int i = 0; i < number_of_iterations; ++i) { - for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { + for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) { src_ptr[j] = rnd.Rand16()&0xfff; ref_ptr[j] = rnd.Rand16()&0xfff; msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? 
rnd.Rand8()&0x3f : 64; diff --git a/test/masked_variance_test.cc b/test/masked_variance_test.cc index 1f8bf1e22..1710285df 100644 --- a/test/masked_variance_test.cc +++ b/test/masked_variance_test.cc @@ -58,17 +58,17 @@ TEST_P(MaskedVarianceTest, OperationCheck) { unsigned int ref_ret, opt_ret; unsigned int ref_sse, opt_sse; ACMRandom rnd(ACMRandom::DeterministicSeed()); - DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); int err_count = 0; int first_failure = -1; - int src_stride = MAX_CU_SIZE; - int ref_stride = MAX_CU_SIZE; - int msk_stride = MAX_CU_SIZE; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; for (int i = 0; i < number_of_iterations; ++i) { - for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { + for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) { src_ptr[j] = rnd.Rand8(); ref_ptr[j] = rnd.Rand8(); msk_ptr[j] = rnd(65); @@ -100,19 +100,19 @@ TEST_P(MaskedVarianceTest, ExtremeValues) { unsigned int ref_ret, opt_ret; unsigned int ref_sse, opt_sse; ACMRandom rnd(ACMRandom::DeterministicSeed()); - DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); int err_count = 0; int first_failure = -1; - int src_stride = MAX_CU_SIZE; - int ref_stride = MAX_CU_SIZE; - int msk_stride = MAX_CU_SIZE; + int src_stride = MAX_SB_SIZE; + int ref_stride = 
MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; for (int i = 0; i < 8; ++i) { - memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE); - memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE); - memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_CU_SIZE*MAX_CU_SIZE); + memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_SB_SIZE*MAX_SB_SIZE); + memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_SB_SIZE*MAX_SB_SIZE); + memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE*MAX_SB_SIZE); ref_ret = ref_func_(src_ptr, src_stride, ref_ptr, ref_stride, @@ -166,21 +166,21 @@ TEST_P(MaskedSubPixelVarianceTest, OperationCheck) { unsigned int ref_ret, opt_ret; unsigned int ref_sse, opt_sse; ACMRandom rnd(ACMRandom::DeterministicSeed()); - DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); - DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); - DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); + DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); int err_count = 0; int first_failure = -1; - int src_stride = (MAX_CU_SIZE+1); - int ref_stride = (MAX_CU_SIZE+1); - int msk_stride = (MAX_CU_SIZE+1); + int src_stride = (MAX_SB_SIZE+1); + int ref_stride = (MAX_SB_SIZE+1); + int msk_stride = (MAX_SB_SIZE+1); int xoffset; int yoffset; for (int i = 0; i < number_of_iterations; ++i) { int xoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)}; int yoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)}; - for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) { + for (int j = 0; j < (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1); j++) { src_ptr[j] = rnd.Rand8(); ref_ptr[j] = rnd.Rand8(); msk_ptr[j] = rnd(65); @@ -221,23 +221,23 @@ TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) { unsigned int ref_ret, opt_ret; unsigned int ref_sse, opt_sse; ACMRandom rnd(ACMRandom::DeterministicSeed()); - DECLARE_ALIGNED(16, uint8_t, 
src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); - DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); - DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); + DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); int first_failure_x = -1; int first_failure_y = -1; int err_count = 0; int first_failure = -1; - int src_stride = (MAX_CU_SIZE+1); - int ref_stride = (MAX_CU_SIZE+1); - int msk_stride = (MAX_CU_SIZE+1); + int src_stride = (MAX_SB_SIZE+1); + int ref_stride = (MAX_SB_SIZE+1); + int msk_stride = (MAX_SB_SIZE+1); for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) { for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) { for (int i = 0; i < 8; ++i) { - memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); - memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); - memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); + memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); + memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); + memset(msk_ptr, (i & 0x4) ? 
64 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, @@ -297,19 +297,19 @@ TEST_P(HighbdMaskedVarianceTest, OperationCheck) { unsigned int ref_ret, opt_ret; unsigned int ref_sse, opt_sse; ACMRandom rnd(ACMRandom::DeterministicSeed()); - DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); + DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); int err_count = 0; int first_failure = -1; - int src_stride = MAX_CU_SIZE; - int ref_stride = MAX_CU_SIZE; - int msk_stride = MAX_CU_SIZE; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; for (int i = 0; i < number_of_iterations; ++i) { - for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { + for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) { src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); msk_ptr[j] = rnd(65); @@ -341,23 +341,23 @@ TEST_P(HighbdMaskedVarianceTest, ExtremeValues) { unsigned int ref_ret, opt_ret; unsigned int ref_sse, opt_sse; ACMRandom rnd(ACMRandom::DeterministicSeed()); - DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); + DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); int err_count = 0; 
int first_failure = -1; - int src_stride = MAX_CU_SIZE; - int ref_stride = MAX_CU_SIZE; - int msk_stride = MAX_CU_SIZE; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; for (int i = 0; i < 8; ++i) { vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0, - MAX_CU_SIZE*MAX_CU_SIZE); + MAX_SB_SIZE*MAX_SB_SIZE); vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0, - MAX_CU_SIZE*MAX_CU_SIZE); - memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_CU_SIZE*MAX_CU_SIZE); + MAX_SB_SIZE*MAX_SB_SIZE); + memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE*MAX_SB_SIZE); ref_ret = ref_func_(src8_ptr, src_stride, ref8_ptr, ref_stride, @@ -407,24 +407,24 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) { unsigned int ref_ret, opt_ret; unsigned int ref_sse, opt_sse; ACMRandom rnd(ACMRandom::DeterministicSeed()); - DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); - DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); - DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); + DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); int err_count = 0; int first_failure = -1; int first_failure_x = -1; int first_failure_y = -1; - int src_stride = (MAX_CU_SIZE+1); - int ref_stride = (MAX_CU_SIZE+1); - int msk_stride = (MAX_CU_SIZE+1); + int src_stride = (MAX_SB_SIZE+1); + int ref_stride = (MAX_SB_SIZE+1); + int msk_stride = (MAX_SB_SIZE+1); int xoffset, yoffset; for (int i = 0; i < number_of_iterations; ++i) { for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) { for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) { - for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) { + for (int j = 0; j < 
(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1); j++) { src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); msk_ptr[j] = rnd(65); @@ -465,27 +465,27 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) { unsigned int ref_ret, opt_ret; unsigned int ref_sse, opt_sse; ACMRandom rnd(ACMRandom::DeterministicSeed()); - DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); - DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); - DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); + DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); int first_failure_x = -1; int first_failure_y = -1; int err_count = 0; int first_failure = -1; - int src_stride = (MAX_CU_SIZE+1); - int ref_stride = (MAX_CU_SIZE+1); - int msk_stride = (MAX_CU_SIZE+1); + int src_stride = (MAX_SB_SIZE+1); + int ref_stride = (MAX_SB_SIZE+1); + int msk_stride = (MAX_SB_SIZE+1); for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) { for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) { for (int i = 0; i < 8; ++i) { vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0, - (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); + (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0, - (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); - memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); + (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); + memset(msk_ptr, (i & 0x4) ? 
64 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, diff --git a/test/vp9_subtract_test.cc b/test/subtract_test.cc similarity index 97% rename from test/vp9_subtract_test.cc rename to test/subtract_test.cc index 3cad4d7e6..a3f015277 100644 --- a/test/vp9_subtract_test.cc +++ b/test/subtract_test.cc @@ -10,13 +10,16 @@ #include "third_party/googletest/src/include/gtest/gtest.h" -#include "./vp9_rtcd.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" +#if CONFIG_VP10 +#include "vp10/common/blockd.h" +#elif CONFIG_VP9 #include "vp9/common/vp9_blockd.h" +#endif #include "vpx_mem/vpx_mem.h" typedef void (*SubtractFunc)(int rows, int cols, @@ -24,7 +27,7 @@ typedef void (*SubtractFunc)(int rows, int cols, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride); -namespace vp9 { +namespace { class VP9SubtractBlockTest : public ::testing::TestWithParam { public: @@ -105,5 +108,4 @@ INSTANTIATE_TEST_CASE_P(NEON, VP9SubtractBlockTest, INSTANTIATE_TEST_CASE_P(MSA, VP9SubtractBlockTest, ::testing::Values(vpx_subtract_block_msa)); #endif - -} // namespace vp9 +} // namespace diff --git a/test/test.mk b/test/test.mk index db2e361eb..b173ec3fd 100644 --- a/test/test.mk +++ b/test/test.mk @@ -147,7 +147,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc -LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += subtract_test.cc ifeq ($(CONFIG_VP9_ENCODER),yes) LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc @@ -172,6 +172,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_fht16x16_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ANS) += 
vp10_ans_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += sum_squares_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += subtract_test.cc ifeq ($(CONFIG_EXT_INTER),yes) LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index fb3f44b12..821d67c95 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -44,9 +44,6 @@ typedef enum { #define IsInterpolatingFilter(filter) (1) #endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS -#define MAXTXLEN 32 -#define CU_SIZE 64 - static INLINE int is_inter_mode(PREDICTION_MODE mode) { #if CONFIG_EXT_INTER return mode >= NEARESTMV && mode <= NEW_NEWMV; @@ -167,8 +164,8 @@ typedef struct { PREDICTION_MODE mode; TX_SIZE tx_size; #if CONFIG_VAR_TX - // TODO(jingning): This effectively assigned an entry for each 8x8 block. - // Apparently it takes much more space than needed. + // TODO(jingning): This effectively assigned a separate entry for each + // 8x8 block. Apparently it takes much more space than needed. TX_SIZE inter_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; #endif int8_t skip; @@ -318,15 +315,15 @@ typedef struct macroblockd { const YV12_BUFFER_CONFIG *cur_buf; ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; + ENTROPY_CONTEXT left_context[MAX_MB_PLANE][2 * MI_BLOCK_SIZE]; PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[8]; + PARTITION_CONTEXT left_seg_context[MI_BLOCK_SIZE]; #if CONFIG_VAR_TX TXFM_CONTEXT *above_txfm_context; TXFM_CONTEXT *left_txfm_context; - TXFM_CONTEXT left_txfm_context_buffer[8]; + TXFM_CONTEXT left_txfm_context_buffer[MI_BLOCK_SIZE]; TX_SIZE max_tx_size; #if CONFIG_SUPERTX @@ -686,6 +683,7 @@ void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, #if CONFIG_EXT_INTER static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) { + // TODO(debargha): Should this be bsize < BLOCK_LARGEST? 
return (bsize >= BLOCK_8X8) && (bsize < BLOCK_64X64); } diff --git a/vp10/common/common_data.h b/vp10/common/common_data.h index 67d6e3a81..44ebff2dc 100644 --- a/vp10/common/common_data.h +++ b/vp10/common/common_data.h @@ -19,154 +19,282 @@ extern "C" { #endif +#if CONFIG_EXT_PARTITION +# define IF_EXT_PARTITION(...) __VA_ARGS__ +#else +# define IF_EXT_PARTITION(...) +#endif + // Log 2 conversion lookup tables for block width and height static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = - {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4}; + {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5)}; static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = - {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4}; -static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = - {1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16}; -static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = - {1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16}; + {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5)}; // Log 2 conversion lookup tables for modeinfo width and height static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = - {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}; + {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4)}; static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] = - {0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3}; + {0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4)}; + +// Width/height lookup tables in units of various block sizes +static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = + {1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)}; +static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = + {1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32)}; static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8}; + {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16)}; static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = - {1, 1, 1, 
1, 2, 1, 2, 4, 2, 4, 8, 4, 8}; + {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16)}; +static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES] = + {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8)}; +static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] = + {1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)}; // VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize))) static const uint8_t size_group_lookup[BLOCK_SIZES] = - {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3}; + {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3)}; static const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = - {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12}; + {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14)}; -static const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = { - { // 4X4 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, +static const PARTITION_TYPE + partition_lookup[MAX_SB_SIZE_LOG2 - 1][BLOCK_SIZES] = { + { // 4X4 -> + // 4X4 + PARTITION_NONE, + // 4X8, 8X4, 8X8 PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + // 8X16, 16X8, 16X16 PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + // 16X32, 32X16, 32X32 PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID - }, { // 8X8 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, + // 32X64, 64X32, 64X64 PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, +#if CONFIG_EXT_PARTITION PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID - }, { // 16X16 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, +#endif // 
CONFIG_EXT_PARTITION + }, { // 8X8 -> + // 4X4 + PARTITION_SPLIT, + // 4X8, 8X4, 8X8 + PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, + // 8X16, 16X8, 16X16 PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID - }, { // 32X32 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, - PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID - }, { // 64X64 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, - PARTITION_NONE + // 16X32, 32X16, 32X32 + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + // 32X64, 64X32, 64X64 + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, +#endif // CONFIG_EXT_PARTITION + }, { // 16X16 -> + // 4X4 + PARTITION_SPLIT, + // 4X8, 8X4, 8X8 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + // 8X16, 16X8, 16X16 + PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, + // 16X32, 32X16, 32X32 + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + // 32X64, 64X32, 64X64 + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, +#endif // CONFIG_EXT_PARTITION + }, { // 32X32 -> + // 4X4 + PARTITION_SPLIT, + // 4X8, 8X4, 8X8 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + // 8X16, 16X8, 16X16 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + // 16X32, 32X16, 32X32 + PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, + // 32X64, 64X32, 
64X64 + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, +#endif // CONFIG_EXT_PARTITION + }, { // 64X64 -> + // 4X4 + PARTITION_SPLIT, + // 4X8, 8X4, 8X8 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + // 8X16, 16X8, 16X16 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + // 16X32, 32X16, 32X32 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + // 32X64, 64X32, 64X64 + PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + }, { // 128x128 -> + // 4X4 + PARTITION_SPLIT, + // 4X8, 8X4, 8X8 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + // 8X16, 16X8, 16X16 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + // 16X32, 32X16, 32X32 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + // 32X64, 64X32, 64X64 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + // 64x128, 128x64, 128x128 + PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, +#endif // CONFIG_EXT_PARTITION } }; #if CONFIG_EXT_PARTITION_TYPES -static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] = { - { // PARTITION_NONE - BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, - BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, - BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, - BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, - BLOCK_64X64, - }, { // PARTITION_HORZ - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_64X32, - }, { // PARTITION_VERT - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X64, - }, { // PARTITION_SPLIT - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID, - 
BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X32, - }, { // PARTITION_HORZ_A - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_64X32, - }, { // PARTITION_HORZ_B - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_64X32, - }, { // PARTITION_VERT_A - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X64, - }, { // PARTITION_VERT_B - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X64, - } -}; +static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] = #else -static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { +static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = +#endif // CONFIG_EXT_PARTITION_TYPES +{ { // PARTITION_NONE - BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, - BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, - BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, - BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, - BLOCK_64X64, + // 4X4 + BLOCK_4X4, + // 4X8, 8X4, 8X8 + BLOCK_4X8, BLOCK_8X4, BLOCK_8X8, + // 8X16, 16X8, 16X16 + BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, + // 16X32, 32X16, 32X32 + BLOCK_16X32, BLOCK_32X16, BLOCK_32X32, + // 32X64, 64X32, 64X64 + BLOCK_32X64, BLOCK_64X32, BLOCK_64X64, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + BLOCK_64X128, BLOCK_128X64, BLOCK_128X128, +#endif // CONFIG_EXT_PARTITION }, { // PARTITION_HORZ - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X8, BLOCK_INVALID, 
BLOCK_INVALID, - BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_64X32, + // 4X4 + BLOCK_INVALID, + // 4X8, 8X4, 8X8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4, + // 8X16, 16X8, 16X16 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8, + // 16X32, 32X16, 32X32 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, + // 32X64, 64X32, 64X64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64, +#endif // CONFIG_EXT_PARTITION }, { // PARTITION_VERT - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X64, + // 4X4 + BLOCK_INVALID, + // 4X8, 8X4, 8X8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8, + // 8X16, 16X8, 16X16 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16, + // 16X32, 32X16, 32X32 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, + // 32X64, 64X32, 64X64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128, +#endif // CONFIG_EXT_PARTITION }, { // PARTITION_SPLIT - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X32, + // 4X4 + BLOCK_INVALID, + // 4X8, 8X4, 8X8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4, + // 8X16, 16X8, 16X16 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8, + // 16X32, 32X16, 32X32 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16, + // 32X64, 64X32, 64X64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64, +#endif // CONFIG_EXT_PARTITION +#if CONFIG_EXT_PARTITION_TYPES + }, { // PARTITION_HORZ_A + // 4X4 + BLOCK_INVALID, + // 4X8, 8X4, 8X8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4, + // 8X16, 16X8, 16X16 + BLOCK_INVALID, BLOCK_INVALID, 
BLOCK_16X8, + // 16X32, 32X16, 32X32 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, + // 32X64, 64X32, 64X64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64, +#endif // CONFIG_EXT_PARTITION + }, { // PARTITION_HORZ_B + // 4X4 + BLOCK_INVALID, + // 4X8, 8X4, 8X8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4, + // 8X16, 16X8, 16X16 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8, + // 16X32, 32X16, 32X32 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, + // 32X64, 64X32, 64X64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64, +#endif // CONFIG_EXT_PARTITION + }, { // PARTITION_VERT_A + // 4X4 + BLOCK_INVALID, + // 4X8, 8X4, 8X8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8, + // 8X16, 16X8, 16X16 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16, + // 16X32, 32X16, 32X32 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, + // 32X64, 64X32, 64X64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128, +#endif // CONFIG_EXT_PARTITION + }, { // PARTITION_VERT_B + // 4X4 + BLOCK_INVALID, + // 4X8, 8X4, 8X8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8, + // 8X16, 16X8, 16X16 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16, + // 16X32, 32X16, 32X32 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, + // 32X64, 64X32, 64X64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128, +#endif // CONFIG_EXT_PARTITION +#endif // CONFIG_EXT_PARTITION_TYPES } }; -#endif // CONFIG_EXT_PARTITION_TYPES static const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = { - TX_4X4, TX_4X4, TX_4X4, - TX_8X8, TX_8X8, TX_8X8, - TX_16X16, TX_16X16, TX_16X16, - TX_32X32, TX_32X32, TX_32X32, TX_32X32 + // 4X4 + TX_4X4, + // 4X8, 8X4, 8X8 + TX_4X4, TX_4X4, TX_8X8, + // 8X16, 16X8, 16X16 
+ TX_8X8, TX_8X8, TX_16X16, + // 16X32, 32X16, 32X32 + TX_16X16, TX_16X16, TX_32X32, + // 32X64, 64X32, 64X64 + TX_32X32, TX_32X32, TX_32X32, +#if CONFIG_EXT_PARTITION + // 64x128, 128x64, 128x128 + TX_32X32, TX_32X32, TX_32X32, +#endif // CONFIG_EXT_PARTITION }; static const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = { @@ -200,6 +328,11 @@ static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { {{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}}, {{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}}, {{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}}, +#if CONFIG_EXT_PARTITION + {{BLOCK_64X128, BLOCK_64X64}, {BLOCK_INVALID, BLOCK_32X64}}, + {{BLOCK_128X64, BLOCK_INVALID}, {BLOCK_64X64, BLOCK_64X32}}, + {{BLOCK_128X128, BLOCK_128X64}, {BLOCK_64X128, BLOCK_64X64}}, +#endif // CONFIG_EXT_PARTITION }; // Generates 4 bit field in which each bit set to 1 represents @@ -209,6 +342,24 @@ static const struct { PARTITION_CONTEXT above; PARTITION_CONTEXT left; } partition_context_lookup[BLOCK_SIZES]= { +#if CONFIG_EXT_PARTITION + {31, 31}, // 4X4 - {0b11111, 0b11111} + {31, 30}, // 4X8 - {0b11111, 0b11110} + {30, 31}, // 8X4 - {0b11110, 0b11111} + {30, 30}, // 8X8 - {0b11110, 0b11110} + {30, 28}, // 8X16 - {0b11110, 0b11100} + {28, 30}, // 16X8 - {0b11100, 0b11110} + {28, 28}, // 16X16 - {0b11100, 0b11100} + {28, 24}, // 16X32 - {0b11100, 0b11000} + {24, 28}, // 32X16 - {0b11000, 0b11100} + {24, 24}, // 32X32 - {0b11000, 0b11000} + {24, 16}, // 32X64 - {0b11000, 0b10000} + {16, 24}, // 64X32 - {0b10000, 0b11000} + {16, 16}, // 64X64 - {0b10000, 0b10000} + {16, 0 }, // 64X128- {0b10000, 0b00000} + {0, 16}, // 128X64- {0b00000, 0b10000} + {0, 0 }, // 128X128-{0b00000, 0b00000} +#else {15, 15}, // 4X4 - {0b1111, 0b1111} {15, 14}, // 4X8 - {0b1111, 0b1110} {14, 15}, // 8X4 - {0b1110, 0b1111} @@ -222,6 +373,7 @@ static const struct { {8, 0 }, // 32X64 - {0b1000, 0b0000} {0, 8 }, // 64X32 - {0b0000, 0b1000} {0, 0 }, // 64X64 - {0b0000, 0b0000} +#endif // 
CONFIG_EXT_PARTITION }; #if CONFIG_SUPERTX diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index b57ed7abd..29d541951 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -171,6 +171,13 @@ static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] { 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split { 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split { 10, 7, 6, 128, 128, 128, 128 }, // a/l both split +#if CONFIG_EXT_PARTITION + // 128x128 -> 64x64 + { 222, 34, 30, 128, 128, 128, 128 }, // a/l both not split + { 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split + { 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split + { 10, 7, 6, 128, 128, 128, 128 }, // a/l both split +#endif // CONFIG_EXT_PARTITION }; #else static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] @@ -195,6 +202,13 @@ static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] { 72, 16, 44 }, // a split, l not split { 58, 32, 12 }, // l split, a not split { 10, 7, 6 }, // a/l both split +#if CONFIG_EXT_PARTITION + // 128x128 -> 64x64 + { 222, 34, 30 }, // a/l both not split + { 72, 16, 44 }, // a split, l not split + { 58, 32, 12 }, // l split, a not split + { 10, 7, 6 }, // a/l both split +#endif // CONFIG_EXT_PARTITION }; #endif // CONFIG_EXT_PARTITION_TYPES @@ -256,20 +270,33 @@ static const vpx_prob default_inter_compound_mode_probs static const vpx_prob default_interintra_prob[BLOCK_SIZES] = { 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, +#if CONFIG_EXT_PARTITION + 192, 192, 192 +#endif // CONFIG_EXT_PARTITION }; static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = { 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, +#if CONFIG_EXT_PARTITION + 192, 192, 192 +#endif // CONFIG_EXT_PARTITION }; static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = { 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, +#if CONFIG_EXT_PARTITION + 192, 
192, 192 +#endif // CONFIG_EXT_PARTITION }; #endif // CONFIG_EXT_INTER #if CONFIG_OBMC static const vpx_prob default_obmc_prob[BLOCK_SIZES] = { 255, 255, 255, 151, 153, 144, 178, 165, 160, 207, 195, 168, 244, +#if CONFIG_EXT_PARTITION + // TODO(debargha) What are the correct values for these? + 192, 192, 192 +#endif // CONFIG_EXT_PARTITION }; #endif // CONFIG_OBMC @@ -389,6 +416,11 @@ vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = { { 180, 113, 136, 49, 45, 114}, { 107, 70, 87, 49, 154, 156}, { 98, 105, 142, 63, 64, 152}, +#if CONFIG_EXT_PARTITION + { 98, 105, 142, 63, 64, 152}, + { 98, 105, 142, 63, 64, 152}, + { 98, 105, 142, 63, 64, 152}, +#endif // CONFIG_EXT_PARTITION }; const vpx_prob @@ -403,6 +435,11 @@ vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = { { 67, 53, 54, 55, 66, 93}, { 120, 130, 83, 171, 75, 214}, { 72, 55, 66, 68, 79, 107}, +#if CONFIG_EXT_PARTITION + { 72, 55, 66, 68, 79, 107}, + { 72, 55, 66, 68, 79, 107}, + { 72, 55, 66, 68, 79, 107}, +#endif // CONFIG_EXT_PARTITION }; const vpx_prob @@ -418,6 +455,11 @@ vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS] { 240, 180, 100, }, { 240, 180, 100, }, { 240, 180, 100, }, +#if CONFIG_EXT_PARTITION + { 240, 180, 100, }, + { 240, 180, 100, }, + { 240, 180, 100, }, +#endif // CONFIG_EXT_PARTITION }; diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h index 3d5fe9e47..8219dc5e0 100644 --- a/vp10/common/entropymode.h +++ b/vp10/common/entropymode.h @@ -32,7 +32,7 @@ extern "C" { #define PALETTE_COLOR_CONTEXTS 16 #define PALETTE_MAX_SIZE 8 -#define PALETTE_BLOCK_SIZES (BLOCK_64X64 - BLOCK_8X8 + 1) +#define PALETTE_BLOCK_SIZES (BLOCK_LARGEST - BLOCK_8X8 + 1) #define PALETTE_Y_MODE_CONTEXTS 3 struct VP10Common; diff --git a/vp10/common/enums.h b/vp10/common/enums.h index 36c9f9121..5615cee93 100644 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h @@ -18,13 +18,25 @@ extern "C" { #endif -#define MI_SIZE_LOG2 
3 -#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6 +#undef MAX_SB_SIZE +#if CONFIG_EXT_PARTITION +# define MAX_SB_SIZE_LOG2 7 +#else +# define MAX_SB_SIZE_LOG2 6 +#endif // CONFIG_EXT_PARTITION + +#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2) +#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE) + +#define MI_SIZE_LOG2 3 #define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit + +#define MI_BLOCK_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2) #define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block #define MI_MASK (MI_BLOCK_SIZE - 1) +#define MI_MASK_2 (MI_BLOCK_SIZE * 2 - 1) #if CONFIG_EXT_TILE # define MAX_TILE_ROWS 1024 @@ -49,32 +61,29 @@ typedef enum BITSTREAM_PROFILE { MAX_PROFILES } BITSTREAM_PROFILE; -#define BLOCK_4X4 0 -#define BLOCK_4X8 1 -#define BLOCK_8X4 2 -#define BLOCK_8X8 3 -#define BLOCK_8X16 4 -#define BLOCK_16X8 5 -#define BLOCK_16X16 6 -#define BLOCK_16X32 7 -#define BLOCK_32X16 8 -#define BLOCK_32X32 9 -#define BLOCK_32X64 10 -#define BLOCK_64X32 11 -#define BLOCK_64X64 12 - -#if CONFIG_EXT_PARTITION -#define BLOCK_64X128 13 -#define BLOCK_128X64 14 -#define BLOCK_128X128 15 -#define BLOCK_SIZES 16 +#define BLOCK_4X4 0 +#define BLOCK_4X8 1 +#define BLOCK_8X4 2 +#define BLOCK_8X8 3 +#define BLOCK_8X16 4 +#define BLOCK_16X8 5 +#define BLOCK_16X16 6 +#define BLOCK_16X32 7 +#define BLOCK_32X16 8 +#define BLOCK_32X32 9 +#define BLOCK_32X64 10 +#define BLOCK_64X32 11 +#define BLOCK_64X64 12 +#if !CONFIG_EXT_PARTITION +# define BLOCK_SIZES 13 #else -#define BLOCK_SIZES 13 -#endif // CONFIG_EXT_PARTITION - -#define BLOCK_INVALID (BLOCK_SIZES) +# define BLOCK_64X128 13 +# define BLOCK_128X64 14 +# define BLOCK_128X128 15 +# define BLOCK_SIZES 16 +#endif // !CONFIG_EXT_PARTITION +#define BLOCK_INVALID BLOCK_SIZES #define BLOCK_LARGEST (BLOCK_SIZES - 1) - typedef uint8_t BLOCK_SIZE; #if CONFIG_EXT_PARTITION_TYPES @@ -104,7 +113,11 @@ typedef enum PARTITION_TYPE { typedef char PARTITION_CONTEXT; #define PARTITION_PLOFFSET 4 // number 
of probability models per block size -#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) +#if CONFIG_EXT_PARTITION +# define PARTITION_CONTEXTS (5 * PARTITION_PLOFFSET) +#else +# define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) +#endif // CONFIG_EXT_PARTITION // block transform size typedef uint8_t TX_SIZE; @@ -114,6 +127,15 @@ typedef uint8_t TX_SIZE; #define TX_32X32 ((TX_SIZE)3) // 32x32 transform #define TX_SIZES ((TX_SIZE)4) +#define MAX_TX_SIZE_LOG2 5 +#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2) +#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE) + +// Number of maxium size transform blocks in the maximum size superblock +#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 \ + ((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2) +#define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2) + // frame transform mode typedef enum { ONLY_4X4 = 0, // only 4x4 transform used @@ -286,10 +308,15 @@ typedef enum { /* Segment Feature Masks */ #define MAX_MV_REF_CANDIDATES 2 + #if CONFIG_REF_MV #define MAX_REF_MV_STACK_SIZE 16 -#define REF_CAT_LEVEL 160 -#endif +#if CONFIG_EXT_PARTITION +#define REF_CAT_LEVEL 640 +#else +#define REF_CAT_LEVEL 160 +#endif // CONFIG_EXT_PARTITION +#endif // CONFIG_REF_MV #define INTRA_INTER_CONTEXTS 4 #define COMP_INTER_CONTEXTS 5 diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 25941d02b..fe9b13cb4 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -871,6 +871,9 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, cm->mi_rows - mi_row : MI_BLOCK_SIZE); const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? 
cm->mi_cols - mi_col : MI_BLOCK_SIZE); +#if CONFIG_EXT_PARTITION + assert(0 && "Not yet updated"); +#endif // CONFIG_EXT_PARTITION vp10_zero(*lfm); assert(mip[0] != NULL); @@ -1045,8 +1048,10 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, const uint64_t rows = cm->mi_rows - mi_row; // Each pixel inside the border gets a 1, - const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1); - const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1); + const uint64_t mask_y = + (((uint64_t) 1 << (rows << MI_BLOCK_SIZE_LOG2)) - 1); + const uint16_t mask_uv = + (((uint16_t) 1 << (((rows + 1) >> 1) << (MI_BLOCK_SIZE_LOG2 - 1))) - 1); // Remove values completely outside our border. for (i = 0; i < TX_32X32; i++) { @@ -1262,7 +1267,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, int tx_size_mask = 0; // Filter level can vary per MI - if (!(lfl[(r << 3) + (c >> ss_x)] = + if (!(lfl[(r << MI_BLOCK_SIZE_LOG2) + (c >> ss_x)] = get_filter_level(&cm->lf_info, mbmi))) continue; @@ -1280,11 +1285,13 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, sb_type, ss_x, ss_y) : mbmi->inter_tx_size[blk_row][blk_col]; - tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]); - tx_size_c = VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & 0x07]); + tx_size_r = VPXMIN(tx_size, + cm->above_txfm_context[mi_col + c]); + tx_size_c = VPXMIN(tx_size, + cm->left_txfm_context[(mi_row + r) & MI_MASK]); cm->above_txfm_context[mi_col + c] = tx_size; - cm->left_txfm_context[(mi_row + r) & 0x07] = tx_size; + cm->left_txfm_context[(mi_row + r) & MI_MASK] = tx_size; #endif // Build masks based on the transform size of each block @@ -1351,21 +1358,22 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, border_mask = ~(mi_col == 0); #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_c & border_mask, - mask_8x8_c & border_mask, - 
mask_4x4_c & border_mask, - mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3], - (int)cm->bit_depth); + highbd_filter_selectively_vert( + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + mask_16x16_c & border_mask, + mask_8x8_c & border_mask, + mask_4x4_c & border_mask, + mask_4x4_int[r], + &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2], + (int)cm->bit_depth); } else { filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask, mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3]); + &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); } #else filter_selectively_vert(dst->buf, dst->stride, @@ -1373,7 +1381,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3]); + &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; mi_8x8 += row_step_stride; @@ -1400,21 +1408,22 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, } #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3], - (int)cm->bit_depth); + highbd_filter_selectively_horiz( + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + mask_16x16_r, + mask_8x8_r, + mask_4x4_r, + mask_4x4_int_r, + &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2], + (int)cm->bit_depth); } else { filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3]); + &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); } #else filter_selectively_horiz(dst->buf, dst->stride, @@ -1422,7 +1431,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3]); + &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * 
dst->stride; } @@ -1455,16 +1464,18 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm, highbd_filter_selectively_vert_row2( plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_y[r << 3], (int)cm->bit_depth); + &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2], (int)cm->bit_depth); } else { filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, - mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); + mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); } #else filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, - mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); + mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 16 * dst->stride; mask_16x16 >>= 16; @@ -1499,17 +1510,18 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm, if (cm->use_highbitdepth) { highbd_filter_selectively_horiz( CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3], + mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, + &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2], (int)cm->bit_depth); } else { filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << 3]); + &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); } #else filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << 3]); + &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; @@ -1539,8 +1551,10 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm, for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { if (plane->plane_type == 1) { 
for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { - lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; - lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; + lfm->lfl_uv[(r << 1) + c] = + lfm->lfl_y[(r << MI_BLOCK_SIZE_LOG2) + (c << 1)]; + lfm->lfl_uv[((r + 2) << 1) + c] = + lfm->lfl_y[((r + 2) << MI_BLOCK_SIZE_LOG2) + (c << 1)]; } } @@ -1632,9 +1646,31 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP10_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only) { +#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES + const int num_planes = y_only ? 1 : MAX_MB_PLANE; + int mi_row, mi_col; + +# if CONFIG_VAR_TX + memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols); +# endif // CONFIG_VAR_TX + for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { + MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; +# if CONFIG_VAR_TX + memset(cm->left_txfm_context, TX_SIZES, MI_BLOCK_SIZE); +# endif // CONFIG_VAR_TX + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + int plane; + + vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); + + for (plane = 0; plane < num_planes; ++plane) + vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); + } + } +#else const int num_planes = y_only ? 
1 : MAX_MB_PLANE; int mi_row, mi_col; -#if !CONFIG_VAR_TX && !CONFIG_EXT_PARTITION_TYPES enum lf_path path; LOOP_FILTER_MASK lfm; @@ -1646,29 +1682,17 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, path = LF_PATH_444; else path = LF_PATH_SLOW; -#endif // !CONFIG_VAR_TX && !CONFIG_EXT_PARTITION_TYPES -#if CONFIG_VAR_TX - memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols); -#endif for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; -#if CONFIG_VAR_TX - memset(cm->left_txfm_context, TX_SIZES, 8); -#endif for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { int plane; vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); -#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION_TYPES - for (plane = 0; plane < num_planes; ++plane) - vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); -#else // TODO(JBB): Make setup_mask work for non 420. - vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, - &lfm); + vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); + vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); for (plane = 1; plane < num_planes; ++plane) { switch (path) { @@ -1684,9 +1708,9 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, break; } } -#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION_TYPES } } +#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES } void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h index 81f44de7c..8fa0b8048 100644 --- a/vp10/common/loopfilter.h +++ b/vp10/common/loopfilter.h @@ -84,8 +84,8 @@ typedef struct { uint16_t above_uv[TX_SIZES]; uint16_t left_int_4x4_uv; uint16_t above_int_4x4_uv; - uint8_t lfl_y[64]; - uint8_t lfl_uv[16]; + uint8_t lfl_y[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; + uint8_t lfl_uv[MI_BLOCK_SIZE / 2 * MI_BLOCK_SIZE / 2]; } LOOP_FILTER_MASK; /* assorted 
loopfilter functions which get used elsewhere */ diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c index 30d779051..aa651a2e2 100644 --- a/vp10/common/mvref_common.c +++ b/vp10/common/mvref_common.c @@ -12,6 +12,7 @@ #include "vp10/common/mvref_common.h" #if CONFIG_REF_MV + static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, const MB_MODE_INFO *const candidate, const MV_REFERENCE_FRAME rf[2], @@ -23,6 +24,8 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, int index = 0, ref; int newmv_count = 0; + assert(2 * weight < REF_CAT_LEVEL); + if (rf[1] == NONE) { // single reference frame for (ref = 0; ref < 2; ++ref) { @@ -246,32 +249,30 @@ static uint8_t scan_blk_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, return newmv_count; } -// This function assumes MI blocks are 8x8 and coding units are 64x64 static int has_top_right(const MACROBLOCKD *xd, int mi_row, int mi_col, int bs) { // In a split partition all apart from the bottom right has a top right - int has_tr = !((mi_row & bs) & (bs * 2 - 1)) || - !((mi_col & bs) & (bs * 2 - 1)); + int has_tr = !((mi_row & bs) && (mi_col & bs)); + + // bs > 0 and bs is a power of 2 + assert(bs > 0 && !(bs & (bs - 1))); - // Filter out partial right-most boundaries // For each 4x4 group of blocks, when the bottom right is decoded the blocks - // to the right have not been decoded therefore the second from bottom in the - // right-most column does not have a top right - if ((mi_col & bs) & (bs * 2 - 1)) { - if (((mi_col & (2 * bs)) & (bs * 4 - 1)) && - ((mi_row & (2 * bs)) & (bs * 4 - 1))) - has_tr = 0; + // to the right have not been decoded therefore the bottom right does + // not have a top right + while (bs < MI_BLOCK_SIZE) { + if (mi_col & bs) { + if ((mi_col & (2 * bs)) && (mi_row & (2 * bs))) { + has_tr = 0; + break; + } + } else { + break; + } + bs <<= 1; } - // If the right had side of the block lines up with the right had edge end of - // a group of 8x8 
MI blocks (i.e. edge of a coding unit) and is not on the top - // row of that coding unit, it does not have a top right - if (has_tr) - if (((mi_col + xd->n8_w) & 0x07) == 0) - if ((mi_row & 0x07) > 0) - has_tr = 0; - - // The left had of two vertical rectangles always has a top right (as the + // The left hand of two vertical rectangles always has a top right (as the // block above will have been decoded) if (xd->n8_w < xd->n8_h) if (!xd->is_sec_rect) @@ -359,8 +360,11 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, nearest_refmv_count = *refmv_count; - for (idx = 0; idx < nearest_refmv_count; ++idx) + for (idx = 0; idx < nearest_refmv_count; ++idx) { + assert(ref_mv_stack[idx].weight > 0 && + ref_mv_stack[idx].weight < REF_CAT_LEVEL); ref_mv_stack[idx].weight += REF_CAT_LEVEL; + } if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame && rf[1] == NONE) { diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h index 104a91a99..5a3b6a88d 100644 --- a/vp10/common/mvref_common.h +++ b/vp10/common/mvref_common.h @@ -120,7 +120,16 @@ static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = { // 64X32 {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}}, // 64X64 - {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}} + {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}, +#if CONFIG_EXT_PARTITION + // TODO(debargha/jingning) Making them twice the 32x64, .. 
ones above + // 64x128 + {{0, -2}, {-2, 0}, {8, -2}, {-2, 4}, {-2, -2}, {0, -6}, {-6, 0}, {4, -2}}, + // 128x64 + {{-2, 0}, {0, -2}, {-2, 8}, {4, -2}, {-2, -2}, {-6, 0}, {0, -6}, {-2, 4}}, + // 128x128 + {{-2, 6}, {6, -2}, {-2, 8}, {8, -2}, {-2, -2}, {-2, 0}, {0, -2}, {-2, 12}}, +#endif // CONFIG_EXT_PARTITION }; static const int idx_n_column_to_subblock[4][2] = { @@ -131,7 +140,11 @@ static const int idx_n_column_to_subblock[4][2] = { }; // clamp_mv_ref -#define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units +#if CONFIG_EXT_PARTITION +# define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units +#else +# define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units +#endif // CONFIG_EXT_PARTITION static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) { clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER, diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index 3eac586f4..bdd9ffeaf 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h @@ -332,7 +332,7 @@ typedef struct VP10Common { ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; #if CONFIG_VAR_TX TXFM_CONTEXT *above_txfm_context; - TXFM_CONTEXT left_txfm_context[8]; + TXFM_CONTEXT left_txfm_context[MI_BLOCK_SIZE]; #endif int above_context_alloc_cols; @@ -440,7 +440,7 @@ static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd, static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { const int above_idx = mi_col * 2; - const int left_idx = (mi_row * 2) & 15; // FIXME: Mask should be CU_SIZE*2-1 + const int left_idx = (mi_row * 2) & MI_MASK_2; int i; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c index 517538915..57c26a0fd 100644 --- a/vp10/common/reconinter.c +++ b/vp10/common/reconinter.c @@ -454,52 +454,52 @@ void vp10_make_masked_inter_predictor( const MACROBLOCKD *xd) { const MODE_INFO *mi = xd->mi[0]; #if 
CONFIG_VP9_HIGHBITDEPTH - uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE]; + uint8_t tmp_dst_[2 * MAX_SB_SQUARE]; uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; - vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE, + vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x, subpel_y, sf, w, h, 0, interp_filter, xs, ys, xd); #if CONFIG_SUPERTX if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_extend_highbd( - dst, dst_stride, tmp_dst, CU_SIZE, plane, + dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_y, wedge_offset_x, h, w); else build_masked_compound_extend( - dst, dst_stride, tmp_dst, CU_SIZE, plane, + dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_y, wedge_offset_x, h, w); #else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_highbd( - dst, dst_stride, tmp_dst, CU_SIZE, + dst, dst_stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); else build_masked_compound( - dst, dst_stride, tmp_dst, CU_SIZE, + dst, dst_stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); #endif // CONFIG_SUPERTX #else // CONFIG_VP9_HIGHBITDEPTH - uint8_t tmp_dst[CU_SIZE * CU_SIZE]; - vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE, + uint8_t tmp_dst[MAX_SB_SQUARE]; + vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x, subpel_y, sf, w, h, 0, interp_filter, xs, ys, xd); #if CONFIG_SUPERTX build_masked_compound_extend( - dst, dst_stride, tmp_dst, CU_SIZE, plane, + dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_y, wedge_offset_x, h, w); #else build_masked_compound( - dst, dst_stride, tmp_dst, CU_SIZE, + dst, dst_stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.interinter_wedge_index, 
mi->mbmi.sb_type, h, w); #endif // CONFIG_SUPERTX @@ -877,12 +877,13 @@ void vp10_build_masked_inter_predictor_complex( int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition, int plane) { int i, j; - uint8_t mask[MAXTXLEN]; - int top_w = 4 << b_width_log2_lookup[top_bsize], - top_h = 4 << b_height_log2_lookup[top_bsize]; - int w = 4 << b_width_log2_lookup[bsize], h = 4 << b_height_log2_lookup[bsize]; - int w_offset = (mi_col - mi_col_ori) << 3, - h_offset = (mi_row - mi_row_ori) << 3; + uint8_t mask[MAX_TX_SIZE]; + int top_w = 4 << b_width_log2_lookup[top_bsize]; + int top_h = 4 << b_height_log2_lookup[top_bsize]; + int w = 4 << b_width_log2_lookup[bsize]; + int h = 4 << b_height_log2_lookup[bsize]; + int w_offset = (mi_col - mi_col_ori) * MI_SIZE; + int h_offset = (mi_row - mi_row_ori) * MI_SIZE; #if CONFIG_VP9_HIGHBITDEPTH uint16_t *dst16= CONVERT_TO_SHORTPTR(dst); @@ -890,6 +891,8 @@ void vp10_build_masked_inter_predictor_complex( int b_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; #endif // CONFIG_VP9_HIGHBITDEPTH + assert(bsize <= BLOCK_32X32); + top_w >>= pd->subsampling_x; top_h >>= pd->subsampling_y; w >>= pd->subsampling_x; @@ -916,7 +919,8 @@ void vp10_build_masked_inter_predictor_complex( if (m == 0) dst_tmp[j] = dst2_tmp[j]; else - dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m + + dst2_tmp[j] * (64 - m), 6); } dst_tmp += dst_stride; dst2_tmp += dst2_stride; @@ -943,7 +947,8 @@ void vp10_build_masked_inter_predictor_complex( if (m == 0) dst_tmp[j] = dst2_tmp[j]; else - dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m + + dst2_tmp[j] * (64 - m), 6); } dst_tmp += dst_stride; dst2_tmp += dst2_stride; @@ -978,7 +983,8 @@ void vp10_build_masked_inter_predictor_complex( if (m == 0) dst_tmp[j] = dst2_tmp[j]; else - dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m + + dst2_tmp[j] * (64 - m), 6); } memcpy(dst_tmp + j, dst2_tmp + j, (top_w - w_offset - w) * sizeof(uint16_t)); @@ -1001,7 +1007,8 @@ void vp10_build_masked_inter_predictor_complex( if (m == 0) dst_tmp[j] = dst2_tmp[j]; else - dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m + + dst2_tmp[j] * (64 - m), 6); } memcpy(dst_tmp + j, dst2_tmp + j, (top_w - w_offset - w) * sizeof(uint8_t)); @@ -1158,12 +1165,39 @@ static const uint8_t obmc_mask_16[2][16] = { }; static const uint8_t obmc_mask_32[2][32] = { - { 33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55, - 56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64}, - { 31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11, 9, - 8, 7, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, 0, 0, 0} + { 33, 35, 36, 38, 40, 41, 43, 44, + 45, 47, 48, 50, 51, 52, 53, 55, + 56, 57, 58, 59, 60, 60, 61, 62, + 62, 63, 63, 64, 64, 64, 64, 64 }, + { 31, 29, 28, 26, 24, 23, 21, 
20, + 19, 17, 16, 14, 13, 12, 11, 9, + 8, 7, 6, 5, 4, 4, 3, 2, + 2, 1, 1, 0, 0, 0, 0, 0 } }; +#if CONFIG_EXT_PARTITION +// TODO(debargha): What are the correct values here? +static const uint8_t obmc_mask_64[2][64] = { + { 33, 33, 35, 35, 36, 36, 38, 38, + 40, 40, 41, 41, 43, 43, 44, 44, + 45, 45, 47, 47, 48, 48, 50, 50, + 51, 51, 52, 52, 53, 53, 55, 55, + 56, 56, 57, 57, 58, 58, 59, 59, + 60, 60, 60, 60, 61, 61, 62, 62, + 62, 62, 63, 63, 63, 63, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64 }, + { 31, 31, 29, 29, 28, 28, 26, 26, + 24, 24, 23, 23, 21, 21, 20, 20, + 19, 19, 17, 17, 16, 16, 14, 14, + 13, 13, 12, 12, 11, 11, 9, 9, + 8, 8, 7, 7, 6, 6, 5, 5, + 4, 4, 4, 4, 3, 3, 2, 2, + 2, 2, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 } +}; +#endif // CONFIG_EXT_PARTITION + + void setup_obmc_mask(int length, const uint8_t *mask[2]) { switch (length) { case 1: @@ -1190,9 +1224,15 @@ void setup_obmc_mask(int length, const uint8_t *mask[2]) { mask[0] = obmc_mask_32[0]; mask[1] = obmc_mask_32[1]; break; +#if CONFIG_EXT_PARTITION + case 64: + mask[0] = obmc_mask_64[0]; + mask[1] = obmc_mask_64[1]; + break; +#endif // CONFIG_EXT_PARTITION default: - mask[0] = obmc_mask_32[0]; - mask[1] = obmc_mask_32[1]; + mask[0] = NULL; + mask[1] = NULL; assert(0); break; } @@ -1265,15 +1305,15 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; - int bw = (mi_step * 8) >> pd->subsampling_x; + int bw = (mi_step * MI_SIZE) >> pd->subsampling_x; int bh = overlap >> pd->subsampling_y; int row, col; int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride; uint8_t *dst = use_tmp_dst_buf ? 
- &final_buf[plane][(i * 8) >> pd->subsampling_x] : - &pd->dst.buf[(i * 8) >> pd->subsampling_x]; + &final_buf[plane][(i * MI_SIZE) >> pd->subsampling_x] : + &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x]; int tmp_stride = tmp_stride1[plane]; - uint8_t *tmp = &tmp_buf1[plane][(i * 8) >> pd->subsampling_x]; + uint8_t *tmp = &tmp_buf1[plane][(i * MI_SIZE) >> pd->subsampling_x]; const uint8_t *mask[2]; setup_obmc_mask(bh, mask); @@ -1285,8 +1325,9 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, for (row = 0; row < bh; ++row) { for (col = 0; col < bw; ++col) - dst16[col] = (mask[0][row] * dst16[col] + mask[1][row] * tmp16[col] - + 32) >> 6; + dst16[col] = ROUND_POWER_OF_TWO(mask[0][row] * dst16[col] + + mask[1][row] * tmp16[col], 6); + dst16 += dst_stride; tmp16 += tmp_stride; } @@ -1294,8 +1335,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, #endif // CONFIG_VP9_HIGHBITDEPTH for (row = 0; row < bh; ++row) { for (col = 0; col < bw; ++col) - dst[col] = (mask[0][row] * dst[col] + mask[1][row] * tmp[col] + 32) - >> 6; + dst[col] = ROUND_POWER_OF_TWO(mask[0][row] * dst[col] + + mask[1][row] * tmp[col], 6); dst += dst_stride; tmp += tmp_stride; } @@ -1332,15 +1373,15 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; int bw = overlap >> pd->subsampling_x; - int bh = (mi_step * 8) >> pd->subsampling_y; + int bh = (mi_step * MI_SIZE) >> pd->subsampling_y; int row, col; int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride; uint8_t *dst = use_tmp_dst_buf ? 
- &final_buf[plane][(i * 8 * dst_stride) >> pd->subsampling_y] : - &pd->dst.buf[(i * 8 * dst_stride) >> pd->subsampling_y]; + &final_buf[plane][(i * MI_SIZE * dst_stride) >> pd->subsampling_y] : + &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y]; int tmp_stride = tmp_stride2[plane]; uint8_t *tmp = &tmp_buf2[plane] - [(i * 8 * tmp_stride) >> pd->subsampling_y]; + [(i * MI_SIZE * tmp_stride) >> pd->subsampling_y]; const uint8_t *mask[2]; setup_obmc_mask(bw, mask); @@ -1352,8 +1393,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, for (row = 0; row < bh; ++row) { for (col = 0; col < bw; ++col) - dst16[col] = (mask[0][col] * dst16[col] + mask[1][col] * tmp16[col] - + 32) >> 6; + dst16[col] = ROUND_POWER_OF_TWO(mask[0][col] * dst16[col] + + mask[1][col] * tmp16[col], 6); dst16 += dst_stride; tmp16 += tmp_stride; } @@ -1361,8 +1402,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, #endif // CONFIG_VP9_HIGHBITDEPTH for (row = 0; row < bh; ++row) { for (col = 0; col < bw; ++col) - dst[col] = (mask[0][col] * dst[col] + mask[1][col] * tmp[col] + 32) - >> 6; + dst[col] = ROUND_POWER_OF_TWO(mask[0][col] * dst[col] + + mask[1][col] * tmp[col], 6); dst += dst_stride; tmp += tmp_stride; } @@ -1572,7 +1613,31 @@ static void combine_interintra(PREDICTION_MODE mode, static const int scale_bits = 8; static const int scale_max = 256; static const int scale_round = 127; - static const int weights1d[64] = { +#if CONFIG_EXT_PARTITION + // TODO(debargha): Fill in the correct weights for 128 wide blocks. 
+ static const int weights1d[MAX_SB_SIZE] = { + 128, 128, 125, 125, 122, 122, 119, 119, + 116, 116, 114, 114, 111, 111, 109, 109, + 107, 107, 105, 105, 103, 103, 101, 101, + 99, 99, 97, 97, 96, 96, 94, 94, + 93, 93, 91, 91, 90, 90, 89, 89, + 88, 88, 86, 86, 85, 85, 84, 84, + 83, 83, 82, 82, 81, 81, 81, 81, + 80, 80, 79, 79, 78, 78, 78, 78, + 77, 77, 76, 76, 76, 76, 75, 75, + 75, 75, 74, 74, 74, 74, 73, 73, + 73, 73, 72, 72, 72, 72, 71, 71, + 71, 71, 71, 71, 70, 70, 70, 70, + 70, 70, 70, 70, 69, 69, 69, 69, + 69, 69, 69, 69, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + }; + static int size_scales[BLOCK_SIZES] = { + 32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 + }; +#else + static const int weights1d[MAX_SB_SIZE] = { 128, 125, 122, 119, 116, 114, 111, 109, 107, 105, 103, 101, 99, 97, 96, 94, 93, 91, 90, 89, 88, 86, 85, 84, @@ -1582,14 +1647,14 @@ static void combine_interintra(PREDICTION_MODE mode, 70, 70, 69, 69, 69, 69, 68, 68, 68, 68, 68, 67, 67, 67, 67, 67, }; - const int bw = 4 << b_width_log2_lookup[plane_bsize]; - const int bh = 4 << b_height_log2_lookup[plane_bsize]; + static int size_scales[BLOCK_SIZES] = { + 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 + }; +#endif // CONFIG_EXT_PARTITION - int size = VPXMAX(bw, bh); - int size_scale = (size >= 64 ? 1 : - size == 32 ? 2 : - size == 16 ? 4 : - size == 8 ? 8 : 16); + const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; + const int size_scale = size_scales[plane_bsize]; int i, j; if (use_wedge_interintra && get_wedge_bits(bsize)) { @@ -1712,7 +1777,31 @@ static void combine_interintra_highbd(PREDICTION_MODE mode, static const int scale_bits = 8; static const int scale_max = 256; static const int scale_round = 127; - static const int weights1d[64] = { +#if CONFIG_EXT_PARTITION + // TODO(debargha): Fill in the correct weights for 128 wide blocks. 
+ static const int weights1d[MAX_SB_SIZE] = { + 128, 128, 125, 125, 122, 122, 119, 119, + 116, 116, 114, 114, 111, 111, 109, 109, + 107, 107, 105, 105, 103, 103, 101, 101, + 99, 99, 97, 97, 96, 96, 94, 94, + 93, 93, 91, 91, 90, 90, 89, 89, + 88, 88, 86, 86, 85, 85, 84, 84, + 83, 83, 82, 82, 81, 81, 81, 81, + 80, 80, 79, 79, 78, 78, 78, 78, + 77, 77, 76, 76, 76, 76, 75, 75, + 75, 75, 74, 74, 74, 74, 73, 73, + 73, 73, 72, 72, 72, 72, 71, 71, + 71, 71, 71, 71, 70, 70, 70, 70, + 70, 70, 70, 70, 69, 69, 69, 69, + 69, 69, 69, 69, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + }; + static int size_scales[BLOCK_SIZES] = { + 32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 + }; +#else + static const int weights1d[MAX_SB_SIZE] = { 128, 125, 122, 119, 116, 114, 111, 109, 107, 105, 103, 101, 99, 97, 96, 94, 93, 91, 90, 89, 88, 86, 85, 84, @@ -1722,15 +1811,16 @@ static void combine_interintra_highbd(PREDICTION_MODE mode, 70, 70, 69, 69, 69, 69, 68, 68, 68, 68, 68, 67, 67, 67, 67, 67, }; - const int bw = 4 << b_width_log2_lookup[plane_bsize]; - const int bh = 4 << b_height_log2_lookup[plane_bsize]; + static int size_scales[BLOCK_SIZES] = { + 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 + }; +#endif // CONFIG_EXT_PARTITION - int size = VPXMAX(bw, bh); - int size_scale = (size >= 64 ? 1 : - size == 32 ? 2 : - size == 16 ? 4 : - size == 8 ? 
8 : 16); + const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; + const int size_scale = size_scales[plane_bsize]; int i, j; + uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8); uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8); uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8); @@ -1889,8 +1979,7 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, const int bw = 4 << b_width_log2_lookup[bsize]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, - intrapredictor[CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]); build_intra_predictors_for_interintra( xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, CONVERT_TO_BYTEPTR(intrapredictor), bw, @@ -1907,7 +1996,7 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, } #endif // CONFIG_VP9_HIGHBITDEPTH { - uint8_t intrapredictor[CU_SIZE * CU_SIZE]; + uint8_t intrapredictor[MAX_SB_SQUARE]; build_intra_predictors_for_interintra( xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, intrapredictor, bw, @@ -1931,8 +2020,7 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, const int bw = 4 << b_width_log2_lookup[uvbsize]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, - uintrapredictor[CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]); build_intra_predictors_for_interintra( xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, CONVERT_TO_BYTEPTR(uintrapredictor), bw, @@ -1950,7 +2038,7 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, } #endif // CONFIG_VP9_HIGHBITDEPTH { - uint8_t uintrapredictor[CU_SIZE * CU_SIZE]; + uint8_t uintrapredictor[MAX_SB_SQUARE]; build_intra_predictors_for_interintra( xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, uintrapredictor, bw, @@ -2117,30 +2205,30 @@ static void 
build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, if (ref && get_wedge_bits(mi->mbmi.sb_type) && mi->mbmi.use_wedge_interinter) { #if CONFIG_VP9_HIGHBITDEPTH - uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE]; + uint8_t tmp_dst_[2 * MAX_SB_SQUARE]; uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; #else - uint8_t tmp_dst[CU_SIZE * CU_SIZE]; + uint8_t tmp_dst[MAX_SB_SQUARE]; #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int k; for (k = 0; k < h; ++k) - memcpy(tmp_dst_ + 2 * CU_SIZE * k, ext_dst1 + + memcpy(tmp_dst_ + 2 * MAX_SB_SIZE * k, ext_dst1 + ext_dst_stride1 * 2 * k, w * 2); } else { int k; for (k = 0; k < h; ++k) - memcpy(tmp_dst_ + CU_SIZE * k, ext_dst1 + + memcpy(tmp_dst_ + MAX_SB_SIZE * k, ext_dst1 + ext_dst_stride1 * k, w); } #else { int k; for (k = 0; k < h; ++k) - memcpy(tmp_dst + CU_SIZE * k, ext_dst1 + + memcpy(tmp_dst + MAX_SB_SIZE * k, ext_dst1 + ext_dst_stride1 * k, w); } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -2149,20 +2237,20 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { build_masked_compound_extend_highbd( - dst, dst_buf->stride, tmp_dst, CU_SIZE, plane, + dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_y, wedge_offset_x, h, w); } else { build_masked_compound_extend( - dst, dst_buf->stride, tmp_dst, CU_SIZE, plane, + dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_y, wedge_offset_x, h, w); } #else build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, - CU_SIZE, plane, + MAX_SB_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_y, wedge_offset_x, h, w); @@ -2171,12 +2259,12 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int 
plane, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, - CU_SIZE, + MAX_SB_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); else #endif // CONFIG_VP9_HIGHBITDEPTH - build_masked_compound(dst, dst_buf->stride, tmp_dst, CU_SIZE, + build_masked_compound(dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); #endif // CONFIG_SUPERTX diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h index 4dcd203e7..75c371e39 100644 --- a/vp10/common/reconinter.h +++ b/vp10/common/reconinter.h @@ -443,8 +443,8 @@ void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, #endif // CONFIG_OBMC #if CONFIG_EXT_INTER -#define MASK_MASTER_SIZE (2 * CU_SIZE) -#define MASK_MASTER_STRIDE (2 * CU_SIZE) +#define MASK_MASTER_SIZE (2 * MAX_SB_SIZE) +#define MASK_MASTER_STRIDE (2 * MAX_SB_SIZE) void vp10_init_wedge_masks(); diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c index 11c4b940b..300005f74 100644 --- a/vp10/common/reconintra.c +++ b/vp10/common/reconintra.c @@ -44,30 +44,30 @@ static const uint8_t extend_modes[INTRA_MODES] = { NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // TM }; -static const uint8_t orders_64x64[1] = { 0 }; -static const uint8_t orders_64x32[2] = { 0, 1 }; -static const uint8_t orders_32x64[2] = { 0, 1 }; -static const uint8_t orders_32x32[4] = { +static const uint8_t orders_128x128[1] = { 0 }; +static const uint8_t orders_128x64[2] = { 0, 1 }; +static const uint8_t orders_64x128[2] = { 0, 1 }; +static const uint8_t orders_64x64[4] = { 0, 1, 2, 3, }; -static const uint8_t orders_32x16[8] = { +static const uint8_t orders_64x32[8] = { 0, 2, 1, 3, 4, 6, 5, 7, }; -static const uint8_t orders_16x32[8] = { +static const uint8_t orders_32x64[8] = { 0, 1, 2, 3, 4, 5, 6, 7, }; -static const uint8_t orders_16x16[16] = { +static const uint8_t orders_32x32[16] = { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 
14, 15, }; -static const uint8_t orders_16x8[32] = { +static const uint8_t orders_32x16[32] = { 0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, @@ -77,13 +77,13 @@ static const uint8_t orders_16x8[32] = { 20, 22, 28, 30, 21, 23, 29, 31, }; -static const uint8_t orders_8x16[32] = { +static const uint8_t orders_16x32[32] = { 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 18, 19, 24, 25, 26, 27, 20, 21, 22, 23, 28, 29, 30, 31, }; -static const uint8_t orders_8x8[64] = { +static const uint8_t orders_16x16[64] = { 0, 1, 4, 5, 16, 17, 20, 21, 2, 3, 6, 7, 18, 19, 22, 23, 8, 9, 12, 13, 24, 25, 28, 29, @@ -93,24 +93,96 @@ static const uint8_t orders_8x8[64] = { 40, 41, 44, 45, 56, 57, 60, 61, 42, 43, 46, 47, 58, 59, 62, 63, }; -static const uint8_t *const orders[BLOCK_SIZES] = { - orders_8x8, orders_8x8, orders_8x8, orders_8x8, - orders_8x16, orders_16x8, orders_16x16, - orders_16x32, orders_32x16, orders_32x32, - orders_32x64, orders_64x32, orders_64x64, + +#if CONFIG_EXT_PARTITION +static const uint8_t orders_16x8[128] = { + 0, 2, 8, 10, 32, 34, 40, 42, + 1, 3, 9, 11, 33, 35, 41, 43, + 4, 6, 12, 14, 36, 38, 44, 46, + 5, 7, 13, 15, 37, 39, 45, 47, + 16, 18, 24, 26, 48, 50, 56, 58, + 17, 19, 25, 27, 49, 51, 57, 59, + 20, 22, 28, 30, 52, 54, 60, 62, + 21, 23, 29, 31, 53, 55, 61, 63, + 64, 66, 72, 74, 96, 98, 104, 106, + 65, 67, 73, 75, 97, 99, 105, 107, + 68, 70, 76, 78, 100, 102, 108, 110, + 69, 71, 77, 79, 101, 103, 109, 111, + 80, 82, 88, 90, 112, 114, 120, 122, + 81, 83, 89, 91, 113, 115, 121, 123, + 84, 86, 92, 94, 116, 118, 124, 126, + 85, 87, 93, 95, 117, 119, 125, 127, }; +static const uint8_t orders_8x16[128] = { + 0, 1, 2, 3, 8, 9, 10, 11, 32, 33, 34, 35, 40, 41, 42, 43, + 4, 5, 6, 7, 12, 13, 14, 15, 36, 37, 38, 39, 44, 45, 46, 47, + 16, 17, 18, 19, 24, 25, 26, 27, 48, 49, 50, 51, 56, 57, 58, 59, + 20, 21, 22, 23, 28, 29, 30, 31, 52, 53, 54, 55, 60, 61, 62, 63, + 64, 65, 66, 67, 72, 73, 74, 75, 96, 97, 98, 99, 104, 105, 106, 107, + 68, 69, 70, 71, 76, 77, 
78, 79, 100, 101, 102, 103, 108, 109, 110, 111, + 80, 81, 82, 83, 88, 89, 90, 91, 112, 113, 114, 115, 120, 121, 122, 123, + 84, 85, 86, 87, 92, 93, 94, 95, 116, 117, 118, 119, 124, 125, 126, 127, +}; +static const uint8_t orders_8x8[256] = { +0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85, +2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, 86, 87, +8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, 89, 92, 93, +10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, 90, 91, 94, 95, +32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, 101, 112, 113, 116, 117, +34, 35, 38, 39, 50, 51, 54, 55, 98, 99, 102, 103, 114, 115, 118, 119, +40, 41, 44, 45, 56, 57, 60, 61, 104, 105, 108, 109, 120, 121, 124, 125, +42, 43, 46, 47, 58, 59, 62, 63, 106, 107, 110, 111, 122, 123, 126, 127, +128, 129, 132, 133, 144, 145, 148, 149, 192, 193, 196, 197, 208, 209, 212, 213, +130, 131, 134, 135, 146, 147, 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, +136, 137, 140, 141, 152, 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, +138, 139, 142, 143, 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, +160, 161, 164, 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, +162, 163, 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, +168, 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, +170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, 255, +}; + +static const uint8_t *const orders[BLOCK_SIZES] = { + // 4X4 + orders_8x8, + // 4X8, 8X4, 8X8 + orders_8x8, orders_8x8, orders_8x8, + // 8X16, 16X8, 16X16 + orders_8x16, orders_16x8, orders_16x16, + // 16X32, 32X16, 32X32 + orders_16x32, orders_32x16, orders_32x32, + // 32X64, 64X32, 64X64 + orders_32x64, orders_64x32, orders_64x64, + // 64x128, 128x64, 128x128 + orders_64x128, orders_128x64, orders_128x128 +}; +#else +static const uint8_t *const orders[BLOCK_SIZES] = { + // 4X4 + orders_16x16, + // 4X8, 8X4, 8X8 + orders_16x16, orders_16x16, orders_16x16, + 
// 8X16, 16X8, 16X16 + orders_16x32, orders_32x16, orders_32x32, + // 16X32, 32X16, 32X32 + orders_32x64, orders_64x32, orders_64x64, + // 32X64, 64X32, 64X64 + orders_64x128, orders_128x64, orders_128x128 +}; +#endif // CONFIG_EXT_PARTITION + #if CONFIG_EXT_PARTITION_TYPES -static const uint8_t orders_verta_32x32[4] = { +static const uint8_t orders_verta_64x64[4] = { 0, 2, 1, 2, }; -static const uint8_t orders_verta_16x16[16] = { +static const uint8_t orders_verta_32x32[16] = { 0, 2, 4, 6, 1, 2, 5, 6, 8, 10, 12, 14, 9, 10, 13, 14, }; -static const uint8_t orders_verta_8x8[64] = { +static const uint8_t orders_verta_16x16[64] = { 0, 2, 4, 6, 16, 18, 20, 22, 1, 2, 5, 6, 17, 18, 21, 22, 8, 10, 12, 14, 24, 26, 28, 30, @@ -120,12 +192,53 @@ static const uint8_t orders_verta_8x8[64] = { 40, 42, 44, 46, 56, 58, 60, 62, 41, 42, 45, 46, 57, 58, 61, 62, }; -static const uint8_t *const orders_verta[BLOCK_SIZES] = { - orders_verta_8x8, orders_verta_8x8, orders_verta_8x8, orders_verta_8x8, - orders_8x16, orders_16x8, orders_verta_16x16, - orders_16x32, orders_32x16, orders_verta_32x32, - orders_32x64, orders_64x32, orders_64x64, +#if CONFIG_EXT_PARTITION +static const uint8_t orders_verta_8x8[256] = { +0, 2, 4, 6, 16, 18, 20, 22, 64, 66, 68, 70, 80, 82, 84, 86, +1, 2, 5, 6, 17, 18, 21, 22, 65, 66, 69, 70, 81, 82, 85, 86, +8, 10, 12, 14, 24, 26, 28, 30, 72, 74, 76, 78, 88, 90, 92, 94, +9, 10, 13, 14, 25, 26, 29, 30, 73, 74, 77, 78, 89, 90, 93, 94, +32, 34, 36, 38, 48, 50, 52, 54, 96, 98, 100, 102, 112, 114, 116, 118, +33, 34, 37, 38, 49, 50, 53, 54, 97, 98, 101, 102, 113, 114, 117, 118, +40, 42, 44, 46, 56, 58, 60, 62, 104, 106, 108, 110, 120, 122, 124, 126, +41, 42, 45, 46, 57, 58, 61, 62, 105, 106, 109, 110, 121, 122, 125, 126, +128, 130, 132, 134, 144, 146, 148, 150, 192, 194, 196, 198, 208, 210, 212, 214, +129, 130, 133, 134, 145, 146, 149, 150, 193, 194, 197, 198, 209, 210, 213, 214, +136, 138, 140, 142, 152, 154, 156, 158, 200, 202, 204, 206, 216, 218, 220, 222, +137, 138, 
141, 142, 153, 154, 157, 158, 201, 202, 205, 206, 217, 218, 221, 222, +160, 162, 164, 166, 176, 178, 180, 182, 224, 226, 228, 230, 240, 242, 244, 246, +161, 162, 165, 166, 177, 178, 181, 182, 225, 226, 229, 230, 241, 242, 245, 246, +168, 170, 172, 174, 184, 186, 188, 190, 232, 234, 236, 238, 248, 250, 252, 254, +169, 170, 173, 174, 185, 186, 189, 190, 233, 234, 237, 238, 249, 250, 253, 254, }; +static const uint8_t *const orders_verta[BLOCK_SIZES] = { + // 4X4 + orders_verta_8x8, + // 4X8, 8X4, 8X8 + orders_verta_8x8, orders_verta_8x8, orders_verta_8x8, + // 8X16, 16X8, 16X16 + orders_8x16, orders_16x8, orders_verta_16x16, + // 16X32, 32X16, 32X32 + orders_16x32, orders_32x16, orders_verta_32x32, + // 32X64, 64X32, 64X64 + orders_32x64, orders_64x32, orders_verta_64x64, + // 64x128, 128x64, 128x128 + orders_64x128, orders_128x64, orders_128x128 +}; +#else +static const uint8_t *const orders_verta[BLOCK_SIZES] = { + // 4X4 + orders_verta_16x16, + // 4X8, 8X4, 8X8 + orders_verta_16x16, orders_verta_16x16, orders_verta_16x16, + // 8X16, 16X8, 16X16 + orders_16x32, orders_32x16, orders_verta_32x32, + // 16X32, 32X16, 32X32 + orders_32x64, orders_64x32, orders_verta_64x64, + // 32X64, 64X32, 64X64 + orders_64x128, orders_128x64, orders_128x128 +}; +#endif // CONFIG_EXT_PARTITION #endif // CONFIG_EXT_PARTITION_TYPES static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col, @@ -154,24 +267,26 @@ static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col, order = orders_verta[bsize]; else #endif // CONFIG_EXT_PARTITION_TYPES - order = orders[bsize]; + order = orders[bsize]; if (x + step < w) return 1; - mi_row = (mi_row & 7) >> hl; - mi_col = (mi_col & 7) >> wl; + mi_row = (mi_row & MI_MASK) >> hl; + mi_col = (mi_col & MI_MASK) >> wl; // If top row of coding unit if (mi_row == 0) return 1; // If rightmost column of coding unit - if (((mi_col + 1) << wl) >= 8) + if (((mi_col + 1) << wl) >= MI_BLOCK_SIZE) return 0; - my_order = order[((mi_row + 0) << (3 - 
wl)) + mi_col + 0]; - tr_order = order[((mi_row - 1) << (3 - wl)) + mi_col + 1]; + my_order = + order[((mi_row + 0) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 0]; + tr_order = + order[((mi_row - 1) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 1]; return my_order > tr_order; } else { @@ -200,17 +315,17 @@ static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col, if (y + step < h) return 1; - mi_row = (mi_row & 7) >> hl; - mi_col = (mi_col & 7) >> wl; + mi_row = (mi_row & MI_MASK) >> hl; + mi_col = (mi_col & MI_MASK) >> wl; if (mi_col == 0) - return (mi_row << (hl + !ss_y)) + y + step < (8 << !ss_y); + return (mi_row << (hl + !ss_y)) + y + step < (MI_BLOCK_SIZE << !ss_y); - if (((mi_row + 1) << hl) >= 8) + if (((mi_row + 1) << hl) >= MI_BLOCK_SIZE) return 0; - my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0]; - bl_order = order[((mi_row + 1) << (3 - wl)) + mi_col - 1]; + my_order = order[((mi_row + 0) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 0]; + bl_order = order[((mi_row + 1) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col - 1]; return bl_order < my_order; } @@ -336,8 +451,8 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs, if (filter_type != INTRA_FILTER_LINEAR) { const int pad_size = SUBPEL_TAPS >> 1; int len; - DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][64]); - DECLARE_ALIGNED(16, uint8_t, src[64 + SUBPEL_TAPS]); + DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, src[MAX_SB_SIZE + SUBPEL_TAPS]); uint8_t flags[SUBPEL_SHIFTS]; memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0])); @@ -467,8 +582,8 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs, if (filter_type != INTRA_FILTER_LINEAR) { const int pad_size = SUBPEL_TAPS >> 1; int len, i; - DECLARE_ALIGNED(16, uint8_t, buf[64][4 * SUBPEL_SHIFTS]); - DECLARE_ALIGNED(16, uint8_t, src[(64 + SUBPEL_TAPS) * 4]); + DECLARE_ALIGNED(16, uint8_t, buf[MAX_SB_SIZE][4 * SUBPEL_SHIFTS]); + DECLARE_ALIGNED(16, uint8_t, 
src[(MAX_SB_SIZE + SUBPEL_TAPS) * 4]); uint8_t flags[SUBPEL_SHIFTS]; memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0])); @@ -1063,8 +1178,8 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, int i; uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - DECLARE_ALIGNED(16, uint16_t, left_col[64]); - DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); + DECLARE_ALIGNED(16, uint16_t, left_col[MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint16_t, above_data[MAX_SB_SIZE + 16]); uint16_t *above_row = above_data + 16; const uint16_t *const_above_row = above_row; const int bs = 4 << tx_size; @@ -1220,9 +1335,9 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, int n_left_px, int n_bottomleft_px, int plane) { int i; - DECLARE_ALIGNED(16, uint8_t, left_col[64]); + DECLARE_ALIGNED(16, uint8_t, left_col[MAX_SB_SIZE]); const uint8_t *above_ref = ref - ref_stride; - DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); + DECLARE_ALIGNED(16, uint8_t, above_data[MAX_SB_SIZE + 16]); uint8_t *above_row = above_data + 16; const uint8_t *const_above_row = above_row; const int bs = 4 << tx_size; diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c index c48eb46b5..b2339c686 100644 --- a/vp10/common/thread_common.c +++ b/vp10/common/thread_common.c @@ -109,6 +109,12 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, path = LF_PATH_SLOW; #endif // !CONFIG_EXT_PARTITION_TYPES +#if CONFIG_EXT_PARTITION + printf("STOPPING: This code has not been modified to work with the " + "extended coding unit size experiment"); + exit(EXIT_FAILURE); +#endif // CONFIG_EXT_PARTITION + for (mi_row = start; mi_row < stop; mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; @@ -176,6 +182,12 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, const int num_workers = VPXMIN(nworkers, tile_cols); int i; +#if 
CONFIG_EXT_PARTITION + printf("STOPPING: This code has not been modified to work with the " + "extended coding unit size experiment"); + exit(EXIT_FAILURE); +#endif // CONFIG_EXT_PARTITION + if (!lf_sync->sync_range || sb_rows != lf_sync->rows || num_workers > lf_sync->num_workers) { vp10_loop_filter_dealloc(lf_sync); diff --git a/vp10/common/vp10_convolve.c b/vp10/common/vp10_convolve.c index 8fdd8f16c..9e0dc29c3 100644 --- a/vp10/common/vp10_convolve.c +++ b/vp10/common/vp10_convolve.c @@ -5,8 +5,8 @@ #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" -#define MAX_BLOCK_WIDTH (64) -#define MAX_BLOCK_HEIGHT (64) +#define MAX_BLOCK_WIDTH (MAX_SB_SIZE) +#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE) #define MAX_STEP (32) #define MAX_FILTER_TAP (12) diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 8aa002700..2e49b3685 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -489,7 +489,7 @@ static void extend_and_predict_highbd(const uint8_t *buf_ptr1, MACROBLOCKD *xd, int w, int h, int ref, int xs, int ys) { DECLARE_ALIGNED(16, uint16_t, - mc_buf_high[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]); + mc_buf_high[(MAX_SB_SIZE + 16) * 2 * (MAX_SB_SIZE + 16) * 2]); const uint8_t *buf_ptr; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -535,7 +535,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, #endif // CONFIG_EXT_INTER && CONFIG_SUPERTX MACROBLOCKD *xd, int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint8_t, mc_buf[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]); + DECLARE_ALIGNED(16, uint8_t, + mc_buf[(MAX_SB_SIZE + 16) * 2 * (MAX_SB_SIZE + 16) * 2]); const uint8_t *buf_ptr; build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, @@ -1093,7 +1094,7 @@ static void set_param_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd, } #if CONFIG_VAR_TX xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); 
+ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK); set_txfm_ctx(xd->left_txfm_context, xd->mi[0]->mbmi.tx_size, bh); set_txfm_ctx(xd->above_txfm_context, xd->mi[0]->mbmi.tx_size, bw); #endif @@ -1304,38 +1305,38 @@ static void dec_predict_sb_complex(VP10Decoder *const pbi, uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; DECLARE_ALIGNED(16, uint8_t, - tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); DECLARE_ALIGNED(16, uint8_t, - tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); DECLARE_ALIGNED(16, uint8_t, - tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); - int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; - int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; - int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + tmp_buf3[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); + int dst_stride1[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; + int dst_stride2[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; + int dst_stride3[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); - dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); - dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_TX_SQUARE * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_TX_SQUARE * len); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); - dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); - dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_TX_SQUARE * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_TX_SQUARE * len); dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); - dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len); - 
dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAX_TX_SQUARE * len); + dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAX_TX_SQUARE * len); } else { #endif dst_buf1[0] = tmp_buf1; - dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; - dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf1[1] = tmp_buf1 + MAX_TX_SQUARE; + dst_buf1[2] = tmp_buf1 + 2 * MAX_TX_SQUARE; dst_buf2[0] = tmp_buf2; - dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN; - dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf2[1] = tmp_buf2 + MAX_TX_SQUARE; + dst_buf2[2] = tmp_buf2 + 2 * MAX_TX_SQUARE; dst_buf3[0] = tmp_buf3; - dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; - dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf3[1] = tmp_buf3 + MAX_TX_SQUARE; + dst_buf3[2] = tmp_buf3 + 2 * MAX_TX_SQUARE; #if CONFIG_VP9_HIGHBITDEPTH } #endif @@ -1900,39 +1901,37 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, if (mbmi->obmc) { #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint8_t, - tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); DECLARE_ALIGNED(16, uint8_t, - tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); #else DECLARE_ALIGNED(16, uint8_t, - tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]); DECLARE_ALIGNED(16, uint8_t, - tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]); #endif // CONFIG_VP9_HIGHBITDEPTH uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; - int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; - int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; + int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; + int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; assert(mbmi->sb_type >= BLOCK_8X8); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags 
& YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); - dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len); - dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + - CU_SIZE * CU_SIZE * 2 * len); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); - dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len); - dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + - CU_SIZE * CU_SIZE * 2 * len); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len); } else { #endif // CONFIG_VP9_HIGHBITDEPTH dst_buf1[0] = tmp_buf1; - dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE; - dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2; + dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE; + dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2; dst_buf2[0] = tmp_buf2; - dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE; - dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2; + dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE; + dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2; #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -3281,7 +3280,7 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi, #if CONFIG_ANS &td->token_ans, #endif // CONFIG_ANS - BLOCK_64X64, 4); + BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2); } pbi->mb.corrupted |= td->xd.corrupted; if (pbi->mb.corrupted) @@ -3396,7 +3395,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data, #if CONFIG_ANS &tile_data->token_ans, #endif // CONFIG_ANS - BLOCK_64X64, 4); + BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2); } } return !tile_data->xd.corrupted; diff --git a/vp10/decoder/decoder.h b/vp10/decoder/decoder.h index 5337cbed4..23c742421 100644 --- a/vp10/decoder/decoder.h +++ b/vp10/decoder/decoder.h @@ -39,8 +39,8 @@ typedef struct TileData { #endif // CONFIG_ANS DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* 
dqcoeff are shared by all the planes. So planes must be decoded serially */ - DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); - DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]); + DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]); } TileData; typedef struct TileWorkerData { @@ -52,8 +52,8 @@ typedef struct TileWorkerData { FRAME_COUNTS counts; DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. So planes must be decoded serially */ - DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); - DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]); + DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]); struct vpx_internal_error_info error_info; } TileWorkerData; diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c index e0f59fb6e..bf4822197 100644 --- a/vp10/decoder/detokenize.c +++ b/vp10/decoder/detokenize.c @@ -62,7 +62,7 @@ static int decode_coefs(const MACROBLOCKD *xd, const vpx_prob *prob; unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; - uint8_t token_cache[32 * 32]; + uint8_t token_cache[MAX_TX_SQUARE]; const uint8_t *band_translate = get_band_translate(tx_size); int dq_shift; int v, token; @@ -245,7 +245,7 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd, const vpx_prob *prob; unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; - uint8_t token_cache[32 * 32]; + uint8_t token_cache[MAX_TX_SQUARE]; const uint8_t *band_translate = get_band_translate(tx_size); int dq_shift; int v, token; diff --git a/vp10/encoder/aq_complexity.c b/vp10/encoder/aq_complexity.c index 2506a4e55..9f73eccf7 100644 --- a/vp10/encoder/aq_complexity.c +++ b/vp10/encoder/aq_complexity.c @@ -116,8 +116,8 @@ void vp10_caq_select_segment(VP10_COMP *cpi, 
MACROBLOCK *mb, BLOCK_SIZE bs, VP10_COMMON *const cm = &cpi->common; const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int bw = num_8x8_blocks_wide_lookup[BLOCK_LARGEST]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_LARGEST]; const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]); const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]); int x, y; diff --git a/vp10/encoder/aq_cyclicrefresh.c b/vp10/encoder/aq_cyclicrefresh.c index 4d7b7d950..defb97401 100644 --- a/vp10/encoder/aq_cyclicrefresh.c +++ b/vp10/encoder/aq_cyclicrefresh.c @@ -415,9 +415,9 @@ static void cyclic_refresh_update_map(VP10_COMP *const cpi) { bl_index = mi_row * cm->mi_cols + mi_col; // Loop through all 8x8 blocks in superblock and update map. xmis = - VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_64X64]); + VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_LARGEST]); ymis = - VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_64X64]); + VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_LARGEST]); for (y = 0; y < ymis; y++) { for (x = 0; x < xmis; x++) { const int bl_index2 = bl_index + y * cm->mi_cols + x; diff --git a/vp10/encoder/aq_variance.c b/vp10/encoder/aq_variance.c index bed5162fb..45dc8b8f1 100644 --- a/vp10/encoder/aq_variance.c +++ b/vp10/encoder/aq_variance.c @@ -32,9 +32,11 @@ static const int segment_id[ENERGY_SPAN] = {0, 1, 1, 2, 3, 4}; #define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN] -DECLARE_ALIGNED(16, static const uint8_t, vp10_64_zeros[64]) = {0}; +DECLARE_ALIGNED(16, static const uint8_t, + vp10_all_zeros[MAX_SB_SIZE]) = {0}; #if CONFIG_VP9_HIGHBITDEPTH -DECLARE_ALIGNED(16, static const uint16_t, vp10_highbd_64_zeros[64]) = {0}; +DECLARE_ALIGNED(16, static const uint16_t, + vp10_highbd_all_zeros[MAX_SB_SIZE]) = {0}; #endif unsigned int 
vp10_vaq_segment_id(int energy) { @@ -153,17 +155,17 @@ static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride, - CONVERT_TO_BYTEPTR(vp10_highbd_64_zeros), 0, bw, bh, + CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros), 0, bw, bh, &sse, &avg); sse >>= 2 * (xd->bd - 8); avg >>= (xd->bd - 8); } else { aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, - vp10_64_zeros, 0, bw, bh, &sse, &avg); + vp10_all_zeros, 0, bw, bh, &sse, &avg); } #else aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, - vp10_64_zeros, 0, bw, bh, &sse, &avg); + vp10_all_zeros, 0, bw, bh, &sse, &avg); #endif // CONFIG_VP9_HIGHBITDEPTH var = sse - (((int64_t)avg * avg) / (bw * bh)); return (256 * var) / (bw * bh); @@ -172,17 +174,17 @@ static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, - CONVERT_TO_BYTEPTR(vp10_highbd_64_zeros), + CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros), 0, &sse); } else { var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, - vp10_64_zeros, 0, &sse); + vp10_all_zeros, 0, &sse); } #else var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, - vp10_64_zeros, 0, &sse); + vp10_all_zeros, 0, &sse); #endif // CONFIG_VP9_HIGHBITDEPTH return (256 * var) >> num_pels_log2_lookup[bs]; } diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index ac731352e..721a7a6da 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -1893,7 +1893,7 @@ static void write_modes(VP10_COMP *const cpi, for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, 0, - mi_row, mi_col, BLOCK_64X64); + mi_row, mi_col, BLOCK_LARGEST); } } } diff --git a/vp10/encoder/block.h 
b/vp10/encoder/block.h index 582f3bc23..b5e61d99e 100644 --- a/vp10/encoder/block.h +++ b/vp10/encoder/block.h @@ -28,7 +28,7 @@ typedef struct { } diff; typedef struct macroblock_plane { - DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]); + DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]); tran_low_t *qcoeff; tran_low_t *coeff; uint16_t *eobs; @@ -63,10 +63,10 @@ typedef struct { } MB_MODE_INFO_EXT; typedef struct { - uint8_t best_palette_color_map[4096]; - double kmeans_data_buf[2 * 4096]; - uint8_t kmeans_indices_buf[4096]; - uint8_t kmeans_pre_indices_buf[4096]; + uint8_t best_palette_color_map[MAX_SB_SQUARE]; + double kmeans_data_buf[2 * MAX_SB_SQUARE]; + uint8_t kmeans_indices_buf[MAX_SB_SQUARE]; + uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE]; } PALETTE_BUFFER; typedef struct macroblock MACROBLOCK; @@ -140,11 +140,11 @@ struct macroblock { // Notes transform blocks where no coefficents are coded. // Set during mode selection. Read during block encoding. - uint8_t zcoeff_blk[TX_SIZES][256]; + uint8_t zcoeff_blk[TX_SIZES][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; #if CONFIG_VAR_TX - uint8_t blk_skip[MAX_MB_PLANE][256]; + uint8_t blk_skip[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; #if CONFIG_REF_MV - uint8_t blk_skip_drl[MAX_MB_PLANE][256]; + uint8_t blk_skip_drl[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; #endif #endif @@ -164,12 +164,12 @@ struct macroblock { int quant_fp; // skip forward transform and quantization - uint8_t skip_txfm[MAX_MB_PLANE][4]; + uint8_t skip_txfm[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB]; #define SKIP_TXFM_NONE 0 #define SKIP_TXFM_AC_DC 1 #define SKIP_TXFM_AC_ONLY 2 - int64_t bsse[MAX_MB_PLANE][4]; + int64_t bsse[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB]; // Used to store sub partition's choices. 
MV pred_mv[MAX_REF_FRAMES]; diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c index 0a7619530..b7c826045 100644 --- a/vp10/encoder/context_tree.c +++ b/vp10/encoder/context_tree.c @@ -11,11 +11,14 @@ #include "vp10/encoder/context_tree.h" #include "vp10/encoder/encoder.h" -static const BLOCK_SIZE square[] = { +static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 2] = { BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, +#if CONFIG_EXT_PARTITION + BLOCK_128X128, +#endif // CONFIG_EXT_PARTITION }; static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk, @@ -53,6 +56,14 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk, ctx->eobs_pbuf[i][k] = ctx->eobs[i][k]; } } + + if (cm->allow_screen_content_tools) { + for (i = 0; i < 2; ++i) { + CHECK_MEM_ERROR(cm, ctx->color_index_map[i], + vpx_memalign(32, + num_pix * sizeof(*ctx->color_index_map[i]))); + } + } } static void free_mode_context(PICK_MODE_CONTEXT *ctx) { @@ -177,8 +188,13 @@ static void free_tree_contexts(PC_TREE *tree) { // represents the state of our search. void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) { int i, j; +#if CONFIG_EXT_PARTITION + const int leaf_nodes = 256; + const int tree_nodes = 256 + 64 + 16 + 4 + 1; +#else const int leaf_nodes = 64; const int tree_nodes = 64 + 16 + 4 + 1; +#endif // CONFIG_EXT_PARTITION int pc_tree_index = 0; PC_TREE *this_pc; PICK_MODE_CONTEXT *this_leaf; @@ -217,7 +233,7 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) { // Each node has 4 leaf nodes, fill each block_size level of the tree // from leafs to the root. 
- for (nodes = 16; nodes > 0; nodes >>= 2) { + for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) { for (i = 0; i < nodes; ++i) { PC_TREE *const tree = &td->pc_tree[pc_tree_index]; alloc_tree_contexts(cm, tree, 4 << (2 * square_index)); @@ -233,11 +249,17 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) { } void vp10_free_pc_tree(ThreadData *td) { +#if CONFIG_EXT_PARTITION + const int leaf_nodes = 256; + const int tree_nodes = 256 + 64 + 16 + 4 + 1; +#else + const int leaf_nodes = 64; const int tree_nodes = 64 + 16 + 4 + 1; +#endif // CONFIG_EXT_PARTITION int i; // Set up all 4x4 mode contexts - for (i = 0; i < 64; ++i) + for (i = 0; i < leaf_nodes; ++i) free_mode_context(&td->leaf_tree[i]); // Sets up all the leaf nodes in the tree. diff --git a/vp10/encoder/context_tree.h b/vp10/encoder/context_tree.h index de17e3ea2..7b49354d6 100644 --- a/vp10/encoder/context_tree.h +++ b/vp10/encoder/context_tree.h @@ -49,7 +49,6 @@ typedef struct { // For current partition, only if all Y, U, and V transform blocks' // coefficients are quantized to 0, skippable is set to 0. 
int skippable; - uint8_t skip_txfm[MAX_MB_PLANE << 2]; int best_mode_index; int hybrid_pred_diff; int comp_pred_diff; diff --git a/vp10/encoder/denoiser.c b/vp10/encoder/denoiser.c index e87667653..fb0280a58 100644 --- a/vp10/encoder/denoiser.c +++ b/vp10/encoder/denoiser.c @@ -189,7 +189,7 @@ int vp10_denoiser_filter_c(const uint8_t *sig, int sig_stride, static uint8_t *block_start(uint8_t *framebuf, int stride, int mi_row, int mi_col) { - return framebuf + (stride * mi_row * 8) + (mi_col * 8); + return framebuf + (stride * mi_row * MI_SIZE) + (mi_col * MI_SIZE); } static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index bcedc0ce1..b73f66cce 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -93,7 +93,16 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, // purposes of activity masking. // Eventually this should be replaced by custom no-reference routines, // which will be faster. 
-static const uint8_t VP9_VAR_OFFS[64] = { +static const uint8_t VP10_VAR_OFFS[MAX_SB_SIZE] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, +#if CONFIG_EXT_PARTITION 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, @@ -102,10 +111,20 @@ static const uint8_t VP9_VAR_OFFS[64] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 +#endif // CONFIG_EXT_PARTITION }; #if CONFIG_VP9_HIGHBITDEPTH -static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { +static const uint16_t VP10_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, +#if CONFIG_EXT_PARTITION 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, @@ -114,9 +133,19 @@ static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 +#endif // CONFIG_EXT_PARTITION }; -static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { +static const uint16_t VP10_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = { + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, +#if CONFIG_EXT_PARTITION 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, @@ -125,9 +154,19 @@ static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4 +#endif // CONFIG_EXT_PARTITION }; -static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { +static const uint16_t VP10_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = { + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, +#if CONFIG_EXT_PARTITION 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, @@ -136,6 +175,7 @@ static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16 +#endif // CONFIG_EXT_PARTITION }; #endif // CONFIG_VP9_HIGHBITDEPTH @@ -144,7 +184,7 @@ unsigned int vp10_get_sby_perpixel_variance(VP10_COMP *cpi, BLOCK_SIZE bs) { unsigned int sse; 
const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, - VP9_VAR_OFFS, 0, &sse); + VP10_VAR_OFFS, 0, &sse); return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); } @@ -155,18 +195,18 @@ unsigned int vp10_high_get_sby_perpixel_variance( switch (bd) { case 10: var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, - CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), + CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10), 0, &sse); break; case 12: var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, - CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), + CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12), 0, &sse); break; case 8: default: var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, - CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), + CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8), 0, &sse); break; } @@ -406,6 +446,13 @@ typedef struct { v32x32 split[4]; } v64x64; +#if CONFIG_EXT_PARTITION +typedef struct { + partition_variance part_variances; + v64x64 split[4]; +} v128x128; +#endif // CONFIG_EXT_PARTITION + typedef struct { partition_variance *part_variances; var *split[4]; @@ -415,12 +462,24 @@ typedef enum { V16X16, V32X32, V64X64, +#if CONFIG_EXT_PARTITION + V128X128, +#endif // CONFIG_EXT_PARTITION } TREE_LEVEL; static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { int i; node->part_variances = NULL; switch (bsize) { +#if CONFIG_EXT_PARTITION + case BLOCK_128X128: { + v128x128 *vt = (v128x128 *) data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } +#endif // CONFIG_EXT_PARTITION case BLOCK_64X64: { v64x64 *vt = (v64x64 *) data; node->part_variances = &vt->part_variances; @@ -770,7 +829,8 @@ static int choose_partitioning(VP10_COMP *cpi, const uint8_t *d; int sp; int dp; - int pixels_wide = 64, pixels_high = 64; + int pixels_wide = 8 * num_8x8_blocks_wide_lookup[BLOCK_LARGEST]; + int pixels_high = 8 * num_8x8_blocks_high_lookup[BLOCK_LARGEST]; int64_t thresholds[4] = {cpi->vbp_thresholds[0], 
cpi->vbp_thresholds[1], cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]}; @@ -781,10 +841,11 @@ static int choose_partitioning(VP10_COMP *cpi, int variance4x4downsample[16]; int segment_id = CR_SEGMENT_ID_BASE; + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); + segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col); if (cyclic_refresh_segment_id_boosted(segment_id)) { int q = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex); @@ -792,11 +853,12 @@ static int choose_partitioning(VP10_COMP *cpi, } } -#if CONFIG_EXT_PARTITION_TYPES - assert(0); -#endif +#if CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES + printf("Not yet implemented: choose_partitioning\n"); + exit(-1); +#endif // CONFIG_EXT_PARTITION - set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); + set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_LARGEST); if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); @@ -813,8 +875,20 @@ static int choose_partitioning(VP10_COMP *cpi, const YV12_BUFFER_CONFIG *yv12_g = NULL; unsigned int y_sad, y_sad_g; - const BLOCK_SIZE bsize = BLOCK_32X32 - + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows); + + const int max_mi_block_size = num_8x8_blocks_wide_lookup[BLOCK_LARGEST]; + const int is_right_edge = mi_col + max_mi_block_size / 2 > cm->mi_cols; + const int is_left_edge = mi_row + max_mi_block_size / 2 > cm->mi_rows; + BLOCK_SIZE bsize; + + if (is_right_edge && is_left_edge) + bsize = get_subsize(BLOCK_LARGEST, PARTITION_SPLIT); + else if (is_right_edge) + bsize = get_subsize(BLOCK_LARGEST, PARTITION_VERT); + else if (is_left_edge) + bsize = get_subsize(BLOCK_LARGEST, PARTITION_HORZ); + else + bsize = BLOCK_LARGEST; assert(yv12 != NULL); yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); @@ -834,7 +908,7 @@ static int 
choose_partitioning(VP10_COMP *cpi, &cm->frame_refs[LAST_FRAME - 1].sf); mbmi->ref_frame[0] = LAST_FRAME; mbmi->ref_frame[1] = NONE; - mbmi->sb_type = BLOCK_64X64; + mbmi->sb_type = BLOCK_LARGEST; mbmi->mv[0].as_int = 0; mbmi->interp_filter = BILINEAR; @@ -849,7 +923,7 @@ static int choose_partitioning(VP10_COMP *cpi, x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv; } - vp10_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); + vp10_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_LARGEST); for (i = 1; i <= 2; ++i) { struct macroblock_plane *p = &x->plane[i]; @@ -868,33 +942,29 @@ static int choose_partitioning(VP10_COMP *cpi, d = xd->plane[0].dst.buf; dp = xd->plane[0].dst.stride; - // If the y_sad is very small, take 64x64 as partition and exit. - // Don't check on boosted segment for now, as 64x64 is suppressed there. - if (segment_id == CR_SEGMENT_ID_BASE && - y_sad < cpi->vbp_threshold_sad) { - const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; - if (mi_col + block_width / 2 < cm->mi_cols && - mi_row + block_height / 2 < cm->mi_rows) { - set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64); + // If the y_sad is very small, take the largest partition and exit. + // Don't check on boosted segment for now, as largest is suppressed there. 
+ if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) { + if (!is_right_edge && !is_left_edge) { + set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_LARGEST); return 0; } } } else { - d = VP9_VAR_OFFS; + d = VP10_VAR_OFFS; dp = 0; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { switch (xd->bd) { case 10: - d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); + d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10); break; case 12: - d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); + d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12); break; case 8: default: - d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); + d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8); break; } } @@ -1699,15 +1769,6 @@ static void rd_pick_sb_modes(VP10_COMP *cpi, p[i].eobs = ctx->eobs_pbuf[i][0]; } - if (cm->current_video_frame == 0 && cm->allow_screen_content_tools) { - for (i = 0; i < 2; ++i) { - if (ctx->color_index_map[i] == 0) { - CHECK_MEM_ERROR(cm, ctx->color_index_map[i], - vpx_memalign(16, (ctx->num_4x4_blk << 4) * - sizeof(*ctx->color_index_map[i]))); - } - } - } for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i]; @@ -2084,17 +2145,16 @@ static void update_stats(VP10_COMMON *cm, ThreadData *td } } - typedef struct { - ENTROPY_CONTEXT a[16 * MAX_MB_PLANE]; - ENTROPY_CONTEXT l[16 * MAX_MB_PLANE]; - PARTITION_CONTEXT sa[8]; - PARTITION_CONTEXT sl[8]; + ENTROPY_CONTEXT a[2 * MI_BLOCK_SIZE * MAX_MB_PLANE]; + ENTROPY_CONTEXT l[2 * MI_BLOCK_SIZE * MAX_MB_PLANE]; + PARTITION_CONTEXT sa[MI_BLOCK_SIZE]; + PARTITION_CONTEXT sl[MI_BLOCK_SIZE]; #if CONFIG_VAR_TX TXFM_CONTEXT *p_ta; TXFM_CONTEXT *p_tl; - TXFM_CONTEXT ta[8]; - TXFM_CONTEXT tl[8]; + TXFM_CONTEXT ta[MI_BLOCK_SIZE]; + TXFM_CONTEXT tl[MI_BLOCK_SIZE]; #endif } RD_SEARCH_MACROBLOCK_CONTEXT; @@ -2892,11 +2952,11 @@ static void rd_use_partition(VP10_COMP *cpi, // We must have chosen a partitioning and encoding or we'll fail later on. // No other opportunities for success. 
- if (bsize == BLOCK_64X64) + if (bsize == BLOCK_LARGEST) assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); if (do_recon) { - int output_enabled = (bsize == BLOCK_64X64); + int output_enabled = (bsize == BLOCK_LARGEST); encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } @@ -2909,21 +2969,38 @@ static void rd_use_partition(VP10_COMP *cpi, } static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { - BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, - BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, - BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, - BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, - BLOCK_16X16 + BLOCK_4X4, // 4x4 + BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 4x8, 8x4, 8x8 + BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 8x16, 16x8, 16x16 + BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 16x32, 32x16, 32x32 + BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 32x64, 64x32, 64x64 +#if CONFIG_EXT_PARTITION + BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 // 64x128, 128x64, 128x128 +#endif // CONFIG_EXT_PARTITION }; static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { - BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, - BLOCK_16X16, BLOCK_32X32, BLOCK_32X32, - BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, - BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, - BLOCK_64X64 + BLOCK_8X8, // 4x4 + BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 4x8, 8x4, 8x8 + BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, // 8x16, 16x8, 16x16 + BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32 + BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 32x64, 64x32, 64x64 +#if CONFIG_EXT_PARTITION + BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128 +#endif // CONFIG_EXT_PARTITION }; +// Next square block size less or equal than current block size. 
+static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { + BLOCK_4X4, // 4x4 + BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x8, 8x4, 8x8 + BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 8x16, 16x8, 16x16 + BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 16x32, 32x16, 32x32 + BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, // 32x64, 64x32, 64x64 +#if CONFIG_EXT_PARTITION + BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128 +#endif // CONFIG_EXT_PARTITION +}; // Look at all the mode_info entries for blocks that are part of this // partition and find the min and max values for sb_type. @@ -2954,15 +3031,6 @@ static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, } } -// Next square block size less or equal than current block size. -static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { - BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, - BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, - BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, - BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, - BLOCK_64X64 -}; - // Look at neighboring blocks and set a min and max partition size based on // what they chose. static void rd_auto_partition_range(VP10_COMP *cpi, const TileInfo *const tile, @@ -2978,13 +3046,13 @@ static void rd_auto_partition_range(VP10_COMP *cpi, const TileInfo *const tile, const int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; BLOCK_SIZE min_size = BLOCK_4X4; - BLOCK_SIZE max_size = BLOCK_64X64; + BLOCK_SIZE max_size = BLOCK_LARGEST; int bs_hist[BLOCK_SIZES] = {0}; // Trap case where we do not have a prediction. 
if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { // Default "min to max" and "max to min" - min_size = BLOCK_64X64; + min_size = BLOCK_LARGEST; max_size = BLOCK_4X4; // NOTE: each call to get_sb_partition_size_range() uses the previous @@ -3054,7 +3122,7 @@ static void set_partition_range(VP10_COMMON *cm, MACROBLOCKD *xd, MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str]; BLOCK_SIZE bs, min_size, max_size; - min_size = BLOCK_64X64; + min_size = BLOCK_LARGEST; max_size = BLOCK_4X4; if (prev_mi) { @@ -3104,16 +3172,27 @@ static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { } #if CONFIG_FP_MB_STATS -const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4}; -const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4}; const int qindex_skip_threshold_lookup[BLOCK_SIZES] = - {0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120}; + {0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120, +#if CONFIG_EXT_PARTITION + // TODO(debargha): What are the correct numbers here? + 130, 130, 150 +#endif // CONFIG_EXT_PARTITION + }; const int qindex_split_threshold_lookup[BLOCK_SIZES] = - {0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120}; + {0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120, +#if CONFIG_EXT_PARTITION + // TODO(debargha): What are the correct numbers here? + 160, 160, 240 +#endif // CONFIG_EXT_PARTITION + }; const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6}; + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6, +#if CONFIG_EXT_PARTITION + // TODO(debargha): What are the correct numbers here? + 8, 8, 10 +#endif // CONFIG_EXT_PARTITION + }; typedef enum { MV_ZERO = 0, @@ -3526,8 +3605,8 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, pc_tree->partitioning = PARTITION_NONE; // Adjust dist breakout threshold according to the partition size. 
- dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + - b_height_log2_lookup[bsize]); + dist_breakout_thr >>= (2 * (MAX_SB_SIZE_LOG2 - 2)) + - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); rate_breakout_thr *= num_pels_log2_lookup[bsize]; @@ -4124,12 +4203,12 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && pc_tree->index != 3) { - int output_enabled = (bsize == BLOCK_64X64); + int output_enabled = (bsize == BLOCK_LARGEST); encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } - if (bsize == BLOCK_64X64) { + if (bsize == BLOCK_LARGEST) { assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip)); assert(best_rdc.rate < INT_MAX); assert(best_rdc.dist < INT64_MAX); @@ -4149,6 +4228,11 @@ static void encode_rd_sb_row(VP10_COMP *cpi, MACROBLOCKD *const xd = &x->e_mbd; SPEED_FEATURES *const sf = &cpi->sf; int mi_col; +#if CONFIG_EXT_PARTITION + const int leaf_nodes = 256; +#else + const int leaf_nodes = 64; +#endif // CONFIG_EXT_PARTITION // Initialize the left context for the new SB row vp10_zero_left_context(xd); @@ -4170,10 +4254,10 @@ static void encode_rd_sb_row(VP10_COMP *cpi, MODE_INFO **mi = cm->mi_grid_visible + idx_str; if (sf->adaptive_pred_interp_filter) { - for (i = 0; i < 64; ++i) + for (i = 0; i < leaf_nodes; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE; - for (i = 0; i < 64; ++i) { + for (i = 0; i < leaf_nodes; ++i) { td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; @@ -4187,29 +4271,29 @@ static void encode_rd_sb_row(VP10_COMP *cpi, if (seg->enabled) { const uint8_t *const map = seg->update_map ? 
cpi->segmentation_map : cm->last_frame_seg_map; - int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); + int segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col); seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); } x->source_variance = UINT_MAX; if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { const BLOCK_SIZE bsize = - seg_skip ? BLOCK_64X64 : sf->always_this_block_size; - set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); + seg_skip ? BLOCK_LARGEST : sf->always_this_block_size; + set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, + BLOCK_LARGEST, &dummy_rate, &dummy_dist, #if CONFIG_SUPERTX &dummy_rate_nocoef, #endif // CONFIG_SUPERTX 1, td->pc_root); } else if (cpi->partition_search_skippable_frame) { BLOCK_SIZE bsize; - set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); + set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST); bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, + BLOCK_LARGEST, &dummy_rate, &dummy_dist, #if CONFIG_SUPERTX &dummy_rate_nocoef, #endif // CONFIG_SUPERTX @@ -4218,7 +4302,7 @@ static void encode_rd_sb_row(VP10_COMP *cpi, cm->frame_type != KEY_FRAME) { choose_partitioning(cpi, tile_info, x, mi_row, mi_col); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, + BLOCK_LARGEST, &dummy_rate, &dummy_dist, #if CONFIG_SUPERTX &dummy_rate_nocoef, #endif // CONFIG_SUPERTX @@ -4226,12 +4310,12 @@ static void encode_rd_sb_row(VP10_COMP *cpi, } else { // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { - 
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); + set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST); rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, &x->min_partition_size, &x->max_partition_size); } - rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, + rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_LARGEST, &dummy_rdc, #if CONFIG_SUPERTX &dummy_rate_nocoef, @@ -4930,19 +5014,15 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, #if CONFIG_OBMC if (mbmi->obmc) { #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint8_t, - tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, - tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); #else - DECLARE_ALIGNED(16, uint8_t, - tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); - DECLARE_ALIGNED(16, uint8_t, - tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]); #endif // CONFIG_VP9_HIGHBITDEPTH uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; - int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; - int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; + int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; + int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; assert(mbmi->sb_type >= BLOCK_8X8); @@ -4950,21 +5030,19 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); - dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len); - dst_buf1[2] = CONVERT_TO_BYTEPTR( - tmp_buf1 + CU_SIZE * CU_SIZE * 2 * len); + dst_buf1[1] = 
CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); - dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len); - dst_buf2[2] = CONVERT_TO_BYTEPTR( - tmp_buf2 + CU_SIZE * CU_SIZE * 2 * len); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len); } else { #endif // CONFIG_VP9_HIGHBITDEPTH dst_buf1[0] = tmp_buf1; - dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE; - dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2; + dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE; + dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2; dst_buf2[0] = tmp_buf2; - dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE; - dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2; + dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE; + dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2; #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -5447,38 +5525,35 @@ static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td, int i, ctx; uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; - DECLARE_ALIGNED(16, uint8_t, - tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); - DECLARE_ALIGNED(16, uint8_t, - tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); - DECLARE_ALIGNED(16, uint8_t, - tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); - int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; - int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; - int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf3[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); + int dst_stride1[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; + int dst_stride2[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; + int dst_stride3[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; #if CONFIG_VP9_HIGHBITDEPTH if 
(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); - dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); - dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_TX_SQUARE * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_TX_SQUARE * len); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); - dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); - dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_TX_SQUARE * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_TX_SQUARE * len); dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); - dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len); - dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAX_TX_SQUARE * len); + dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAX_TX_SQUARE * len); } else { #endif // CONFIG_VP9_HIGHBITDEPTH dst_buf1[0] = tmp_buf1; - dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; - dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf1[1] = tmp_buf1 + MAX_TX_SQUARE; + dst_buf1[2] = tmp_buf1 + 2 * MAX_TX_SQUARE; dst_buf2[0] = tmp_buf2; - dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN; - dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf2[1] = tmp_buf2 + MAX_TX_SQUARE; + dst_buf2[2] = tmp_buf2 + 2 * MAX_TX_SQUARE; dst_buf3[0] = tmp_buf3; - dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; - dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf3[1] = tmp_buf3 + MAX_TX_SQUARE; + dst_buf3[2] = tmp_buf3 + 2 * MAX_TX_SQUARE; #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -6037,7 +6112,8 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, sse_uv = 0; for (plane = 1; plane < MAX_MB_PLANE; ++plane) { #if CONFIG_VAR_TX - 
ENTROPY_CONTEXT ctxa[16], ctxl[16]; + ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE]; const struct macroblockd_plane *const pd = &xd->plane[plane]; int coeff_ctx = 1; @@ -6081,7 +6157,8 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, #endif // CONFIG_EXT_TX for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { #if CONFIG_VAR_TX - ENTROPY_CONTEXT ctxa[16], ctxl[16]; + ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE]; const struct macroblockd_plane *const pd = &xd->plane[0]; int coeff_ctx = 1; #endif // CONFIG_VAR_TX diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index 0b7a04abe..429ac4f5b 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -29,8 +29,8 @@ #include "vp10/encoder/tokenize.h" struct optimize_ctx { - ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; - ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; + ENTROPY_CONTEXT ta[MAX_MB_PLANE][2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT tl[MAX_MB_PLANE][2 * MI_BLOCK_SIZE]; }; void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { @@ -96,9 +96,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, struct macroblock_plane *const p = &mb->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; const int ref = is_inter_block(&xd->mi[0]->mbmi); - vp10_token_state tokens[1025][2]; - unsigned best_index[1025][2]; - uint8_t token_cache[1024]; + vp10_token_state tokens[MAX_TX_SQUARE+1][2]; + unsigned best_index[MAX_TX_SQUARE+1][2]; + uint8_t token_cache[MAX_TX_SQUARE]; const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 97d091a82..77af3ddcd 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -1955,6 +1955,8 @@ void vp10_change_config(struct VP10_COMP *cpi, const 
VP10EncoderConfig *oxcf) { CHECK_MEM_ERROR(cm, x->palette_buffer, vpx_memalign(16, sizeof(*x->palette_buffer))); } + vp10_free_pc_tree(&cpi->td); + vp10_setup_pc_tree(&cpi->common, &cpi->td); } vp10_reset_segment_features(cm); @@ -3147,7 +3149,7 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) { } if (lf->filter_level > 0) { -#if CONFIG_VAR_TX +#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); #else if (cpi->num_workers > 1) diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index 3126ca427..9e1b6fb7a 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -312,8 +312,8 @@ typedef struct VP10_COMP { QUANTS quants; ThreadData td; MB_MODE_INFO_EXT *mbmi_ext_base; - DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); // 8: SIMD width + DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); // 8: SIMD width VP10_COMMON common; VP10EncoderConfig oxcf; struct lookahead_ctx *lookahead; diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c index 23184ed92..4327d974c 100644 --- a/vp10/encoder/mcomp.c +++ b/vp10/encoder/mcomp.c @@ -366,13 +366,13 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd, #if CONFIG_VP9_HIGHBITDEPTH if (second_pred != NULL) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); + DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]); vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); } else { - DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } @@ -384,7 
+384,7 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd, #else (void) xd; if (second_pred != NULL) { - DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } else { @@ -694,7 +694,7 @@ static int upsampled_pref_error(const MACROBLOCKD *xd, unsigned int besterr; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]); + DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]); if (second_pred != NULL) vpx_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y, y_stride); @@ -704,9 +704,9 @@ static int upsampled_pref_error(const MACROBLOCKD *xd, besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse); } else { - DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); + DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); #else - DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); + DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); (void) xd; #endif // CONFIG_VP9_HIGHBITDEPTH if (second_pred != NULL) @@ -1961,10 +1961,10 @@ unsigned int vp10_int_pro_motion_estimation(const VP10_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; - DECLARE_ALIGNED(16, int16_t, hbuf[128]); - DECLARE_ALIGNED(16, int16_t, vbuf[128]); - DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); - DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); + DECLARE_ALIGNED(16, int16_t, hbuf[2 * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, int16_t, vbuf[2 * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, int16_t, src_hbuf[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, int16_t, src_vbuf[MAX_SB_SQUARE]); int idx; const int bw = 4 << b_width_log2_lookup[bsize]; const int bh = 4 << b_height_log2_lookup[bsize]; diff --git a/vp10/encoder/picklpf.c b/vp10/encoder/picklpf.c index 56ff5c008..f491006cf 
100644 --- a/vp10/encoder/picklpf.c +++ b/vp10/encoder/picklpf.c @@ -41,7 +41,7 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP10_COMMON *const cm = &cpi->common; int64_t filt_err; -#if CONFIG_VAR_TX +#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1, partial_frame); #else diff --git a/vp10/encoder/quantize.c b/vp10/encoder/quantize.c index f8a59ec7d..3f8f0f427 100644 --- a/vp10/encoder/quantize.c +++ b/vp10/encoder/quantize.c @@ -461,7 +461,7 @@ void vp10_init_quantizer(VP10_COMP *cpi) { cpi->uv_dequant[q][i] = quant; } - for (i = 2; i < 8; i++) { + for (i = 2; i < 8; i++) { // 8: SIMD width quants->y_quant[q][i] = quants->y_quant[q][1]; quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1]; quants->y_round_fp[q][i] = quants->y_round_fp[q][1]; diff --git a/vp10/encoder/quantize.h b/vp10/encoder/quantize.h index 9c0ab3fbf..612846055 100644 --- a/vp10/encoder/quantize.h +++ b/vp10/encoder/quantize.h @@ -27,6 +27,7 @@ typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr, const scan_order *sc); typedef struct { + // 0: dc 1: ac 2-8: ac repeated to SIMD width DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c index c2f148dbf..203ac4213 100644 --- a/vp10/encoder/rd.c +++ b/vp10/encoder/rd.c @@ -62,7 +62,10 @@ void vp10_rd_cost_init(RD_COST *rd_cost) { // This table is used to correct for block size. // The factors here are << 2 (2 = x0.5, 32 = x8 etc). 
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = { - 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32 + 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, +#if CONFIG_EXT_PARTITION + 48, 48, 64 +#endif // CONFIG_EXT_PARTITION }; static void fill_mode_costs(VP10_COMP *cpi) { @@ -560,8 +563,8 @@ void vp10_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2, void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, - ENTROPY_CONTEXT t_above[16], - ENTROPY_CONTEXT t_left[16]) { + ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE], + ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; @@ -935,7 +938,7 @@ void vp10_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh, int mode; for (mode = 0; mode < top_mode; ++mode) { const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4); - const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64); + const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_LARGEST); BLOCK_SIZE bs; for (bs = min_size; bs <= max_size; ++bs) { int *const fact = &factor_buf[bs][mode]; diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h index 675b9db36..533e7751c 100644 --- a/vp10/encoder/rd.h +++ b/vp10/encoder/rd.h @@ -330,8 +330,8 @@ void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame); void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, - ENTROPY_CONTEXT t_above[16], - ENTROPY_CONTEXT t_left[16]); + ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE], + ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]); void vp10_set_rd_speed_thresholds(struct VP10_COMP *cpi); diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index f3056e9aa..328e70c75 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -102,8 +102,8 @@ typedef struct { struct rdcost_block_args { 
const VP10_COMP *cpi; MACROBLOCK *x; - ENTROPY_CONTEXT t_above[16]; - ENTROPY_CONTEXT t_left[16]; + ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]; int this_rate; int64_t this_dist; int64_t this_sse; @@ -376,8 +376,8 @@ static void get_energy_distribution_fine(const VP10_COMP *cpi, unsigned int esq[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; unsigned int var[16]; double total = 0; - const int f_index = bsize - 6; + const int f_index = bsize - BLOCK_16X16; if (f_index < 0) { int i, j, index; int w_shift = bw == 8 ? 1 : 2; @@ -890,7 +890,7 @@ static int cost_coeffs(MACROBLOCK *x, const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = x->token_costs[tx_size][type][is_inter_block(mbmi)]; - uint8_t token_cache[32 * 32]; + uint8_t token_cache[MAX_TX_SQUARE]; #if CONFIG_VAR_TX int pt = coeff_ctx; #else @@ -1045,10 +1045,10 @@ static void dist_block(const VP10_COMP *cpi, MACROBLOCK *x, int plane, if (*eob) { const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint16_t, recon16[32 * 32]); // MAX TX_SIZE**2 + DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]); uint8_t *recon = (uint8_t*)recon16; #else - DECLARE_ALIGNED(16, uint8_t, recon[32 * 32]); // MAX TX_SIZE**2 + DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]); #endif // CONFIG_VP9_HIGHBITDEPTH const PLANE_TYPE plane_type = plane == 0 ? 
PLANE_TYPE_Y : PLANE_TYPE_UV; @@ -1064,18 +1064,18 @@ static void dist_block(const VP10_COMP *cpi, MACROBLOCK *x, int plane, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { recon = CONVERT_TO_BYTEPTR(recon); inv_txfm_param.bd = xd->bd; - vpx_highbd_convolve_copy(dst, dst_stride, recon, 32, + vpx_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL, 0, bs, bs, xd->bd); - highbd_inv_txfm_add(dqcoeff, recon, 32, &inv_txfm_param); + highbd_inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param); } else #endif // CONFIG_VP9_HIGHBITDEPTH { - vpx_convolve_copy(dst, dst_stride, recon, 32, + vpx_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL, 0, bs, bs); - inv_txfm_add(dqcoeff, recon, 32, &inv_txfm_param); + inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param); } - cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, 32, &tmp); + cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, MAX_TX_SIZE, &tmp); } *out_dist = (int64_t)tmp * 16; @@ -2838,10 +2838,10 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col]; uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint16_t, rec_buffer_alloc_16[32 * 32]); + DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]); uint8_t *rec_buffer; #else - DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]); + DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]); #endif // CONFIG_VP9_HIGHBITDEPTH const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; @@ -2860,16 +2860,16 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, // TODO(any): Use dist_block to compute distortion #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer_alloc_16); - 
vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, + rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16); + vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL, 0, bh, bh, xd->bd); } else { - rec_buffer = (uint8_t *)rec_buffer_alloc_16; - vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, + rec_buffer = (uint8_t *)rec_buffer16; + vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL, 0, bh, bh); } #else - vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, + vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL, 0, bh, bh); #endif // CONFIG_VP9_HIGHBITDEPTH @@ -2904,12 +2904,12 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { inv_txfm_param.bd = xd->bd; - highbd_inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param); + highbd_inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param); } else { - inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param); + inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param); } #else // CONFIG_VP9_HIGHBITDEPTH - inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param); + inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param); #endif // CONFIG_VP9_HIGHBITDEPTH if ((bh >> 2) + blk_col > max_blocks_wide || @@ -2921,16 +2921,16 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, tmp = 0; for (idy = 0; idy < blocks_height; idy += 2) { for (idx = 0; idx < blocks_width; idx += 2) { - cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx, - src_stride, - rec_buffer + 4 * idy * 32 + 4 * idx, - 32, &this_dist); + uint8_t *const s = src + 4 * idy * src_stride + 4 * idx; + uint8_t *const r = rec_buffer + 4 * idy * MAX_TX_SIZE + 4 * idx; + cpi->fn_ptr[BLOCK_8X8].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist); tmp += this_dist; } } } else { uint32_t this_dist; - 
cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &this_dist); + cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE, + &this_dist); tmp = this_dist; } } @@ -3125,8 +3125,10 @@ static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x, int idx, idy; int block = 0; int step = 1 << (max_txsize_lookup[plane_bsize] * 2); - ENTROPY_CONTEXT ctxa[16], ctxl[16]; - TXFM_CONTEXT tx_above[8], tx_left[8]; + ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE]; + TXFM_CONTEXT tx_above[MI_BLOCK_SIZE]; + TXFM_CONTEXT tx_left[MI_BLOCK_SIZE]; int pnrate = 0, pnskip = 1; int64_t pndist = 0, pnsse = 0; @@ -3240,7 +3242,7 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, const int is_inter = is_inter_block(mbmi); TX_SIZE best_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; TX_SIZE best_tx = TX_SIZES; - uint8_t best_blk_skip[256]; + uint8_t best_blk_skip[MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4); int idx, idy; int prune = 0; @@ -3423,7 +3425,8 @@ static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, int step = 1 << (max_txsize_lookup[plane_bsize] * 2); int pnrate = 0, pnskip = 1; int64_t pndist = 0, pnsse = 0; - ENTROPY_CONTEXT ta[16], tl[16]; + ENTROPY_CONTEXT ta[2 * MI_BLOCK_SIZE]; + ENTROPY_CONTEXT tl[2 * MI_BLOCK_SIZE]; vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl); @@ -4560,10 +4563,10 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x, // Prediction buffer from second frame. 
#if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]); + DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]); uint8_t *second_pred; #else - DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]); + DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]); #endif // CONFIG_VP9_HIGHBITDEPTH for (ref = 0; ref < 2; ++ref) { @@ -5733,9 +5736,9 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x, step_param = cpi->mv_step_param; } - if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) { + if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST) { int boffset = - 2 * (b_width_log2_lookup[BLOCK_64X64] - + 2 * (b_width_log2_lookup[BLOCK_LARGEST] - VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); step_param = VPXMAX(step_param, boffset); } @@ -6202,16 +6205,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, const int * const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]]; const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME); - const int tmp_buf_sz = CU_SIZE * CU_SIZE; #if CONFIG_REF_MV uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); #endif #endif // CONFIG_EXT_INTER #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * MAX_SB_SQUARE]); uint8_t *tmp_buf; #else - DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * MAX_SB_SQUARE]); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_OBMC @@ -6226,7 +6228,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, int best_rate_y, best_rate_uv; #endif // CONFIG_SUPERTX #if CONFIG_VAR_TX - uint8_t best_blk_skip[3][256]; + uint8_t best_blk_skip[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; #endif // CONFIG_VAR_TX int64_t best_distortion = INT64_MAX; unsigned int best_pred_var = UINT_MAX; @@ 
-6241,8 +6243,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, int orig_dst_stride[MAX_MB_PLANE]; int rs = 0; INTERP_FILTER best_filter = SWITCHABLE; - uint8_t skip_txfm[MAX_MB_PLANE][4] = {{0}}; - int64_t bsse[MAX_MB_PLANE][4] = {{0}}; + uint8_t skip_txfm[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB] = {{0}}; + int64_t bsse[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB] = {{0}}; int skip_txfm_sb = 0; int64_t skip_sse_sb = INT64_MAX; @@ -6569,8 +6571,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, restore_dst_buf(xd, orig_dst, orig_dst_stride); } else { for (j = 0; j < MAX_MB_PLANE; j++) { - xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; - xd->plane[j].dst.stride = 64; + xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE; + xd->plane[j].dst.stride = MAX_SB_SIZE; } } vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); @@ -6648,15 +6650,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, if (have_newmv_in_inter_mode(this_mode)) { int_mv tmp_mv[2]; int rate_mvs[2], tmp_rate_mv = 0; - uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3]; - uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3]; + uint8_t pred0[2 * MAX_SB_SQUARE * 3]; + uint8_t pred1[2 * MAX_SB_SQUARE * 3]; uint8_t *preds0[3] = {pred0, - pred0 + 2 * CU_SIZE * CU_SIZE, - pred0 + 4 * CU_SIZE * CU_SIZE}; + pred0 + 2 * MAX_SB_SQUARE, + pred0 + 4 * MAX_SB_SQUARE}; uint8_t *preds1[3] = {pred1, - pred1 + 2 * CU_SIZE * CU_SIZE, - pred1 + 4 * CU_SIZE * CU_SIZE}; - int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE}; + pred1 + 2 * MAX_SB_SQUARE, + pred1 + 4 * MAX_SB_SQUARE}; + int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; vp10_build_inter_predictors_for_planes_single_buf( xd, bsize, mi_row, mi_col, 0, preds0, strides); vp10_build_inter_predictors_for_planes_single_buf( @@ -6723,15 +6725,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, mbmi->mv[1].as_int = cur_mv[1].as_int; } } else { - uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3]; - uint8_t pred1[2 * CU_SIZE * CU_SIZE 
* 3]; + uint8_t pred0[2 * MAX_SB_SQUARE * 3]; + uint8_t pred1[2 * MAX_SB_SQUARE * 3]; uint8_t *preds0[3] = {pred0, - pred0 + 2 * CU_SIZE * CU_SIZE, - pred0 + 4 * CU_SIZE * CU_SIZE}; + pred0 + 2 * MAX_SB_SQUARE, + pred0 + 4 * MAX_SB_SQUARE}; uint8_t *preds1[3] = {pred1, - pred1 + 2 * CU_SIZE * CU_SIZE, - pred1 + 4 * CU_SIZE * CU_SIZE}; - int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE}; + pred1 + 2 * MAX_SB_SQUARE, + pred1 + 4 * MAX_SB_SQUARE}; + int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; vp10_build_inter_predictors_for_planes_single_buf( xd, bsize, mi_row, mi_col, 0, preds0, strides); vp10_build_inter_predictors_for_planes_single_buf( @@ -6791,8 +6793,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, int tmp_rate_mv = 0; mbmi->ref_frame[1] = NONE; for (j = 0; j < MAX_MB_PLANE; j++) { - xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz; - xd->plane[j].dst.stride = CU_SIZE; + xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE; + xd->plane[j].dst.stride = MAX_SB_SIZE; } vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); restore_dst_buf(xd, orig_dst, orig_dst_stride); @@ -6805,11 +6807,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, rmode = intra_mode_cost[mbmi->interintra_mode]; vp10_build_interintra_predictors(xd, tmp_buf, - tmp_buf + tmp_buf_sz, - tmp_buf + 2 * tmp_buf_sz, - CU_SIZE, - CU_SIZE, - CU_SIZE, + tmp_buf + MAX_SB_SQUARE, + tmp_buf + 2 * MAX_SB_SQUARE, + MAX_SB_SIZE, + MAX_SB_SIZE, + MAX_SB_SIZE, bsize); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &skip_txfm_sb, &skip_sse_sb); @@ -6830,11 +6832,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, if (wedge_bits) { vp10_build_interintra_predictors(xd, tmp_buf, - tmp_buf + tmp_buf_sz, - tmp_buf + 2 * tmp_buf_sz, - CU_SIZE, - CU_SIZE, - CU_SIZE, + tmp_buf + MAX_SB_SQUARE, + tmp_buf + 2 * MAX_SB_SQUARE, + MAX_SB_SIZE, + MAX_SB_SIZE, + MAX_SB_SIZE, bsize); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, 
&skip_txfm_sb, &skip_sse_sb); @@ -6852,11 +6854,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, mbmi->interintra_uv_wedge_index = wedge_index; vp10_build_interintra_predictors(xd, tmp_buf, - tmp_buf + tmp_buf_sz, - tmp_buf + 2 * tmp_buf_sz, - CU_SIZE, - CU_SIZE, - CU_SIZE, + tmp_buf + MAX_SB_SQUARE, + tmp_buf + 2 * MAX_SB_SQUARE, + MAX_SB_SIZE, + MAX_SB_SIZE, + MAX_SB_SIZE, bsize); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &skip_txfm_sb, &skip_sse_sb); @@ -6937,8 +6939,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, if (best_needs_copy) { // again temporarily set the buffers to local memory to prevent a memcpy for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = tmp_buf + i * 64 * 64; - xd->plane[i].dst.stride = 64; + xd->plane[i].dst.buf = tmp_buf + i * MAX_SB_SQUARE; + xd->plane[i].dst.stride = MAX_SB_SIZE; } } rd = tmp_rd; @@ -7572,33 +7574,33 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, const MODE_INFO *left_mi = xd->left_mi; #if CONFIG_OBMC #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]); - DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); #else - DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]); - DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]); #endif // CONFIG_VP9_HIGHBITDEPTH - uint8_t *dst_buf1[3], *dst_buf2[3]; - int dst_stride1[3] = {64, 64, 64}; - int dst_stride2[3] = {64, 64, 64}; + uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; + int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; + int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; #if 
CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); - dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len); - dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_SB_SQUARE * len); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); - dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len); - dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_SB_SQUARE * len); } else { #endif // CONFIG_VP9_HIGHBITDEPTH dst_buf1[0] = tmp_buf1; - dst_buf1[1] = tmp_buf1 + 4096; - dst_buf1[2] = tmp_buf1 + 8192; + dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE; + dst_buf1[2] = tmp_buf1 + 2 * MAX_SB_SQUARE; dst_buf2[0] = tmp_buf2; - dst_buf2[1] = tmp_buf2 + 4096; - dst_buf2[2] = tmp_buf2 + 8192; + dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE; + dst_buf2[2] = tmp_buf2 + 2 * MAX_SB_SQUARE; #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -9386,7 +9388,7 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi, int switchable_filter_index; int_mv *second_ref = comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL; - b_mode_info tmp_best_bmodes[16]; + b_mode_info tmp_best_bmodes[16]; // Should this be 4 ? 
MB_MODE_INFO tmp_best_mbmode; BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; int pred_exists = 0; diff --git a/vp10/encoder/segmentation.c b/vp10/encoder/segmentation.c index e12282e70..f71946785 100644 --- a/vp10/encoder/segmentation.c +++ b/vp10/encoder/segmentation.c @@ -328,13 +328,13 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) { mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride + tile_info.mi_col_start; for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end; - mi_row += 8, mi_ptr += 8 * cm->mi_stride) { + mi_row += MI_BLOCK_SIZE, mi_ptr += MI_BLOCK_SIZE * cm->mi_stride) { MODE_INFO **mi = mi_ptr; for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; - mi_col += 8, mi += 8) { + mi_col += MI_BLOCK_SIZE, mi += MI_BLOCK_SIZE) { count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, - mi_row, mi_col, BLOCK_64X64); + mi_row, mi_col, BLOCK_LARGEST); } } } diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c index 25766bb45..cd1c91acd 100644 --- a/vp10/encoder/speed_features.c +++ b/vp10/encoder/speed_features.c @@ -353,6 +353,11 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf, sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST; sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST; sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST; +#if CONFIG_EXT_PARTITION + sf->inter_mode_mask[BLOCK_64X128] = INTER_NEAREST; + sf->inter_mode_mask[BLOCK_128X64] = INTER_NEAREST; + sf->inter_mode_mask[BLOCK_128X128] = INTER_NEAREST; +#endif // CONFIG_EXT_PARTITION sf->max_intra_bsize = BLOCK_32X32; sf->allow_skip_recode = 1; } @@ -372,6 +377,11 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf, sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; +#if CONFIG_EXT_PARTITION + 
sf->inter_mode_mask[BLOCK_64X128] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_128X64] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_128X128] = INTER_NEAREST_NEW_ZERO; +#endif // CONFIG_EXT_PARTITION sf->adaptive_rd_thresh = 2; // This feature is only enabled when partition search is disabled. sf->reuse_inter_pred_sby = 1; @@ -483,7 +493,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) { sf->use_square_partition_only = 0; sf->auto_min_max_partition_size = NOT_IN_USE; sf->rd_auto_partition_min_limit = BLOCK_4X4; - sf->default_max_partition_size = BLOCK_64X64; + sf->default_max_partition_size = BLOCK_LARGEST; sf->default_min_partition_size = BLOCK_4X4; sf->adjust_partitioning_from_last_frame = 0; sf->last_partitioning_redo_frequency = 4; @@ -514,7 +524,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) { sf->schedule_mode_search = 0; for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL; - sf->max_intra_bsize = BLOCK_64X64; + sf->max_intra_bsize = BLOCK_LARGEST; sf->reuse_inter_pred_sby = 0; // This setting only takes effect when partition_search_type is set // to FIXED_PARTITION. @@ -541,6 +551,12 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) { else if (oxcf->mode == GOOD) set_good_speed_feature(cpi, cm, sf, oxcf->speed); + // sf->partition_search_breakout_dist_thr is set assuming max 64x64 + // blocks. Normalise this if the blocks are bigger. 
+ if (MAX_SB_SIZE_LOG2 > 6) { + sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6); + } + cpi->full_search_sad = vp10_full_search_sad; cpi->diamond_search_sad = vp10_diamond_search_sad; diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c index 2398a536e..a283b1059 100644 --- a/vp10/encoder/tokenize.c +++ b/vp10/encoder/tokenize.c @@ -438,7 +438,7 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; TOKENEXTRA **tp = args->tp; - uint8_t token_cache[32 * 32]; + uint8_t token_cache[MAX_TX_SQUARE]; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; diff --git a/vp10/encoder/x86/denoiser_sse2.c b/vp10/encoder/x86/denoiser_sse2.c index 047974ef8..5c1303a72 100644 --- a/vp10/encoder/x86/denoiser_sse2.c +++ b/vp10/encoder/x86/denoiser_sse2.c @@ -361,9 +361,7 @@ int vp10_denoiser_filter_sse2(const uint8_t *sig, int sig_stride, avg, avg_stride, increase_denoising, bs, motion_magnitude, 8); - } else if (bs == BLOCK_16X8 || bs == BLOCK_16X16 || bs == BLOCK_16X32 || - bs == BLOCK_32X16|| bs == BLOCK_32X32 || bs == BLOCK_32X64 || - bs == BLOCK_64X32 || bs == BLOCK_64X64) { + } else if (bs < BLOCK_SIZES) { return vp10_denoiser_NxM_sse2_big(sig, sig_stride, mc_avg, mc_avg_stride, avg, avg_stride, diff --git a/vpx_dsp/vpx_convolve.c b/vpx_dsp/vpx_convolve.c index 2e85ed481..59d048812 100644 --- a/vpx_dsp/vpx_convolve.c +++ b/vpx_dsp/vpx_convolve.c @@ -130,20 +130,20 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride, // --Must round-up because block may be located at sub-pixel position. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. 
- uint8_t temp[MAX_EXT_SIZE * MAX_CU_SIZE]; + uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE]; int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; - assert(w <= MAX_CU_SIZE); - assert(h <= MAX_CU_SIZE); + assert(w <= MAX_SB_SIZE); + assert(h <= MAX_SB_SIZE); assert(y_step_q4 <= 32); assert(x_step_q4 <= 32); convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, - temp, MAX_CU_SIZE, + temp, MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w, intermediate_height); - convolve_vert(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_CU_SIZE, + convolve_vert(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h); } @@ -240,13 +240,13 @@ void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_y, int y_step_q4, int w, int h) { /* Fixed size intermediate buffer places limits on parameters. */ - DECLARE_ALIGNED(16, uint8_t, temp[MAX_CU_SIZE * MAX_CU_SIZE]); - assert(w <= MAX_CU_SIZE); - assert(h <= MAX_CU_SIZE); + DECLARE_ALIGNED(16, uint8_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]); + assert(w <= MAX_SB_SIZE); + assert(h <= MAX_SB_SIZE); - vpx_convolve8_c(src, src_stride, temp, MAX_CU_SIZE, + vpx_convolve8_c(src, src_stride, temp, MAX_SB_SIZE, filter_x, x_step_q4, filter_y, y_step_q4, w, h); - vpx_convolve_avg_c(temp, MAX_CU_SIZE, dst, dst_stride, + vpx_convolve_avg_c(temp, MAX_SB_SIZE, dst, dst_stride, NULL, 0, NULL, 0, w, h); } @@ -463,21 +463,21 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride, // --Must round-up because block may be located at sub-pixel position. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. 
- uint16_t temp[MAX_EXT_SIZE * MAX_CU_SIZE]; + uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE]; int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; - assert(w <= MAX_CU_SIZE); - assert(h <= MAX_CU_SIZE); + assert(w <= MAX_SB_SIZE); + assert(h <= MAX_SB_SIZE); assert(y_step_q4 <= 32); assert(x_step_q4 <= 32); highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, - CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE, + CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w, intermediate_height, bd); highbd_convolve_vert( - CONVERT_TO_BYTEPTR(temp) + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_CU_SIZE, + CONVERT_TO_BYTEPTR(temp) + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h, bd); } @@ -561,14 +561,14 @@ void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { // Fixed size intermediate buffer places limits on parameters. 
- DECLARE_ALIGNED(16, uint16_t, temp[MAX_CU_SIZE * MAX_CU_SIZE]); - assert(w <= MAX_CU_SIZE); - assert(h <= MAX_CU_SIZE); + DECLARE_ALIGNED(16, uint16_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]); + assert(w <= MAX_SB_SIZE); + assert(h <= MAX_SB_SIZE); vpx_highbd_convolve8_c(src, src_stride, - CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE, + CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); - vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE, + vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, dst, dst_stride, NULL, 0, NULL, 0, w, h, bd); } diff --git a/vpx_dsp/vpx_dsp_common.h b/vpx_dsp/vpx_dsp_common.h index 8d9bf558d..e12703176 100644 --- a/vpx_dsp/vpx_dsp_common.h +++ b/vpx_dsp/vpx_dsp_common.h @@ -13,18 +13,19 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif -#if CONFIG_VP10 && CONFIG_EXT_PARTITION -# define MAX_CU_SIZE 128 -#else -# define MAX_CU_SIZE 64 -#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION +#ifndef MAX_SB_SIZE +# if CONFIG_VP10 && CONFIG_EXT_PARTITION +# define MAX_SB_SIZE 128 +# else +# define MAX_SB_SIZE 64 +# endif // CONFIG_VP10 && CONFIG_EXT_PARTITION +#endif // ndef MAX_SB_SIZE #define VPXMIN(x, y) (((x) < (y)) ? (x) : (y)) #define VPXMAX(x, y) (((x) > (y)) ? 
(x) : (y)) diff --git a/vpx_dsp/x86/convolve.h b/vpx_dsp/x86/convolve.h index 95c721ab6..ab387d664 100644 --- a/vpx_dsp/x86/convolve.h +++ b/vpx_dsp/x86/convolve.h @@ -99,27 +99,27 @@ void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ int w, int h) { \ assert(filter_x[3] != 128); \ assert(filter_y[3] != 128); \ - assert(w <= MAX_CU_SIZE); \ - assert(h <= MAX_CU_SIZE); \ + assert(w <= MAX_SB_SIZE); \ + assert(h <= MAX_SB_SIZE); \ assert(x_step_q4 == 16); \ assert(y_step_q4 == 16); \ if (filter_x[0] || filter_x[1] || filter_x[2]|| \ filter_y[0] || filter_y[1] || filter_y[2]) { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \ + DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+7)]); \ vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ - fdata2, MAX_CU_SIZE, \ + fdata2, MAX_SB_SIZE, \ filter_x, x_step_q4, filter_y, y_step_q4, \ w, h + 7); \ - vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_CU_SIZE, MAX_CU_SIZE, \ + vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_SB_SIZE, MAX_SB_SIZE, \ dst, dst_stride, \ filter_x, x_step_q4, filter_y, \ y_step_q4, w, h); \ } else { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+1)]); \ - vpx_convolve8_horiz_##opt(src, src_stride, fdata2, MAX_CU_SIZE, \ + DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+1)]); \ + vpx_convolve8_horiz_##opt(src, src_stride, fdata2, MAX_SB_SIZE, \ filter_x, x_step_q4, filter_y, y_step_q4, \ w, h + 1); \ - vpx_convolve8_##avg##vert_##opt(fdata2, MAX_CU_SIZE, dst, dst_stride, \ + vpx_convolve8_##avg##vert_##opt(fdata2, MAX_SB_SIZE, dst, dst_stride, \ filter_x, x_step_q4, filter_y, \ y_step_q4, w, h); \ } \ @@ -239,38 +239,38 @@ void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ const int16_t *filter_x, int x_step_q4, \ const int16_t *filter_y, int y_step_q4, \ int w, int h, int bd) { \ - assert(w <= MAX_CU_SIZE); \ - assert(h <= MAX_CU_SIZE); \ + assert(w <= MAX_SB_SIZE); 
\ + assert(h <= MAX_SB_SIZE); \ if (x_step_q4 == 16 && y_step_q4 == 16) { \ if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \ + DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+7)]); \ vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, \ src_stride, \ CONVERT_TO_BYTEPTR(fdata2), \ - MAX_CU_SIZE, \ + MAX_SB_SIZE, \ filter_x, x_step_q4, \ filter_y, y_step_q4, \ w, h + 7, bd); \ vpx_highbd_convolve8_##avg##vert_##opt( \ - CONVERT_TO_BYTEPTR(fdata2) + 3 * MAX_CU_SIZE, \ - MAX_CU_SIZE, \ + CONVERT_TO_BYTEPTR(fdata2) + 3 * MAX_SB_SIZE, \ + MAX_SB_SIZE, \ dst, \ dst_stride, \ filter_x, x_step_q4, \ filter_y, y_step_q4, \ w, h, bd); \ } else { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+1)]); \ + DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+1)]); \ vpx_highbd_convolve8_horiz_##opt(src, \ src_stride, \ CONVERT_TO_BYTEPTR(fdata2), \ - MAX_CU_SIZE, \ + MAX_SB_SIZE, \ filter_x, x_step_q4, \ filter_y, y_step_q4, \ w, h + 1, bd); \ vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), \ - MAX_CU_SIZE, \ + MAX_SB_SIZE, \ dst, \ dst_stride, \ filter_x, x_step_q4, \ diff --git a/vpx_dsp/x86/subtract_sse2.asm b/vpx_dsp/x86/subtract_sse2.asm index 4273efb85..2225b7cf6 100644 --- a/vpx_dsp/x86/subtract_sse2.asm +++ b/vpx_dsp/x86/subtract_sse2.asm @@ -31,6 +31,10 @@ cglobal subtract_block, 7, 7, 8, \ je .case_16 cmp colsd, 32 je .case_32 +%if CONFIG_EXT_PARTITION + cmp colsd, 64 + je .case_64 +%endif %macro loop16 6 mova m0, [srcq+%1] @@ -55,6 +59,22 @@ cglobal subtract_block, 7, 7, 8, \ mova [diffq+mmsize*1+%6], m1 %endmacro +%if CONFIG_EXT_PARTITION + mov pred_str, pred_stridemp +.loop_128: + loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize + loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize + loop16 4*mmsize, 
5*mmsize, 4*mmsize, 5*mmsize, 8*mmsize, 10*mmsize + loop16 6*mmsize, 7*mmsize, 6*mmsize, 7*mmsize, 12*mmsize, 14*mmsize + lea diffq, [diffq+diff_strideq*2] + add predq, pred_str + add srcq, src_strideq + sub rowsd, 1 + jnz .loop_128 + RET + +.case_64: +%endif mov pred_str, pred_stridemp .loop_64: loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize diff --git a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c index 6c5991858..48a88aeb4 100644 --- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c +++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c @@ -844,12 +844,12 @@ static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride, // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. // --Require an additional 8 rows for the horiz_w8 transpose tail. - DECLARE_ALIGNED(16, uint8_t, temp[(MAX_EXT_SIZE + 8) * MAX_CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, temp[(MAX_EXT_SIZE + 8) * MAX_SB_SIZE]); const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; - assert(w <= MAX_CU_SIZE); - assert(h <= MAX_CU_SIZE); + assert(w <= MAX_SB_SIZE); + assert(h <= MAX_SB_SIZE); assert(y_step_q4 <= 32); assert(x_step_q4 <= 32); @@ -857,33 +857,33 @@ static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride, scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, - MAX_CU_SIZE, + MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w, intermediate_height); } else { scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, - MAX_CU_SIZE, + MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w, intermediate_height); } if (w >= 16) { - scaledconvolve_vert_w16(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), - MAX_CU_SIZE, + scaledconvolve_vert_w16(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), + MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h); } else if (w == 8) { - scaledconvolve_vert_w8(temp 
+ MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), - MAX_CU_SIZE, + scaledconvolve_vert_w8(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), + MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h); } else { - scaledconvolve_vert_w4(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), - MAX_CU_SIZE, + scaledconvolve_vert_w4(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), + MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h); diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h index 38dd2706b..04467d0b2 100644 --- a/vpx_scale/yv12config.h +++ b/vpx_scale/yv12config.h @@ -21,7 +21,11 @@ extern "C" { #include "vpx/vpx_integer.h" #define VP8BORDERINPIXELS 32 -#define VP9INNERBORDERINPIXELS 96 +#if CONFIG_EXT_PARTITION +# define VP9INNERBORDERINPIXELS 160 +#else +# define VP9INNERBORDERINPIXELS 96 +#endif // CONFIG_EXT_PARTITION #define VP9_INTERP_EXTEND 4 #define VP9_ENC_BORDER_IN_PIXELS 160 #define VP9_DEC_BORDER_IN_PIXELS 160